You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
98 lines
6.2 KiB
98 lines
6.2 KiB
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
#pragma once
|
|
|
|
/*
|
|
* This file defines SessionOptions Config Keys and format of the Config Values.
|
|
*
|
|
* The Naming Convention for a SessionOptions Config Key,
|
|
* "[Area][.[SubArea1].[SubArea2]...].[Keyname]"
|
|
* Such as "ep.cuda.use_arena"
|
|
* The Config Key cannot be empty
|
|
* The maximum length of the Config Key is 128
|
|
*
|
|
* The string format of a SessionOptions Config Value is defined individually for each Config.
|
|
* The maximum length of the Config Value is 1024
|
|
*/
|
|
|
|
// Key for disabling prepacking.
|
|
// If the config value is set to "1" then the prepacking is disabled, otherwise prepacking is enabled (default value)
|
|
static const char *const kOrtSessionOptionsConfigDisablePrepacking =
    "session.disable_prepacking";
|
|
|
|
// A value of "1" means allocators registered in the env will be used. "0" means the allocators created in the session
|
|
// will be used. Use this to override the usage of env allocators on a per session level.
|
|
static const char *const kOrtSessionOptionsConfigUseEnvAllocators =
    "session.use_env_allocators";
|
|
|
|
// Set to 'ORT' (case sensitive) to load an ORT format model.
|
|
// If unset, model type will default to ONNX unless inferred from filename ('.ort' == ORT format) or bytes to be ORT
|
|
static const char *const kOrtSessionOptionsConfigLoadModelFormat =
    "session.load_model_format";
|
|
|
|
// Set to 'ORT' (case sensitive) to save optimized model in ORT format when SessionOptions.optimized_model_filepath is set.
|
|
// If unset, format will default to ONNX unless optimized_model_filepath ends in '.ort'.
|
|
static const char *const kOrtSessionOptionsConfigSaveModelFormat =
    "session.save_model_format";
|
|
|
|
// If a value is "1", flush-to-zero and denormal-as-zero are applied. The default is "0".
|
|
// When multiple sessions are created, the main thread keeps the first session's setting regardless of later session options,
|
|
// but threads in session thread pools follow option changes.
|
|
// When ORT runs with OpenMP, the same rule is applied, i.e. the first session option to flush-to-zero and
|
|
// denormal-as-zero is only applied to global OpenMP thread pool, which doesn't support per-session thread pool.
|
|
// Note that an alternative to using this option at runtime is to train and export a model without denormals
|
|
// and that's recommended because turning this option on may hurt model accuracy.
|
|
static const char *const kOrtSessionOptionsConfigSetDenormalAsZero =
    "session.set_denormal_as_zero";
|
|
|
|
// Controls whether to run a quantized model in QDQ (QuantizeLinear/DequantizeLinear) format.
|
|
// "0": enable. ORT does fusion logic for QDQ format.
|
|
// "1": disable. ORT doesn't do fusion logic for QDQ format.
|
|
// Its default value is "0"
|
|
static const char *const kOrtSessionOptionsDisableQuantQDQ =
    "session.disable_quant_qdq";
|
|
|
|
// Enable or disable gelu approximation in graph optimization. "0": disable; "1": enable. The default is "0".
|
|
// GeluApproximation has side effects which may change the inference results. It is disabled by default due to this.
|
|
static const char *const kOrtSessionOptionsEnableGeluApproximation =
    "optimization.enable_gelu_approximation";
|
|
|
|
// Enable or disable using device allocator for allocating initialized tensor memory. "1": enable; "0": disable. The default is "0".
|
|
// Using device allocators means the memory allocation is made using malloc/new.
|
|
static const char *const kOrtSessionOptionsUseDeviceAllocatorForInitializers =
    "session.use_device_allocator_for_initializers";
|
|
|
|
// Configure whether to allow the inter_op/intra_op threads to spin a number of times before blocking
|
|
// "0": thread will block if it finds no job to run
|
|
// "1": default, thread will spin a number of times before blocking
|
|
static const char *const kOrtSessionOptionsConfigAllowInterOpSpinning =
    "session.inter_op.allow_spinning";
|
|
static const char *const kOrtSessionOptionsConfigAllowIntraOpSpinning =
    "session.intra_op.allow_spinning";
|
|
|
|
// Key for using model bytes directly for ORT format
|
|
// If a session is created using an input byte array containing the ORT format model data,
|
|
// by default we will copy the model bytes at the time of session creation to ensure the model bytes
|
|
// buffer is valid.
|
|
// Setting this option to "1" will disable copying the model bytes, and use the model bytes directly. The caller
|
|
// has to guarantee that the model bytes are valid until the ORT session using the model bytes is destroyed.
|
|
static const char *const kOrtSessionOptionsConfigUseORTModelBytesDirectly =
    "session.use_ort_model_bytes_directly";
|
|
|
|
// Save information for replaying graph optimizations later instead of applying them directly.
|
|
//
|
|
// When an ONNX model is loaded, ORT can perform various optimizations on the graph.
|
|
// However, when an ORT format model is loaded, these optimizations are typically not available - this scenario must
|
|
// be supported by minimal builds.
|
|
// When loading an ONNX model, ORT can optionally save the effects of some optimizations for later replay in an ORT
|
|
// format model. These are known as "runtime optimizations" - in an ORT format model, they happen at runtime.
|
|
//
|
|
// Note: This option is only applicable when loading an ONNX model and saving an ORT format model.
|
|
//
|
|
// Note: Runtime optimizations are only supported for certain optimizations at the extended level or higher.
|
|
// Unsupported optimizations at those levels are not applied at all, while optimizations at other levels are applied
|
|
// directly.
|
|
//
|
|
// "0": disabled, "1": enabled
|
|
// The default is "0".
|
|
static const char *const kOrtSessionOptionsConfigSaveRuntimeOptimizations =
    "optimization.save_runtime_optimizations";
|
|
|
|
// Note: The options specific to an EP should be specified prior to appending that EP to the session options object in
|
|
// order for them to take effect.
|
|
|
|
// Specifies a list of stop op types. Nodes of a type in the stop op types and nodes downstream from them will not be
|
|
// run by the NNAPI EP.
|
|
// The value should be a ","-delimited list of op types. For example, "Add,Sub".
|
|
// If not specified, the default set of stop ops is used. To specify an empty stop op types list and disable stop op
|
|
// exclusion, set the value to "".
|
|
static const char *const kOrtSessionOptionsConfigNnapiEpPartitioningStopOps =
    "ep.nnapi.partitioning_stop_ops";
|
|
|