// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

/*
 * This file defines SessionOptions Config Keys and the format of the Config Values.
 *
 * The naming convention for a SessionOptions Config Key is
 * "[Area][.[SubArea1].[SubArea2]...].[Keyname]"
 * such as "ep.cuda.use_arena".
 * The Config Key cannot be empty.
 * The maximum length of the Config Key is 128.
 *
 * The string format of a SessionOptions Config Value is defined individually for each Config.
 * The maximum length of the Config Value is 1024.
 */

// Key for disabling PrePacking.
// If the config value is set to "1" then prepacking is disabled, otherwise prepacking is enabled (default value).
static const char* const kOrtSessionOptionsConfigDisablePrepacking = "session.disable_prepacking";

// A value of "1" means allocators registered in the env will be used. "0" means the allocators created in the session
// will be used. Use this to override the usage of env allocators on a per session level.
static const char* const kOrtSessionOptionsConfigUseEnvAllocators = "session.use_env_allocators";

// Set to 'ORT' (case sensitive) to load an ORT format model.
// If unset, the model type will default to ONNX unless inferred from the filename ('.ort' == ORT format) or from the
// bytes to be ORT.
static const char* const kOrtSessionOptionsConfigLoadModelFormat = "session.load_model_format";

// Set to 'ORT' (case sensitive) to save the optimized model in ORT format when
// SessionOptions.optimized_model_filepath is set.
// If unset, the format will default to ONNX unless optimized_model_filepath ends in '.ort'.
static const char* const kOrtSessionOptionsConfigSaveModelFormat = "session.save_model_format";

// If the value is "1", flush-to-zero and denormal-as-zero are applied. The default is "0".
// When multiple sessions are created, a main thread doesn't override changes from succeeding session options,
// but threads in session thread pools follow option changes.
// When ORT runs with OpenMP, the same rule is applied, i.e. the first session option to flush-to-zero and
// denormal-as-zero is only applied to the global OpenMP thread pool, which doesn't support per-session thread pools.
// Note that an alternative that avoids this option at runtime is to train and export a model without denormals;
// that is recommended because turning this option on may hurt model accuracy.
static const char* const kOrtSessionOptionsConfigSetDenormalAsZero = "session.set_denormal_as_zero";

// Controls whether a quantized model in QDQ (QuantizeLinear/DeQuantizeLinear) format is run with QDQ fusion or not.
// "0": enable. ORT does fusion logic for QDQ format.
// "1": disable. ORT doesn't do fusion logic for QDQ format.
// The default value is "0".
static const char* const kOrtSessionOptionsDisableQuantQDQ = "session.disable_quant_qdq";

// Enable or disable gelu approximation in graph optimization. "0": disable; "1": enable. The default is "0".
// GeluApproximation has side effects which may change the inference results. It is disabled by default due to this.
static const char* const kOrtSessionOptionsEnableGeluApproximation = "optimization.enable_gelu_approximation";

// Enable or disable using the device allocator for allocating initialized tensor memory.
// "1": enable; "0": disable. The default is "0".
// Using device allocators means the memory allocation is made using malloc/new.
static const char* const kOrtSessionOptionsUseDeviceAllocatorForInitializers = "session.use_device_allocator_for_initializers";

// Configure whether to allow the inter_op/intra_op threads to spin a number of times before blocking.
// "0": a thread will block if it finds no job to run
// "1": default, a thread will spin a number of times before blocking
static const char* const kOrtSessionOptionsConfigAllowInterOpSpinning = "session.inter_op.allow_spinning";
static const char* const kOrtSessionOptionsConfigAllowIntraOpSpinning = "session.intra_op.allow_spinning";

// Key for using model bytes directly for ORT format.
// If a session is created using an input byte array that contains the ORT format model data,
// by default we will copy the model bytes at the time of session creation to ensure the model bytes
// buffer is valid.
// Setting this option to "1" will disable copying the model bytes, and use the model bytes directly. The caller
// has to guarantee that the model bytes are valid until the ORT session using the model bytes is destroyed.
static const char* const kOrtSessionOptionsConfigUseORTModelBytesDirectly = "session.use_ort_model_bytes_directly";

// Save information for replaying graph optimizations later instead of applying them directly.
//
// When an ONNX model is loaded, ORT can perform various optimizations on the graph.
// However, when an ORT format model is loaded, these optimizations are typically not available - this scenario must
// be supported by minimal builds.
// When loading an ONNX model, ORT can optionally save the effects of some optimizations for later replay in an ORT
// format model. These are known as "runtime optimizations" - in an ORT format model, they happen at runtime.
//
// Note: This option is only applicable when loading an ONNX model and saving an ORT format model.
//
// Note: Runtime optimizations are only supported for certain optimizations at the extended level or higher.
// Unsupported optimizations at those levels are not applied at all, while optimizations at other levels are applied
// directly.
//
// "0": disabled, "1": enabled
// The default is "0".
static const char* const kOrtSessionOptionsConfigSaveRuntimeOptimizations = "optimization.save_runtime_optimizations";

// Note: The options specific to an EP should be specified prior to appending that EP to the session options object in
// order for them to take effect.

// Specifies a list of stop op types. Nodes of a type in the stop op types and nodes downstream from them will not be
// run by the NNAPI EP.
// The value should be a ","-delimited list of op types. For example, "Add,Sub".
// If not specified, the default set of stop ops is used. To specify an empty stop ops types list and disable stop op
// exclusion, set the value to "".
static const char* const kOrtSessionOptionsConfigNnapiEpPartitioningStopOps = "ep.nnapi.partitioning_stop_ops";