#include "ofxOnnxRuntime.h"

#include <algorithm>
#include <chrono>
#include <clocale>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <memory>
#include <random>
#include <sstream>
#include <string>
#include <vector>

namespace ofxOnnxRuntime
{
	namespace
	{
		// Copies every element of `blob` into slot `slot` of `batch`, where one slot is
		// exactly one blob long. The buffer is grown when it is too small, so the copy
		// can never run past the end of the vector.
		// The blob is expected to be continuous and to hold elements of type T (true for
		// the freshly created blobFromImage / convertTo results used in this file).
		template <typename T>
		void copyBlobToBatchSlot(const cv::Mat& blob, std::vector<T>& batch, const std::size_t slot)
		{
			const std::size_t element_count = blob.total() * static_cast<std::size_t>(blob.channels());
			if (element_count == 0) return;

			const std::size_t offset = slot * element_count;
			if (batch.size() < offset + element_count) batch.resize(offset + element_count);

			std::memcpy(batch.data() + offset, blob.data, element_count * sizeof(T));
		}
	}

#ifdef _MSC_VER
	// Converts a multibyte string to a wide string using the user's default locale.
	// Falls back to a plain element-wise widening when the input is not a valid
	// multibyte sequence for that locale.
	static std::wstring to_wstring(const std::string &str)
	{
		if (str.empty()) return std::wstring();

		setlocale(LC_CTYPE, "");

		// A multibyte string never decodes to more wide characters than it has bytes;
		// the extra slot holds the terminator.
		std::vector<wchar_t> buffer(str.size() + 1, L'\0');
		const std::size_t converted = mbstowcs(buffer.data(), str.c_str(), buffer.size());
		if (converted == static_cast<std::size_t>(-1)) {
			return std::wstring(str.begin(), str.end());
		}
		return std::wstring(buffer.data(), converted);
	}
#endif

	// Configures the session options, stores the run-time flags and loads the model.
	//   onnx_path    : model path, resolved through ofToDataPath()
	//   base_setting : supplies the input/output data types and the inference backend
	//   batch_size   : number of images expected per run(); also substituted for
	//                  dynamic (negative) input dimensions
	//   debug        : print model/tensor shapes
	//   timestamp    : stored for timing output (the timing code in run() is disabled)
	void BaseHandler::setup(const std::string & onnx_path, const BaseSetting & base_setting, const int & batch_size, const bool debug, const bool timestamp)
	{
		// Store data types
		this->input_dtype = base_setting.input_dtype;
		this->output_dtype = base_setting.output_dtype;

		Ort::SessionOptions session_options;
		session_options.SetIntraOpNumThreads(1);
		session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

		if (base_setting.infer_type == INFER_CUDA) {
			OrtCUDAProviderOptions opts;
			opts.device_id = 0;
			opts.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
			opts.do_copy_in_default_stream = 0;
			opts.arena_extend_strategy = 0;
			session_options.AppendExecutionProvider_CUDA(opts);
		}

		// These must be set before setup2(): setNames() reads batch_size and debug.
		this->timestamp = timestamp;
		this->debug = debug;
		this->batch_size = batch_size;

		this->setup2(onnx_path, session_options);
	}

	// Creates the inference session for the given model and caches its input/output names.
	void BaseHandler::setup2(const std::string & onnx_path, const Ort::SessionOptions & session_options)
	{
		const std::string path = ofToDataPath(onnx_path, true);

		// ORTCHAR_T is wchar_t on Windows and char elsewhere, so this element-wise
		// conversion yields the path type the Ort::Session constructor expects on
		// every platform.
		const std::basic_string<ORTCHAR_T> ort_path(path.begin(), path.end());

		ort_session = std::make_shared<Ort::Session>(ort_env, ort_path.c_str(), session_options);

		setNames();
	}

	// Queries the session for its input/output names and the input shape.
	// Only one input shape is kept: with several inputs the last one wins.
	void BaseHandler::setNames()
	{
		Ort::AllocatorWithDefaultOptions allocator;

		// Start from a clean state so calling setup() again does not accumulate
		// names or keep multiplying the element count.
		input_node_names.clear();
		output_node_names.clear();
		input_node_dims.clear();

		// 1. Gets Input Name/s & Shape ([1, 3, 28, 28]) -- In most cases this is usually just one
		const std::size_t input_count = ort_session->GetInputCount();
		for (std::size_t i = 0; i < input_count; i++) {
			input_node_names.emplace_back(ort_session->GetInputNameAllocated(i, allocator).get());
			input_node_dims = ort_session->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();

			// Some models might have negative shape values to indicate dynamic shape,
			// e.g., for variable batch size. (?, 3, 28, 28) -> (1, 3, 28, 28)
			// Every negative dimension is replaced by batch_size.
			for (auto& s : input_node_dims) if (s < 0) s = batch_size;

			if (debug) std::cout << input_node_names.at(i) << " : " << PrintShape(input_node_dims) << std::endl;
		}

		// 2. Calculate the product of the dimensions (total element count of the input tensor)
		input_node_size = 1;
		for (const auto& f : input_node_dims) {
			input_node_size *= f;
		}

		if (debug) {
			const std::string batch_dim = input_node_dims.empty() ? std::string("n/a") : ofToString(input_node_dims[0]);
			ofLog() << ofToString(input_node_size) + ", Batch Size:" + batch_dim;
		}

		// 3. Clear up output values
		output_node_dims.clear();
		output_values.clear();

		// 4. Gets Output name/s & Shapes
		const std::size_t output_count = ort_session->GetOutputCount();
		for (std::size_t i = 0; i < output_count; i++) {
			output_node_names.emplace_back(ort_session->GetOutputNameAllocated(i, allocator).get());
			auto output_shapes = ort_session->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();

			output_values.emplace_back(nullptr);

			if (debug) std::cout << output_node_names.at(i) << " : " << PrintShape(output_shapes) << std::endl;
		}
	}

	// Converts the images passed to setInputs() into one batched input tensor, runs
	// the model and returns a pointer to the data of the first output tensor.
	// The pointer refers to memory owned by output_values and stays valid until the
	// next run()/setup(). On failure the data of dummy_output_tensor is returned, or
	// nullptr when that container is empty.
	float* BaseHandler::run()
	{
		auto start = std::chrono::high_resolution_clock::now(); // starting timestamp
		(void)start; // only consumed by the disabled timing block below

		// Result handed back on every failure path.
		const auto failure_output = [this]() -> float* {
			if (dummy_output_tensor.empty()) return nullptr;
			return dummy_output_tensor.front().GetTensorMutableData<float>();
		};

		const std::size_t image_count = input_imgs.size();
		if (image_count != static_cast<std::size_t>(batch_size)) {
			ofLog() << "Input images do not match batch size. Inference FAILED.";
			return failure_output();
		}

		// transform std::string -> const char*
		std::vector<const char*> input_names_char(input_node_names.size(), nullptr);
		std::transform(std::begin(input_node_names), std::end(input_node_names), std::begin(input_names_char),
			[](const std::string& str) { return str.c_str(); });

		std::vector<const char*> output_names_char(output_node_names.size(), nullptr);
		std::transform(std::begin(output_node_names), std::end(output_node_names), std::begin(output_names_char),
			[](const std::string& str) { return str.c_str(); });

		// The tensors created below alias these buffers, so they must outlive Run().
		std::vector<float> batch_values_f;
		std::vector<int32_t> batch_values_int32;
		const std::size_t tensor_elements = static_cast<std::size_t>(input_node_size);

		try {
			std::vector<Ort::Value> input_tensors;

			if (input_dtype == ModelDataType::FLOAT32) {
				// 1. Convert every image into its slot of the batch buffer.
				//    The buffer is really sized (not just reserved) before it is written.
				batch_values_f.resize(tensor_elements);
				for (std::size_t i = 0; i < image_count; i++) {
					convertImageToMatFloat(input_imgs[i], batch_values_f, i);
				}

				// 2. Create tensor with batch values { input data, input size, model input dims, model input size}
				input_tensors.emplace_back(Ort::Value::CreateTensor<float>(
					memory_info_handler, batch_values_f.data(), tensor_elements,
					input_node_dims.data(), input_node_dims.size()));
			}
			else if (input_dtype == ModelDataType::INT32) {
				batch_values_int32.resize(tensor_elements);
				for (std::size_t i = 0; i < image_count; i++) {
					convertImageToMatInt32(input_imgs[i], batch_values_int32, i);
				}

				input_tensors.emplace_back(Ort::Value::CreateTensor<int32_t>(
					memory_info_handler, batch_values_int32.data(), tensor_elements,
					input_node_dims.data(), input_node_dims.size()));
			}
			else {
				ofLog() << "Unsupported input data type. Inference FAILED.";
				return failure_output();
			}

			// Run() reads one tensor per input name; refuse to read past the end.
			if (input_tensors.size() != input_names_char.size()) {
				ofLog() << "Number of input tensors does not match the model inputs. Inference FAILED.";
				return failure_output();
			}

			// 3. Run inference, { in names, input data, num of inputs, output names, num of outputs }
			ofLog() << "run";
			output_values = ort_session->Run(Ort::RunOptions{ nullptr },
				input_names_char.data(), input_tensors.data(), input_names_char.size(),
				output_names_char.data(), output_names_char.size());
			ofLog() << "ran";

			if (output_values.empty()) {
				ofLog() << "Model produced no outputs. Inference FAILED.";
				return failure_output();
			}

			if (debug) {
				// Shape of the first output
				auto& out = output_values.front();
				Ort::TensorTypeAndShapeInfo info = out.GetTensorTypeAndShapeInfo();
				std::vector<int64_t> output_dims = info.GetShape();

				// Print the dimensions
				std::cout << "Output tensor dimensions: [";
				for (std::size_t i = 0; i < output_dims.size(); i++) {
					std::cout << output_dims[i];
					if (i + 1 < output_dims.size()) {
						std::cout << ", ";
					}
				}
				std::cout << "]" << std::endl;

				// Print total number of elements
				std::size_t total_elements = 1;
				for (const auto& dim : output_dims) {
					if (dim > 0) { // Skip dynamic dimensions
						total_elements *= static_cast<std::size_t>(dim);
					}
				}
				std::cout << "Total elements: " << total_elements << std::endl;
			}

			// if (timestamp) {
			// 	auto end = std::chrono::high_resolution_clock::now();
			// 	std::chrono::duration<double, std::milli> elapsed = end - start;
			// 	std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl;
			// }

			return output_values.front().GetTensorMutableData<float>();
		}
		catch (const Ort::Exception& ex) {
			std::cout << "ERROR running model inference: " << ex.what() << std::endl;
			return failure_output();
		}
	}

	/*
	 *
	 * Utilities (。・∀・)ノ゙
	 *
	 */

	// Converts one image into a float blob scaled by 1/255, resized to the model
	// input size, and copies it into slot `idx` of `values`.
	// Assumes the model input is laid out as NCHW (dims[2] = height, dims[3] = width).
	// NOTE(review): only 3-channel images are wrapped as multi-channel; every other
	// channel count is treated as single-channel -- confirm this is intended.
	void BaseHandler::convertImageToMatFloat(ofImage* img, std::vector<float>& values, size_t& idx)
	{
		if (img == nullptr) {
			ofLog() << "convertImageToMatFloat: null image, slot left untouched.";
			return;
		}
		if (input_node_dims.size() < 4) {
			ofLog() << "convertImageToMatFloat: model input is not 4-dimensional, slot left untouched.";
			return;
		}

		ofPixels& pix = img->getPixels();
		const int width = static_cast<int>(img->getWidth());
		const int height = static_cast<int>(img->getHeight());
		const int channels = static_cast<int>(pix.getNumChannels());
		if (width <= 0 || height <= 0) {
			ofLog() << "convertImageToMatFloat: empty image, slot left untouched.";
			return;
		}

		// Wraps the pixel buffer without copying it.
		cv::Mat cvImage(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData());

		// cv::Size takes (width, height).
		const cv::Size target_size(static_cast<int>(input_node_dims[3]), static_cast<int>(input_node_dims[2]));
		image_array = cv::dnn::blobFromImage(cvImage, 1 / 255.0, target_size, cv::Scalar(0, 0, 0), false, false);

		// The copy length comes from the blob itself: the blob has the model's size,
		// which may differ from the source image size.
		copyBlobToBatchSlot(image_array, values, idx);
	}

	// Converts one image into an int32 blob (kept at the image's own size) and copies
	// it into slot `idx` of `values`.
	// NOTE(review): the 1/255 scale is applied before the conversion to CV_32S, so the
	// stored values collapse to 0 or 1 -- confirm this is what the int32 model expects.
	// NOTE(review): unlike the float path the image is not resized to the model input.
	void BaseHandler::convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx)
	{
		if (img == nullptr) {
			ofLog() << "convertImageToMatInt32: null image, slot left untouched.";
			return;
		}

		ofPixels& pix = img->getPixels();
		const int width = static_cast<int>(img->getWidth());
		const int height = static_cast<int>(img->getHeight());
		const int channels = static_cast<int>(pix.getNumChannels());
		if (width <= 0 || height <= 0) {
			ofLog() << "convertImageToMatInt32: empty image, slot left untouched.";
			return;
		}

		// Wraps the pixel buffer without copying it.
		cv::Mat cvImage(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData());

		// cv::Size takes (width, height); passing the image's own size means no resize.
		const cv::Mat scaled = cv::dnn::blobFromImage(cvImage, 1 / 255.0, cv::Size(width, height), cv::Scalar(0, 0, 0), false, false);
		scaled.convertTo(image_array, CV_32S);

		copyBlobToBatchSlot(image_array, values, idx);
	}

	// Stores the images to be used by the next run(); one image per batch entry.
	void BaseHandler::setInputs(std::vector<ofImage*>& in)
	{
		this->input_imgs = in;
	}

	// Formats the shape of a tensor as "AxBxC" (ex. input: 1x1x512x512).
	// An empty shape yields an empty string.
	std::string BaseHandler::PrintShape(const std::vector<int64_t>& v)
	{
		std::stringstream ss;
		for (std::size_t i = 0; i < v.size(); i++) {
			if (i > 0) ss << "x";
			ss << v[i];
		}
		return ss.str();
	}

	// Creates a float tensor shaped like the model input, with the first dimension
	// replaced by batch_size, filled with random values between 0 and 255.
	// The tensor owns its memory (allocated through the default ORT allocator), so it
	// remains valid after this function returns.
	// Returns an empty Ort::Value when the model input shape is unknown.
	Ort::Value BaseHandler::GenerateTensor(int batch_size)
	{
		if (input_node_dims.empty()) {
			ofLog() << "GenerateTensor: input shape is unknown, returning an empty value.";
			return Ort::Value{ nullptr };
		}

		// Model input dimensions with the requested batch size in front
		std::vector<int64_t> batched_dims(input_node_dims.begin(), input_node_dims.end());
		batched_dims[0] = batch_size;

		Ort::AllocatorWithDefaultOptions allocator;
		Ort::Value tensor = Ort::Value::CreateTensor<float>(allocator, batched_dims.data(), batched_dims.size());

		// Random number generation setup
		std::random_device rd;
		std::mt19937 gen(rd());
		std::uniform_real_distribution<float> dis(0.0f, 255.0f);

		// Fill the tensor with random values
		const std::size_t element_count = tensor.GetTensorTypeAndShapeInfo().GetElementCount();
		float* data = tensor.GetTensorMutableData<float>();
		std::generate(data, data + element_count, [&]() { return dis(gen); });

		return tensor;
	}

	// Returns the product of all entries of v (1 for an empty vector).
	// The product is accumulated in 64 bits and narrowed once at the end.
	int BaseHandler::CalculateProduct(const std::vector<int64_t>& v)
	{
		int64_t total = 1;
		for (const auto dim : v) total *= dim;
		return static_cast<int>(total);
	}

	// Wraps `data` in a tensor of the given shape without copying it.
	// The returned tensor aliases data's storage: `data` must outlive the tensor and
	// must not be reallocated while the tensor is in use.
	Ort::Value BaseHandler::VectorToTensor(std::vector<float>& data, const std::vector<int64_t>& shape)
	{
		return Ort::Value::CreateTensor<float>(memory_info_handler, data.data(), data.size(), shape.data(), shape.size());
	}
}