// ofxOnnxRuntime/temp.cpp


								#include "ofxOnnxRuntime.h"


								namespace ofxOnnxRuntime

								{

								#ifdef _MSC_VER

									/// Converts a multibyte string to a wide string using the environment's
									/// default locale.
									///
									/// @param str  Multibyte text; conversion stops at the first NUL character.
									/// @return The converted wide string, or an empty string when `str` is empty
									///         or contains a byte sequence that is invalid in the active locale.
									static std::wstring to_wstring(const std::string &str)
									{
										// mbstowcs decodes according to LC_CTYPE, so select the user's default locale first.
										setlocale(LC_CTYPE, "");

										// A multibyte string never yields more wide characters than it has bytes;
										// the extra slot guarantees room for the terminator even for empty input.
										std::vector<wchar_t> buffer(str.size() + 1, L'\0');

										const std::size_t converted = mbstowcs(buffer.data(), str.c_str(), buffer.size());
										if (converted == static_cast<std::size_t>(-1)) {
											// Invalid multibyte sequence: the buffer contents are unspecified, so do not read them.
											return std::wstring();
										}

										return std::wstring(buffer.data(), converted);
									}

								#endif

									/// Configures the handler and loads the model.
									///
									/// Copies the input/output data types from `base_setting`, builds the
									/// session options (single-threaded, all graph optimizations, optional CUDA
									/// provider), stores the runtime flags, then hands over to setup2() to
									/// create the session.
									///
									/// @param onnx_path     Model path, resolved by setup2().
									/// @param base_setting  Supplies input_dtype, output_dtype and infer_type.
									/// @param batch_size    Stored and later used in place of dynamic (negative) dims.
									/// @param debug         Enables the diagnostic prints in setNames() and run().
									/// @param timestamp     Stored in the `timestamp` member.
									void BaseHandler::setup(const std::string & onnx_path, const BaseSetting & base_setting, const int & batch_size, const bool debug, const bool timestamp)
									{
										// Store data types
										this->input_dtype = base_setting.input_dtype;
										this->output_dtype = base_setting.output_dtype;

										Ort::SessionOptions session_options;
										session_options.SetIntraOpNumThreads(1);
										// This used to be a second, redundant SetIntraOpNumThreads(1) call;
										// presumably the inter-op pool was meant to be limited as well.
										session_options.SetInterOpNumThreads(1);
										session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

										if (base_setting.infer_type == INFER_CUDA) {
											OrtCUDAProviderOptions opts;
											opts.device_id = 0;
											opts.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
											opts.do_copy_in_default_stream = 0;
											opts.arena_extend_strategy = 0;
											session_options.AppendExecutionProvider_CUDA(opts);
										}

										// These members must be set before setup2(): setNames() reads `debug` and `batch_size`.
										this->timestamp = timestamp;
										this->debug = debug;
										this->batch_size = batch_size;

										this->setup2(onnx_path, session_options);
									}


									/// Creates the ONNX Runtime session for the model and caches its node names.
									///
									/// @param onnx_path        Model path; resolved to an absolute path via ofToDataPath.
									/// @param session_options  Options the session is created with.
									void BaseHandler::setup2(const std::string & onnx_path, const Ort::SessionOptions & session_options)
									{
										const std::string path = ofToDataPath(onnx_path, true);

								#ifdef _WIN32
										// ONNX Runtime takes wide-character paths on Windows only.
										// NOTE(review): this widens byte-by-byte, which is only correct for ASCII paths.
										const std::wstring wpath(path.begin(), path.end());  // basic conversion
										ort_session = std::make_shared<Ort::Session>(ort_env, wpath.c_str(), session_options);
								#else
										// Everywhere else the session constructor takes a narrow path.
										ort_session = std::make_shared<Ort::Session>(ort_env, path.c_str(), session_options);
								#endif

										setNames();
									}


									/// Queries the session for its input/output node names and the input shape.
									///
									/// Fills input_node_names / output_node_names, stores the shape of the LAST
									/// input in input_node_dims (negative, i.e. dynamic, dimensions are replaced
									/// by batch_size), computes input_node_size as the product of those
									/// dimensions and sizes output_values with one null placeholder per output.
									/// All cached state is reset first so the function can be called again
									/// after a new session is created.
									void BaseHandler::setNames()
									{
										Ort::AllocatorWithDefaultOptions allocator;

										// 0. Drop anything left over from a previous setup so names are not duplicated
										input_node_names.clear();
										output_node_names.clear();

										// 1. Gets Input Name/s & Shape ([1, 3, 28, 28]) -- In most cases this is usually just one
										const std::size_t input_count = ort_session->GetInputCount();
										for (std::size_t i = 0; i < input_count; i++) {
											input_node_names.emplace_back(ort_session->GetInputNameAllocated(i, allocator).get());
											input_node_dims = ort_session->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();

											// Some models might have negative shape values to indicate dynamic shape, e.g., for variable batch size. (?, 3, 28, 28) -> (1, 3, 28, 28)
											for (auto& s : input_node_dims) if (s < 0) s = batch_size;

											if (debug) std::cout << input_node_names.at(i) << " : " << PrintShape(input_node_dims) << std::endl;
										}

										// 2. Calculate the product of the dimensions (restart from 1 so repeated setups do not compound)
										input_node_size = 1;
										for (const auto& f : input_node_dims) {
											input_node_size *= f;
										}

										// The batch size is the first dimension; skip the log when the model reports no input shape.
										if (debug && !input_node_dims.empty())  ofLog() << ofToString(input_node_size) + ", Batch Size:" + ofToString(input_node_dims[0]);

										// 3. Clear up output values
										// NOTE(review): output_node_dims is cleared here but never refilled in this function.
										output_node_dims.clear();
										output_values.clear();

										// 4. Gets Output name/s & Shapes
										const std::size_t output_count = ort_session->GetOutputCount();
										for (std::size_t i = 0; i < output_count; i++) {
											output_node_names.emplace_back(ort_session->GetOutputNameAllocated(i, allocator).get());
											auto output_shapes = ort_session->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();

											// Placeholder that run() overwrites with the real output tensor
											output_values.emplace_back(nullptr);

											if (debug) std::cout << output_node_names.at(i) << " : " << PrintShape(output_shapes) << std::endl;
										}
									}


									/// Runs one batched inference over the images given to setInputs().
									///
									/// The images are converted into a single contiguous batch buffer (float or
									/// int32, depending on input_dtype), wrapped in an input tensor shaped like
									/// input_node_dims, and passed to the session.
									///
									/// @return Pointer to the float data of the first output tensor. On any
									///         failure (image count differs from batch_size, unsupported input
									///         dtype, no outputs, ONNX Runtime exception) the data pointer of
									///         dummy_output_tensor.front() is returned instead.
									float* BaseHandler::run()
									{
										auto start = std::chrono::high_resolution_clock::now(); // starting timestamp (only used by the disabled timing block below)

										std::vector<Ort::Value> input_tensors;

										const size_t num_images = input_imgs.size();
										const size_t batch_count = static_cast<size_t>(batch_size);

										if (num_images != batch_count) {
											ofLog() << "Input images do not match batch size. Inference FAILED.";
											return dummy_output_tensor.front().GetTensorMutableData<float>();
										}

										// transform std::string -> const char*
										std::vector<const char*> input_names_char;
										input_names_char.reserve(input_node_names.size());
										for (const std::string& name : input_node_names) input_names_char.push_back(name.c_str());

										std::vector<const char*> output_names_char;
										output_names_char.reserve(output_node_names.size());
										for (const std::string& name : output_node_names) output_names_char.push_back(name.c_str());

										// The tensors created below only point at these buffers, so both are declared
										// at function scope to stay alive until Run() has finished.
										std::vector<float> batch_values_f;
										std::vector<int32_t> batch_values_int32;

										// Same capacity as before, but the buffer is really sized (not merely reserved)
										// because the converters write through data().
										const size_t buffer_len = static_cast<size_t>(input_node_size) * batch_count;

										if (input_dtype == ModelDataType::FLOAT32) {
											batch_values_f.resize(buffer_len);

											// 1. Convert every image into its slot of the batch buffer
											for (size_t i = 0; i < batch_count; i++) {
												convertImageToMatFloat(input_imgs[i], batch_values_f, i);
											}

											// 2. Create tensor with batch values { input data, input size, model input dims, model input size}
											input_tensors.emplace_back(Ort::Value::CreateTensor<float>(
												memory_info_handler, batch_values_f.data(), input_node_size,
												input_node_dims.data(), input_node_dims.size()));
										}
										else if (input_dtype == ModelDataType::INT32) {
											batch_values_int32.resize(buffer_len);

											// 1. Convert every image into its slot of the batch buffer
											for (size_t i = 0; i < batch_count; i++) {
												convertImageToMatInt32(input_imgs[i], batch_values_int32, i);
											}

											// 2. Create tensor with batch values { input data, input size, model input dims, model input size}
											input_tensors.emplace_back(Ort::Value::CreateTensor<int32_t>(
												memory_info_handler, batch_values_int32.data(), input_node_size,
												input_node_dims.data(), input_node_dims.size()));
										}
										else {
											// Without this guard Run() would be told there are inputs while input_tensors is empty.
											ofLog() << "Unsupported input data type. Inference FAILED.";
											return dummy_output_tensor.front().GetTensorMutableData<float>();
										}

										try {
											// 3. Run inference, { in names, input data, num of inputs, output names, num of outputs }
											ofLog() << "run";
											output_values = ort_session->Run(Ort::RunOptions{ nullptr },
												input_names_char.data(), input_tensors.data(),
												input_names_char.size(), output_names_char.data(),
												output_names_char.size());
											ofLog() << "ran";

											if (output_values.empty()) {
												ofLog() << "Model produced no outputs. Inference FAILED.";
												return dummy_output_tensor.front().GetTensorMutableData<float>();
											}

											if (debug) {
												// Shape of the first output tensor
												auto& out = output_values.front();
												Ort::TensorTypeAndShapeInfo info = out.GetTensorTypeAndShapeInfo();
												std::vector<int64_t> output_dims = info.GetShape();

												// Print the dimensions
												std::cout << "Output tensor dimensions: [";
												for (size_t i = 0; i < output_dims.size(); i++) {
													if (i > 0) {
														std::cout << ", ";
													}
													std::cout << output_dims[i];
												}
												std::cout << "]" << std::endl;

												// Optional: Print total number of elements
												size_t total_elements = 1;
												for (const auto& dim : output_dims) {
													if (dim > 0) {  // Handle dynamic dimensions
														total_elements *= static_cast<size_t>(dim);
													}
												}
												std::cout << "Total elements: " << total_elements << std::endl;
											}

											// Timing output is currently disabled:
											// if (timestamp) {
											// 	auto end = std::chrono::high_resolution_clock::now();
											// 	std::chrono::duration<double, std::milli> elapsed = end - start;
											// 	std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl;
											// }

											return output_values.front().GetTensorMutableData<float>();

										} catch (const Ort::Exception& ex) {
											std::cout << "ERROR running model inference: " << ex.what() << std::endl;
											return dummy_output_tensor.front().GetTensorMutableData<float>();
										}
									}


									/*

									*

									*	Utilities (｡･∀･)ﾉﾞ

									*

									*/


									// Add separate methods for float and int32 conversion

								    /// Converts one image into float data and copies it into slot `idx` of `values`.
								    ///
								    /// The pixels are wrapped in a cv::Mat (no copy), turned into a blob scaled
								    /// by 1/255 and resized to the model input, then copied to
								    /// `values[idx * n .. (idx + 1) * n)` where n is the blob's element count.
								    /// `values` is grown if it is too small to hold that slot.
								    ///
								    /// @param img     Source image; a null pointer is logged and skipped.
								    /// @param values  Batch buffer receiving the converted data.
								    /// @param idx     Zero-based position of this image within the batch.
								    void BaseHandler::convertImageToMatFloat(ofImage* img, std::vector<float>& values, size_t& idx) {
								        if (img == nullptr) {
								            ofLog() << "convertImageToMatFloat: null image skipped.";
								            return;
								        }

								        ofPixels& pix = img->getPixels();
								        const int width = static_cast<int>(img->getWidth());
								        const int height = static_cast<int>(img->getHeight());
								        const int channels = static_cast<int>(pix.getNumChannels());

								        // Target size comes from the model input, assumed NCHW (dims[2] = height, dims[3] = width).
								        // Fall back to the source size when the model does not report four dimensions.
								        int target_width = width;
								        int target_height = height;
								        if (input_node_dims.size() >= 4) {
								            target_height = static_cast<int>(input_node_dims[2]);
								            target_width = static_cast<int>(input_node_dims[3]);
								        }

								        // NOTE(review): anything that is not 3-channel is treated as 1-channel, so RGBA input is misread — confirm callers never pass it.
								        cv::Mat cvImage(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData());

								        // cv::Size is (width, height); the mean must be an explicit Scalar, not a comma expression.
								        image_array = cv::dnn::blobFromImage(cvImage, 1 / 255.0, cv::Size(target_width, target_height), cv::Scalar(0, 0, 0), false, false);

								        // Size the copy from the blob that was actually produced, not from the
								        // source image, so resized images neither over-read nor misplace data.
								        const size_t element_count = image_array.total();
								        const size_t offset = idx * element_count;
								        if (values.size() < offset + element_count) {
								            values.resize(offset + element_count);
								        }

								        std::memcpy(values.data() + offset, image_array.data, element_count * sizeof(float));
								    }


								    /// Converts one image into int32 data and copies it into slot `idx` of `values`.
								    ///
								    /// Mirrors convertImageToMatFloat: the pixels are wrapped in a cv::Mat (no
								    /// copy), turned into a blob scaled by 1/255 and resized to the model
								    /// input, converted to 32-bit integers, then copied to
								    /// `values[idx * n .. (idx + 1) * n)` where n is the blob's element count.
								    /// `values` is grown if it is too small to hold that slot.
								    ///
								    /// @param img     Source image; a null pointer is logged and skipped.
								    /// @param values  Batch buffer receiving the converted data.
								    /// @param idx     Zero-based position of this image within the batch.
								    void BaseHandler::convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx) {
								        if (img == nullptr) {
								            ofLog() << "convertImageToMatInt32: null image skipped.";
								            return;
								        }

								        ofPixels& pix = img->getPixels();
								        const int width = static_cast<int>(img->getWidth());
								        const int height = static_cast<int>(img->getHeight());
								        const int channels = static_cast<int>(pix.getNumChannels());

								        // Target size comes from the model input, assumed NCHW (dims[2] = height, dims[3] = width).
								        // Fall back to the source size when the model does not report four dimensions.
								        int target_width = width;
								        int target_height = height;
								        if (input_node_dims.size() >= 4) {
								            target_height = static_cast<int>(input_node_dims[2]);
								            target_width = static_cast<int>(input_node_dims[3]);
								        }

								        // NOTE(review): anything that is not 3-channel is treated as 1-channel, so RGBA input is misread — confirm callers never pass it.
								        cv::Mat cvImage(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData());

								        // cv::Size is (width, height); the mean must be an explicit Scalar, not a comma expression.
								        // NOTE(review): scaling by 1/255 before the integer conversion leaves only values 0 and 1 — confirm this is what int32 models expect.
								        cv::Mat intMat = cv::dnn::blobFromImage(cvImage, 1 / 255.0, cv::Size(target_width, target_height), cv::Scalar(0, 0, 0), false, false);

								        intMat.convertTo(image_array, CV_32S);

								        // Size the copy from the blob that was actually produced, not from the
								        // source image, so resized images neither over-read nor misplace data.
								        const size_t element_count = image_array.total();
								        const size_t offset = idx * element_count;
								        if (values.size() < offset + element_count) {
								            values.resize(offset + element_count);
								        }

								        std::memcpy(values.data() + offset, image_array.data, element_count * sizeof(int32_t));
								    }


									/// Registers the images that the next run() call will process.
									///
									/// @param in  Image pointers, assigned to the input_imgs member. run()
									///            expects their count to equal batch_size.
									void BaseHandler::setInputs(std::vector<ofImage*>& in) {
										input_imgs = in;
									}


									/// Formats a tensor shape as its dimensions joined by "x"
									/// (e.g. {1, 1, 512, 512} -> "1x1x512x512").
									///
									/// @param v  Shape to format.
									/// @return The joined dimensions; an empty string for an empty shape.
									std::string BaseHandler::PrintShape(const std::vector<int64_t>& v) {
										std::stringstream ss;
										// Emit the separator before every element except the first; this also
										// handles an empty shape, where `v.size() - 1` would wrap around.
										for (std::size_t i = 0; i < v.size(); i++) {
											if (i > 0) ss << "x";
											ss << v[i];
										}
										return ss.str();
									}


									/// Builds a float tensor filled with uniform random values in [0, 255).
									///
									/// The tensor has the shape of input_node_dims with its first dimension
									/// replaced by `batch_size`. Its memory is allocated by ONNX Runtime's
									/// default allocator, so the returned value does not point into any local
									/// buffer of this function.
									///
									/// @param batch_size  Value used for the first (batch) dimension.
									/// @return The random tensor, or a null Ort::Value when input_node_dims is empty.
									Ort::Value BaseHandler::GenerateTensor(int batch_size) {
										if (input_node_dims.empty()) {
											// No known input shape: there is no batch dimension to overwrite.
											return Ort::Value(nullptr);
										}

										// Copy the model's input shape and override the batch dimension
										std::vector<int64_t> batched_dims(input_node_dims.begin(), input_node_dims.end());
										batched_dims[0] = batch_size;

										// Let the runtime allocate storage that lives as long as the tensor
										Ort::AllocatorWithDefaultOptions allocator;
										Ort::Value tensor = Ort::Value::CreateTensor<float>(allocator, batched_dims.data(), batched_dims.size());

										// Random number generation setup
										std::random_device rd;
										std::mt19937 gen(rd());
										std::uniform_real_distribution<float> dis(0.0f, 255.0f); // Random values between 0 and 255

										// Fill the whole tensor in place with random values
										const size_t element_count = tensor.GetTensorTypeAndShapeInfo().GetElementCount();
										float* values = tensor.GetTensorMutableData<float>();
										std::generate(values, values + element_count, [&]() {
											return dis(gen);
										});

										return tensor;
									}


									/// Multiplies all entries of `v` together.
									///
									/// @param v  Values to multiply (typically tensor dimensions).
									/// @return The product narrowed to int; 1 when `v` is empty.
									int BaseHandler::CalculateProduct(const std::vector<int64_t>& v) {
										int product = 1;
										for (std::size_t k = 0; k < v.size(); ++k) {
											product *= v[k];
										}
										return product;
									}


									/// Wraps a float vector in a tensor of the given shape.
									///
									/// The tensor is created over `data.data()` using memory_info_handler.
									/// NOTE(review): presumably the tensor does not copy the buffer, so `data`
									/// must outlive the returned value — confirm against the ONNX Runtime docs.
									///
									/// @param data   Element storage handed to the tensor.
									/// @param shape  Tensor dimensions.
									/// @return The tensor created over `data`.
									Ort::Value BaseHandler::VectorToTensor(std::vector<float>& data, const std::vector<int64_t>& shape) {
										float* const elements = data.data();
										const size_t element_count = data.size();

										return Ort::Value::CreateTensor<float>(memory_info_handler, elements, element_count, shape.data(), shape.size());
									}

								}