Updated runtime to work with int32 & float input

6 months ago · 302f2f6e97
6 changed files with 465 additions and 28 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,5 @@
 # Ignoring onnxruntime libs
-# /libs/onnxruntime/lib/*
+/libs/onnxruntime/lib/msys2/*
 example-*/config.make
 example-*/*.sln
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -0,0 +1,58 @@
 {
    "files.associations": {
        "xiosbase": "cpp",
        "algorithm": "cpp",
        "array": "cpp",
        "atomic": "cpp",
        "bit": "cpp",
        "cctype": "cpp",
        "clocale": "cpp",
        "cmath": "cpp",
        "compare": "cpp",
        "concepts": "cpp",
        "cstddef": "cpp",
        "cstdint": "cpp",
        "cstdio": "cpp",
        "cstdlib": "cpp",
        "cstring": "cpp",
        "ctime": "cpp",
        "cwchar": "cpp",
        "exception": "cpp",
        "functional": "cpp",
        "initializer_list": "cpp",
        "ios": "cpp",
        "iosfwd": "cpp",
        "iostream": "cpp",
        "istream": "cpp",
        "iterator": "cpp",
        "limits": "cpp",
        "list": "cpp",
        "memory": "cpp",
        "new": "cpp",
        "numeric": "cpp",
        "optional": "cpp",
        "ostream": "cpp",
        "stdexcept": "cpp",
        "streambuf": "cpp",
        "string": "cpp",
        "system_error": "cpp",
        "tuple": "cpp",
        "type_traits": "cpp",
        "typeinfo": "cpp",
        "unordered_map": "cpp",
        "unordered_set": "cpp",
        "utility": "cpp",
        "variant": "cpp",
        "vector": "cpp",
        "xfacet": "cpp",
        "xhash": "cpp",
        "xlocale": "cpp",
        "xlocinfo": "cpp",
        "xlocnum": "cpp",
        "xmemory": "cpp",
        "xstddef": "cpp",
        "xstring": "cpp",
        "xtr1common": "cpp",
        "xutility": "cpp"
    }
 }
--- a/addon_config.mk
+++ b/addon_config.mk
@ -1,11 +1,12 @@
 meta:
 	ADDON_NAME = ofxOnnxRuntime
 	ADDON_DESCRIPTION = "ONNX Runtime addon for OpenFrameworks"
-	ADDON_AUTHOR = Yuya Hanai
+	ADDON_AUTHOR = Cailean Finn
 	ADDON_TAGS = "ONNX"
-	ADDON_URL = https://github.com/hanasaan/ofxOnnxRuntime
+	ADDON_URL = https://github.com/caileannn/ofxOnnxRuntime
 common:
 	ADDON_DEPENDENCIES = ofxOpenCv
 	ADDON_INCLUDES = libs/onnxruntime/include
 	ADDON_INCLUDES += src
 osx:
--- a/src/ofxOnnxRuntime.cpp
+++ b/src/ofxOnnxRuntime.cpp
@ -14,9 +14,12 @@ namespace ofxOnnxRuntime
 		return wstr;
 	}
 #endif
 	void BaseHandler::setup(const std::string & onnx_path, const BaseSetting & base_setting, const int & batch_size, const bool debug, const bool timestamp)
 	{
 		// Store data types
 		this->input_dtype = base_setting.input_dtype;
 		this->output_dtype = base_setting.output_dtype;
 		Ort::SessionOptions session_options;
 		session_options.SetIntraOpNumThreads(1);
 		session_options.SetIntraOpNumThreads(1);
@ -70,13 +73,6 @@ namespace ofxOnnxRuntime
 		if (debug)  ofLog() << ofToString(input_node_size) + ", Batch Size:" + ofToString(input_node_dims[0]);
 		// 3. Resize input values array to match input tensor/s
 		input_values_handler.resize(batch_size);
 		for (auto& tensor : input_values_handler) {
 			tensor.resize(input_node_size);
 		}
 		// 2. Clear up output values
 		output_node_dims.clear();
 		output_values.clear();
@ -99,20 +95,42 @@ namespace ofxOnnxRuntime
 		std::vector<Ort::Value> input_tensors;
 		size_t num_images = input_imgs.size();
 		if(input_imgs.size() != batch_size) {
 			ofLog() << "Input images do not match batch size. Inference FAILED.";
 			return dummy_output_tensor.front().GetTensorMutableData<float>();
 		}
 		// 1. Create 1-D array for all values to create tensor & push all values from input_vals to batch_vals
 		std::vector<float> batch_values;
-		batch_values.reserve(input_node_size * batch_size); // Reserve space but don't initialize
+		batch_values.resize(input_node_size * batch_size); // Reserve space but don't initialize
-		for (const auto& inner_vec : input_values_handler) {
+		std::vector<int32_t> batch_values_int;
-			for (float value : inner_vec) {
+		batch_values_int.resize(input_node_size * batch_size); // Reserve space but don't initialize
-				batch_values.push_back(value);
+
 		if (input_dtype == ModelDataType::FLOAT32){
 			// I have a list of imgs, these need to be converted from images into input for the model (int or float)
 			for(size_t i = 0; i < batch_size; i++) {
 				convertImageToMatFloat(input_imgs[i], batch_values, i);
 			}
 			// 2. Create tensor with batch values { input data, input size, model input dims, model input size}
 			input_tensors.emplace_back(Ort::Value::CreateTensor<float>(
 				memory_info_handler, batch_values.data(), input_node_size,
 				input_node_dims.data(), input_node_dims.size()));
 		} 
 		else if (input_dtype == ModelDataType::INT32) {
 			// I have a list of imgs, these need to be converted from images into input for the model (int or float)
 			for(size_t i = 0; i < batch_size; i++) {
 				convertImageToMatInt32(input_imgs[i], batch_values_int, i);
 			}
-		// 2. Create tensor with batch values { input data, input size, model input dims, model input size}
+			// 2. Create tensor with batch values { input data, input size, model input dims, model input size}
-		input_tensors.emplace_back(Ort::Value::CreateTensor<float>(
+			input_tensors.emplace_back(Ort::Value::CreateTensor<int32_t>(
-			memory_info_handler, batch_values.data(), input_node_size,
+			memory_info_handler, batch_values_int.data(), input_node_size,
 			input_node_dims.data(), input_node_dims.size()));
 		}
 		// transform std::string -> const char*
 		std::vector<const char*> input_names_char(input_node_names.size(), nullptr);
@ -123,6 +141,11 @@ namespace ofxOnnxRuntime
 		std::transform(std::begin(output_node_names), std::end(output_node_names), std::begin(output_names_char),
 			[&](const std::string& str) { return str.c_str(); });
 			// Before running the model, check if we have data
 		if (input_dtype == ModelDataType::INT32 && batch_values_int.empty()) {
 			ofLog() << "Error: INT32 batch values vector is empty";
 			return dummy_output_tensor.front().GetTensorMutableData<float>();
 		}
 		try {
 			// 3. Run inference, { in names, input data, num of inputs, output names, num of outputs }
@ -179,9 +202,55 @@ namespace ofxOnnxRuntime
 	*
 	*/
-	// Fills the tensor (selected by the idx) with input values from ofFloatPixels array
+	// Add separate methods for float and int32 conversion
-	void BaseHandler::setInput(ofFloatPixels &pixels, int tensor_idx, int width, int height, int channels) {
+    void BaseHandler::convertImageToMatFloat(ofImage* img, std::vector<float>& values, size_t& idx) {
-		pixels.setFromExternalPixels(getInputTensorData()->at(tensor_idx).data(), width, height, channels);
+        // Your existing conversion code for float
        ofPixels& pix = img->getPixels();
        int width = img->getWidth();
        int height = img->getHeight();
        int channels = pix.getNumChannels();
        cv::Mat cvImage = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData());
        cv::InputArray inputArray(cvImage);
        image_array = cv::dnn::blobFromImage(inputArray, 1 / 255.0, cv::Size(input_node_dims[2], input_node_dims[3]), (0, 0, 0), false, false);
        std::memcpy(
            values.data() + idx * channels * width * height,
            image_array.data,
            channels * width * height * sizeof(float)
        );
    }
    // Converts one image into an int32 blob and stores it in slot `idx` of `values`.
    //
    // img    : source image; a null pointer or an empty image is logged and skipped.
    // values : flat batch buffer; grown if it cannot hold slot `idx`.
    // idx    : zero-based position of this image inside the batch.
    //
    // The image is resized to the model's spatial size when the input shape has at
    // least four dimensions (assumed NCHW — TODO confirm for non-NCHW models);
    // otherwise the previous 256x256 default is used.
    // NOTE(review): the blob is scaled by 1/255 before the CV_32S conversion, so the
    // stored values collapse to 0 or 1 — confirm this is what the int32 model expects.
    void BaseHandler::convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx) {
        if (img == nullptr) {
            ofLog() << "convertImageToMatInt32: null image, slot skipped.";
            return;
        }
        ofPixels& pix = img->getPixels();
        const int width = static_cast<int>(img->getWidth());
        const int height = static_cast<int>(img->getHeight());
        const int channels = static_cast<int>(pix.getNumChannels());
        if (width <= 0 || height <= 0 || channels <= 0) {
            ofLog() << "convertImageToMatInt32: empty image, slot skipped.";
            return;
        }
        // CV_8UC(n) matches the real channel count instead of treating everything
        // that is not RGB as single-channel.
        cv::Mat cvImage(height, width, CV_8UC(channels), pix.getData());
        // cv::Size takes (width, height); NCHW stores height at [2] and width at [3].
        cv::Size target(256, 256);
        if (input_node_dims.size() >= 4) {
            target = cv::Size(static_cast<int>(input_node_dims[3]), static_cast<int>(input_node_dims[2]));
        }
        // Create blob with the target dimensions
        cv::Mat floatMat = cv::dnn::blobFromImage(cvImage, 1 / 255.0, target,
                                                cv::Scalar(0, 0, 0), false, false);
        // Convert float blob to int32
        cv::Mat intMat;
        floatMat.convertTo(intMat, CV_32S);
        // Size the copy from the blob itself so it can never read past the blob.
        const size_t elementsPerImage = intMat.total();
        const size_t startPos = idx * elementsPerImage;
        if (values.size() < startPos + elementsPerImage) {
            values.resize(startPos + elementsPerImage);
        }
        std::memcpy(values.data() + startPos, intMat.ptr<int32_t>(), elementsPerImage * sizeof(int32_t));
    }
 	// Stores a copy of the caller's image-pointer list for the next run().
 	// Only the pointers are copied; the images themselves remain owned by the caller.
 	void BaseHandler::setInputs(std::vector<ofImage*>& in)
 	{
 		input_imgs = in;
 	}
 	// Prints the shape of the given tensor (ex. input: (1, 1, 512, 512))
--- a/src/ofxOnnxRuntime.h
+++ b/src/ofxOnnxRuntime.h
@ -2,6 +2,7 @@
 #include <onnxruntime_cxx_api.h>
 #include "ofMain.h"
 #include "ofxOpenCv.h"
 namespace ofxOnnxRuntime
 {
@ -12,10 +13,17 @@ namespace ofxOnnxRuntime
 		INFER_TENSORRT
 	};
 	// Element type of a model tensor, used to pick the matching conversion path.
 	enum ModelDataType {
 		FLOAT32 = 0,	// 32-bit floating point elements
 		INT32 = 1	// 32-bit signed integer elements
 	};
 	// Options passed to BaseHandler::setup().
 	// Every member has a default so a default-constructed BaseSetting never exposes
 	// indeterminate values; aggregate initialisation such as
 	// BaseSetting{ INFER_CPU, 0, FLOAT32, FLOAT32 } keeps working.
 	struct BaseSetting
 	{
 		InferType infer_type = INFER_CPU;	// execution backend
 		int device_id = 0;	// device index for the selected backend
 		ModelDataType input_dtype = FLOAT32;	// element type of the model input
 		ModelDataType output_dtype = FLOAT32;	// element type of the model output
 	};
 	class BaseHandler
@ -23,10 +31,12 @@ namespace ofxOnnxRuntime
 		public:
 			BaseHandler() {}
-			void setup(const std::string& onnx_path, const BaseSetting& base_setting = BaseSetting{ INFER_CPU, 0 }, const int& batch_size = 1, const bool debug = false, const bool timestamp = false);
+			void setup(const std::string& onnx_path, const BaseSetting& base_setting = BaseSetting{ INFER_CPU, 0, FLOAT32, FLOAT32 }, const int& batch_size = 1, const bool debug = false, const bool timestamp = false);
 			void setup2(const std::string& onnx_path, const Ort::SessionOptions& session_options);
 			void setNames();
-			void setInput(ofFloatPixels &pixels, int tensor_idx, int width, int height, int channels);
+			void setInputs(std::vector<ofImage*>& input_imgs);
 			void convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx);
 			void convertImageToMatFloat(ofImage* img, std::vector<float>& values, size_t& idx);
 			float* run();
 			// Utilities ╰（‵□′）╯
@ -35,10 +45,6 @@ namespace ofxOnnxRuntime
 			int CalculateProduct(const std::vector<int64_t>& v);
 			Ort::Value VectorToTensor(std::vector<float>& data, const std::vector<int64_t>& shape);
 			std::vector<std::vector<float>>* getInputTensorData() {
 				return &this->input_values_handler;
 			}
 		protected:
 			bool debug = false;
 			bool timestamp = false;
@ -63,5 +69,11 @@ namespace ofxOnnxRuntime
 			std::vector<int64_t> batched_dims;
 			int batch_size;
 			int num_outputs = 1;
 			std::vector<ofImage*> input_imgs;
 			cv::Mat image_array;
 			ModelDataType input_dtype;
 			ModelDataType output_dtype;
 	};
 }
--- a/temp.cpp
+++ b/temp.cpp
@ -0,0 +1,297 @@
 #include "ofxOnnxRuntime.h"
 namespace ofxOnnxRuntime
 {
 #ifdef _MSC_VER
 	// Converts a multibyte string, decoded with the user's locale, to a wide string.
 	//
 	// str : narrow input; may be empty.
 	// Returns the wide-character equivalent. If the input holds a byte sequence that
 	// is invalid in the active locale, every byte is widened individually instead of
 	// reading an uninitialised buffer.
 	static std::wstring to_wstring(const std::string &str)
 	{
 		// Select the environment's locale so mbstowcs uses its encoding.
 		setlocale(LC_CTYPE, "");
 		if (str.empty()) {
 			return std::wstring();
 		}
 		// A multibyte string never decodes to more wide characters than it has bytes.
 		std::wstring wide(str.size(), L'\0');
 		const std::size_t converted = mbstowcs(&wide[0], str.c_str(), wide.size());
 		if (converted == static_cast<std::size_t>(-1)) {
 			std::wstring fallback;
 			fallback.reserve(str.size());
 			for (const char c : str) {
 				fallback.push_back(static_cast<wchar_t>(static_cast<unsigned char>(c)));
 			}
 			return fallback;
 		}
 		wide.resize(converted);
 		return wide;
 	}
 #endif
 	void BaseHandler::setup(const std::string & onnx_path, const BaseSetting & base_setting, const int & batch_size, const bool debug, const bool timestamp)
 	{
 		// Store data types
 		this->input_dtype = base_setting.input_dtype;
 		this->output_dtype = base_setting.output_dtype;
 		Ort::SessionOptions session_options;
 		session_options.SetIntraOpNumThreads(1);
 		session_options.SetIntraOpNumThreads(1);
 		session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
 		if (base_setting.infer_type == INFER_CUDA) {
 			OrtCUDAProviderOptions opts;
 			opts.device_id = 0;
 			opts.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
 			opts.do_copy_in_default_stream = 0;
 			opts.arena_extend_strategy = 0;
 			session_options.AppendExecutionProvider_CUDA(opts);
 		}
 		this->timestamp = timestamp;
 		this->debug = debug;
 		this->batch_size = batch_size;
 		this->setup2(onnx_path, session_options);
 	}
 	// Resolves the model path, creates the ONNX Runtime session and reads the model's
 	// input/output metadata via setNames().
 	//
 	// onnx_path       : model path, resolved with ofToDataPath(..., true).
 	// session_options : fully configured session options.
 	void BaseHandler::setup2(const std::string & onnx_path, const Ort::SessionOptions & session_options)
 	{
 		const std::string path = ofToDataPath(onnx_path, true);
 		// Ort::Session expects `const ORTCHAR_T*`: wchar_t on Windows, char elsewhere.
 		// Building the string from ORTCHAR_T keeps the element-wise widening on Windows
 		// and lets the same code compile on other platforms.
 		const std::basic_string<ORTCHAR_T> model_path(path.begin(), path.end());
 		ort_session = std::make_shared<Ort::Session>(ort_env, model_path.c_str(), session_options);
 		setNames();
 	}
 	// Reads input/output names and shapes from the loaded session and caches them.
 	// Safe to call again after a new setup(): cached names and the element count are
 	// reset first, so repeated calls neither duplicate names nor compound the size.
 	void BaseHandler::setNames()
 	{
 		Ort::AllocatorWithDefaultOptions allocator;

 		// Start from a clean slate (no-op on the first call).
 		input_node_names.clear();
 		output_node_names.clear();
 		input_node_size = 1;

 		// 1. Gets Input Name/s & Shape ([1, 3, 28, 28]) -- In most cases this is usually just one
 		// NOTE(review): with several inputs only the last input's shape is kept.
 		for (std::size_t i = 0; i < ort_session->GetInputCount(); i++) {
 			input_node_names.emplace_back(ort_session->GetInputNameAllocated(i, allocator).get());
 			input_node_dims = ort_session->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();

 			// Some models might have negative shape values to indicate dynamic shape, e.g., for variable batch size. (?, 3, 28, 28) -> (1, 3, 28, 28)
 			for (auto& s : input_node_dims) {
 				if (s < 0) s = batch_size;
 			}

 			if (debug) std::cout << input_node_names.at(i) << " : " << PrintShape(input_node_dims) << std::endl;
 		}

 		// 2. Calculate the product of the dimensions
 		for (const auto& dim : input_node_dims) {
 			input_node_size *= dim;
 		}
 		if (debug)  ofLog() << ofToString(input_node_size) + ", Batch Size:" + ofToString(input_node_dims[0]);

 		// 3. Clear up output values
 		output_node_dims.clear();
 		output_values.clear();

 		// 4. Gets Output name/s & Shapes
 		for (std::size_t i = 0; i < ort_session->GetOutputCount(); i++) {
 			output_node_names.emplace_back(ort_session->GetOutputNameAllocated(i, allocator).get());
 			auto output_shapes = ort_session->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
 			output_values.emplace_back(nullptr);
 			if (debug) std::cout << output_node_names.at(i) << " : " << PrintShape(output_shapes) << std::endl;
 		}
 	}
 	// Converts the images registered with setInputs() into one input tensor, runs
 	// the session and returns a pointer to the first output tensor's data.
 	//
 	// Returns the first output's data viewed as float. When the image count does not
 	// match the batch size, or inference throws, the data pointer of
 	// `dummy_output_tensor` is returned instead.
 	// NOTE(review): the result is always read as float, even when `output_dtype`
 	// is INT32 — confirm callers reinterpret it accordingly.
 	float* BaseHandler::run()
 	{
 		auto start = std::chrono::high_resolution_clock::now(); // starting timestamp

 		std::vector<Ort::Value> input_tensors;

 		if (input_imgs.size() != static_cast<size_t>(batch_size)) {
 			ofLog() << "Input images do not match batch size. Inference FAILED.";
 			return dummy_output_tensor.front().GetTensorMutableData<float>();
 		}

 		// transform std::string -> const char*
 		std::vector<const char*> input_names_char(input_node_names.size(), nullptr);
 		std::transform(std::begin(input_node_names), std::end(input_node_names), std::begin(input_names_char),
 			[&](const std::string& str) { return str.c_str(); });

 		std::vector<const char*> output_names_char(output_node_names.size(), nullptr);
 		std::transform(std::begin(output_node_names), std::end(output_node_names), std::begin(output_names_char),
 			[&](const std::string& str) { return str.c_str(); });

 		// The tensors below only reference these buffers, so both are declared at
 		// function scope to outlive Run(). Only the one matching the input dtype is
 		// sized, and it is resized (not merely reserved) so every element written by
 		// the converters lies inside the vector.
 		const size_t buffer_len = static_cast<size_t>(input_node_size) * static_cast<size_t>(batch_size);
 		std::vector<float> batch_values_f;
 		std::vector<int32_t> batch_values_int32;

 		if (input_dtype == ModelDataType::FLOAT32){
 			batch_values_f.resize(buffer_len);
 			// 1. Convert every image into its slot of the float batch buffer
 			for(size_t i = 0; i < static_cast<size_t>(batch_size); i++) {
 				convertImageToMatFloat(input_imgs[i], batch_values_f, i);
 			}

 			// 2. Create tensor with batch values { input data, input size, model input dims, model input size}
 			input_tensors.emplace_back(Ort::Value::CreateTensor<float>(
 				memory_info_handler, batch_values_f.data(), input_node_size,
 				input_node_dims.data(), input_node_dims.size()));
 		} 
 		else if (input_dtype == ModelDataType::INT32) {
 			batch_values_int32.resize(buffer_len);
 			// 1. Convert every image into its slot of the int32 batch buffer
 			for(size_t i = 0; i < static_cast<size_t>(batch_size); i++) {
 				convertImageToMatInt32(input_imgs[i], batch_values_int32, i);
 			}

 			// 2. Create tensor with batch values { input data, input size, model input dims, model input size}
 			input_tensors.emplace_back(Ort::Value::CreateTensor<int32_t>(
 				memory_info_handler, batch_values_int32.data(), input_node_size,
 				input_node_dims.data(), input_node_dims.size()));
 		}

 		try {
 			// 3. Run inference, { in names, input data, num of inputs, output names, num of outputs }
 			ofLog() << "run";
 			output_values = ort_session->Run(Ort::RunOptions{ nullptr }, 
 				input_names_char.data(), input_tensors.data(),
 				input_names_char.size(), output_names_char.data(), 
 				output_names_char.size());
 			ofLog() << "ran";

 			if (debug) {
 				// Gets the address of the first value
 				auto& out = output_values.front();
 				// Get tensor shape information
 				Ort::TensorTypeAndShapeInfo info = out.GetTensorTypeAndShapeInfo();
 				std::vector<int64_t> output_dims = info.GetShape();

 				// Print the dimensions
 				std::cout << "Output tensor dimensions: [";
 				for (size_t i = 0; i < output_dims.size(); i++) {
 					std::cout << output_dims[i];
 					if (i < output_dims.size() - 1) {
 						std::cout << ", ";
 					}
 				}
 				std::cout << "]" << std::endl;

 				// Optional: Print total number of elements
 				size_t total_elements = 1;
 				for (auto& dim : output_dims) {
 					if (dim > 0) {  // Handle dynamic dimensions
 						total_elements *= static_cast<size_t>(dim);
 					}
 				}
 				std::cout << "Total elements: " << total_elements << std::endl;
 			}

 			// if (timestamp) {
 			// 	auto end = std::chrono::high_resolution_clock::now();
 			// 	std::chrono::duration<double, std::milli> elapsed = end - start;
 			// 	std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl;
 			// }

 			return output_values.front().GetTensorMutableData<float>();

 		} catch (const Ort::Exception& ex) {
 			std::cout << "ERROR running model inference: " << ex.what() << std::endl;
 			return dummy_output_tensor.front().GetTensorMutableData<float>();
 		}
 	}
 	/*
 	*
 	*	Utilties (｡･∀･)ﾉﾞ
 	*
 	*/
 	// Add separate methods for float and int32 conversion
    // Converts one image into a float blob scaled by 1/255, resized to the model's
    // spatial input size, and stores it in slot `idx` of `values`.
    //
    // img    : source image; a null pointer or an empty image is logged and skipped.
    // values : flat batch buffer; grown if it cannot hold slot `idx`.
    // idx    : zero-based position of this image inside the batch.
    //
    // The input shape is assumed to be NCHW (height at [2], width at [3]) —
    // TODO confirm for models with a different layout.
    void BaseHandler::convertImageToMatFloat(ofImage* img, std::vector<float>& values, size_t& idx) {
        if (img == nullptr || input_node_dims.size() < 4) {
            ofLog() << "convertImageToMatFloat: null image or input shape with fewer than 4 dims, slot skipped.";
            return;
        }
        ofPixels& pix = img->getPixels();
        const int width = static_cast<int>(img->getWidth());
        const int height = static_cast<int>(img->getHeight());
        const int channels = static_cast<int>(pix.getNumChannels());
        if (width <= 0 || height <= 0 || channels <= 0) {
            ofLog() << "convertImageToMatFloat: empty image, slot skipped.";
            return;
        }
        // CV_8UC(n) matches the real channel count instead of treating everything
        // that is not RGB as single-channel.
        cv::Mat cvImage(height, width, CV_8UC(channels), pix.getData());
        // cv::Size takes (width, height), i.e. dims[3] first, then dims[2].
        const cv::Size target(static_cast<int>(input_node_dims[3]), static_cast<int>(input_node_dims[2]));
        image_array = cv::dnn::blobFromImage(cvImage, 1 / 255.0, target, cv::Scalar(0, 0, 0), false, false);
        // The blob has the model's size, not the source image's: derive the copy
        // length and slot offset from the blob so the copy never reads past it.
        const size_t count = image_array.total();
        const size_t offset = idx * count;
        if (values.size() < offset + count) {
            values.resize(offset + count);
        }
        std::memcpy(values.data() + offset, image_array.ptr<float>(), count * sizeof(float));
    }
    // Converts one image into an int32 blob and stores it in slot `idx` of `values`.
    //
    // img    : source image; a null pointer or an empty image is logged and skipped.
    // values : flat batch buffer; grown if it cannot hold slot `idx`.
    // idx    : zero-based position of this image inside the batch.
    //
    // The image is resized to the model's spatial size when the input shape has at
    // least four dimensions (assumed NCHW — TODO confirm); otherwise it keeps its
    // own size.
    // NOTE(review): the blob is scaled by 1/255 before the CV_32S conversion, so the
    // stored values collapse to 0 or 1 — confirm this is what the int32 model expects.
    void BaseHandler::convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx) {
        if (img == nullptr) {
            ofLog() << "convertImageToMatInt32: null image, slot skipped.";
            return;
        }
        ofPixels& pix = img->getPixels();
        const int width = static_cast<int>(img->getWidth());
        const int height = static_cast<int>(img->getHeight());
        const int channels = static_cast<int>(pix.getNumChannels());
        if (width <= 0 || height <= 0 || channels <= 0) {
            ofLog() << "convertImageToMatInt32: empty image, slot skipped.";
            return;
        }
        cv::Mat cvImage(height, width, CV_8UC(channels), pix.getData());
        // cv::Size takes (width, height); the original passed (height, width).
        cv::Size target(width, height);
        if (input_node_dims.size() >= 4) {
            target = cv::Size(static_cast<int>(input_node_dims[3]), static_cast<int>(input_node_dims[2]));
        }
        cv::Mat floatMat = cv::dnn::blobFromImage(cvImage, 1 / 255.0, target, cv::Scalar(0, 0, 0), false, false);
        floatMat.convertTo(image_array, CV_32S);
        // Derive copy length and slot offset from the converted blob itself.
        const size_t count = image_array.total();
        const size_t offset = idx * count;
        if (values.size() < offset + count) {
            values.resize(offset + count);
        }
        std::memcpy(values.data() + offset, image_array.ptr<int32_t>(), count * sizeof(int32_t));
    }
 	// Stores a copy of the caller's image-pointer list for the next run().
 	// Only the pointers are copied; the images themselves remain owned by the caller.
 	void BaseHandler::setInputs(std::vector<ofImage*>& in)
 	{
 		input_imgs = in;
 	}
 	// Formats a tensor shape as its dimensions joined by "x" (e.g. "1x1x512x512").
 	// An empty shape yields an empty string; the original computed `v.size() - 1`,
 	// which wraps around for an empty vector and indexes out of bounds.
 	std::string BaseHandler::PrintShape(const std::vector<int64_t>& v) {
 		std::stringstream ss;
 		for (std::size_t i = 0; i < v.size(); i++) {
 			if (i > 0) ss << "x";
 			ss << v[i];
 		}
 		return ss.str();
 	}
 	// Creates a float tensor shaped like the model input, with the first dimension
 	// replaced by `batch_size`, filled with uniform random values in [0, 255).
 	//
 	// The tensor is allocated through ONNX Runtime's default allocator, so it owns
 	// its storage. The original wrapped a function-local vector, leaving the
 	// returned tensor pointing at freed memory.
 	Ort::Value BaseHandler::GenerateTensor(int batch_size) {
 		// Random number generation setup
 		std::random_device rd;
 		std::mt19937 gen(rd());
 		std::uniform_real_distribution<float> dis(0.0f, 255.0f); // Random values between 0 and 255

 		// Model input shape with the leading (batch) dimension overridden
 		std::vector<int64_t> shape(input_node_dims.begin(), input_node_dims.end());
 		if (!shape.empty()) {
 			shape[0] = batch_size;
 		}

 		Ort::AllocatorWithDefaultOptions allocator;
 		Ort::Value tensor = Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());

 		// Fill exactly the number of elements the tensor holds
 		const size_t element_count = tensor.GetTensorTypeAndShapeInfo().GetElementCount();
 		float* data = tensor.GetTensorMutableData<float>();
 		std::generate(data, data + element_count, [&]() {
 			return dis(gen);
 		});

 		return tensor;
 	}
 	// Multiplies all entries of `v` together; an empty vector yields 1.
 	// The running product is narrowed back to int after every step.
 	int BaseHandler::CalculateProduct(const std::vector<int64_t>& v) {
 		int product = 1;
 		for (std::size_t k = 0; k < v.size(); ++k) {
 			product = static_cast<int>(product * v[k]);
 		}
 		return product;
 	}
 	// Builds a float tensor of the given shape from `data`, using
 	// `memory_info_handler` as the memory description.
 	// NOTE(review): this CreateTensor overload presumably references `data` rather
 	// than copying it, so `data` would have to outlive the tensor — confirm.
 	Ort::Value BaseHandler::VectorToTensor(std::vector<float>& data, const std::vector<int64_t>& shape) {
 		return Ort::Value::CreateTensor<float>(
 			memory_info_handler,
 			data.data(), data.size(),
 			shape.data(), shape.size());
 	}
 }