From 8ebf2b892558782e22dcfc09463b46c3c034c2f3 Mon Sep 17 00:00:00 2001
From: cailean <caileannn@gmail.com>
Date: Sun, 27 Apr 2025 23:09:03 +0100
Subject: [PATCH] movenet: 192x192 input preprocessing, add OnnxThread, remove temp.cpp

---
 src/ofxOnnxRuntime.cpp     |  64 ++++----
 src/ofxOnnxRuntimeThread.h |  74 +++++++++
 temp.cpp                   | 297 -------------------------------------
 3 files changed, 113 insertions(+), 322 deletions(-)
 create mode 100644 src/ofxOnnxRuntimeThread.h
 delete mode 100644 temp.cpp

diff --git a/src/ofxOnnxRuntime.cpp b/src/ofxOnnxRuntime.cpp
index 3648728..607b6ec 100644
--- a/src/ofxOnnxRuntime.cpp
+++ b/src/ofxOnnxRuntime.cpp
@@ -211,43 +211,57 @@ namespace ofxOnnxRuntime
         int channels = pix.getNumChannels();
 
         cv::Mat cvImage = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData());
-        cv::InputArray inputArray(cvImage);
-        image_array = cv::dnn::blobFromImage(inputArray, 1 / 255.0, cv::Size(input_node_dims[2], input_node_dims[3]), (0, 0, 0), false, false);
-
-        std::memcpy(
-            values.data() + idx * channels * width * height,
-            image_array.data,
-            channels * width * height * sizeof(float)
-        );
+
+		// Resize to 192x192 if needed
+		cv::Mat resizedImage;
+		if (width != 192 || height != 192) {
+			cv::resize(cvImage, resizedImage, cv::Size(192, 192));
+		} else {
+			resizedImage = cvImage;
+		}
+
+		// Convert to float32 & normalise to the 0-1 range (scaled by 1/255)
+		cv::Mat floatImage;
+		resizedImage.convertTo(floatImage, CV_32F, 1.0/255.0);
+
+		// Calculate offset in destination array
+		size_t elementsPerImage = input_node_dims[1] * input_node_dims[2] * input_node_dims[3];
+		size_t startPos = idx * elementsPerImage;
+
+        // Copy directly
+		float* floatPtr = reinterpret_cast<float*>(floatImage.data);
+		std::copy(floatPtr, floatPtr + elementsPerImage, values.begin() + startPos);
     }
 
     void BaseHandler::convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx) {
-        ofPixels& pix = img->getPixels();
+		ofPixels& pix = img->getPixels();
 		int width = img->getWidth();
 		int height = img->getHeight();
 		int channels = pix.getNumChannels();
 		
+		// Create OpenCV Mat from ofImage pixels (any channel count other than 3 is treated as 1-channel)
 		cv::Mat cvImage = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData());
 		
-		// Create blob with the correct dimensions
-		cv::Mat floatMat = cv::dnn::blobFromImage(cvImage, 1/255.0, 
-												cv::Size(256, 256), 
-												cv::Scalar(0, 0, 0), false, false);
-		
-		// Convert float blob to int32
-		cv::Mat intMat;
-		floatMat.convertTo(intMat, CV_32S);
+		// Resize to 192x192 if needed
+		cv::Mat resizedImage;
+		if (width != 192 || height != 192) {
+			cv::resize(cvImage, resizedImage, cv::Size(192, 192));
+		} else {
+			resizedImage = cvImage;
+		}
 		
-		// Calculate how many values we need to add
-		size_t elementsPerImage = channels * 256* 256;
+		// Convert uint8 image to int32 (keeping the 0-255 range)
+		cv::Mat intImage;
+		resizedImage.convertTo(intImage, CV_32SC3);
+
+		// Calculate offset in destination array (images are packed back to back, indexed by idx)
+		size_t elementsPerImage = 192 * 192 * 3; // NOTE(review): assumes 3 channels; the CV_8UC1 path above yields fewer elements - confirm
 		size_t startPos = idx * elementsPerImage;
 		
-		// Copy data from intMat to values
-		int32_t* intData = (int32_t*)intMat.data;
-		for (size_t i = 0; i < elementsPerImage; i++) {
-			values[startPos + i] = intData[i];
-		}
-    }
+		// Copy directly; values must already hold at least startPos + elementsPerImage elements
+		int32_t* intPtr = reinterpret_cast<int32_t*>(intImage.data);
+		std::copy(intPtr, intPtr + elementsPerImage, values.begin() + startPos);
+	}
 
 	void BaseHandler::setInputs(std::vector<ofImage*>& in) {
 		this->input_imgs = in;
diff --git a/src/ofxOnnxRuntimeThread.h b/src/ofxOnnxRuntimeThread.h
new file mode 100644
index 0000000..1caa862
--- /dev/null
+++ b/src/ofxOnnxRuntimeThread.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include "ofMain.h"
+#include "ofxOnnxRuntime.h"
+
+namespace ofxOnnxRuntime {
+    // Worker thread for BaseHandler::run(). Set shouldRunInference, then call update().
+    class OnnxThread : public ofThread
+    {
+        public:
+            ofxOnnxRuntime::BaseHandler* onnx = nullptr; // not owned; set via setup()
+            float* result = nullptr;          // last pointer returned by onnx->run()
+            bool isInferenceComplete = false; // set after each completed run
+            bool shouldRunInference = true;   // cleared after each completed run
+
+            ~OnnxThread() {
+                stop();
+                waitForThread(false); // join before the members above are destroyed
+            }
+
+            void setup(ofxOnnxRuntime::BaseHandler* onnx) { // handler must outlive the worker
+                std::lock_guard<std::mutex> lock(mutex);
+                this->onnx = onnx;
+            }
+
+            void start() { startThread(); }
+
+            void stop() { // may block until a running inference finishes
+                // Flip the flag under the mutex so the worker cannot miss the wakeup.
+                { std::lock_guard<std::mutex> lock(mutex); stopThread(); }
+                condition.notify_all();
+            }
+
+            void threadedFunction() override { // run if requested, then sleep until woken
+                std::unique_lock<std::mutex> lock(mutex);
+                while (isThreadRunning()) {
+                    runOnnx(); // NOTE: the mutex stays held for the whole inference
+                    // The predicate guards against spurious wakeups and early notifies.
+                    condition.wait(lock, [this] { return !isThreadRunning() || (shouldRunInference && onnx); });
+                }
+            }
+
+            void update() { // wake the worker so it re-checks shouldRunInference
+                std::lock_guard<std::mutex> lock(mutex);
+                condition.notify_one();
+            }
+
+            void runOnnx() { // one inference if requested and a handler is set; takes no lock
+                if (!shouldRunInference || onnx == nullptr) return;
+                result = onnx->run();
+                isInferenceComplete = true;
+                shouldRunInference = false;
+            }
+
+            // Returns the last result pointer (nullptr until a run has completed).
+            float* getResult() {
+                std::lock_guard<std::mutex> lock(mutex);
+                return result;
+            }
+
+            bool checkInferenceComplete() {
+                std::lock_guard<std::mutex> lock(mutex);
+                return isInferenceComplete;
+            }
+
+            void resetInferenceFlag() {
+                std::lock_guard<std::mutex> lock(mutex);
+                isInferenceComplete = false;
+            }
+
+        protected:
+            std::condition_variable condition;
+    };
+}
\ No newline at end of file
diff --git a/temp.cpp b/temp.cpp
deleted file mode 100644
index 4744494..0000000
--- a/temp.cpp
+++ /dev/null
@@ -1,297 +0,0 @@
-#include "ofxOnnxRuntime.h"
-
-namespace ofxOnnxRuntime
-{
-#ifdef _MSC_VER
-	static std::wstring to_wstring(const std::string &str)
-	{
-		unsigned len = str.size() * 2;
-		setlocale(LC_CTYPE, "");
-		wchar_t *p = new wchar_t[len];
-		mbstowcs(p, str.c_str(), len);
-		std::wstring wstr(p);
-		delete[] p;
-		return wstr;
-	}
-#endif
-	void BaseHandler::setup(const std::string & onnx_path, const BaseSetting & base_setting, const int & batch_size, const bool debug, const bool timestamp)
-	{
-		// Store data types
-		this->input_dtype = base_setting.input_dtype;
-		this->output_dtype = base_setting.output_dtype;
-
-		Ort::SessionOptions session_options;
-		session_options.SetIntraOpNumThreads(1);
-		session_options.SetIntraOpNumThreads(1);
-		session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
-
-		if (base_setting.infer_type == INFER_CUDA) {
-			OrtCUDAProviderOptions opts;
-			opts.device_id = 0;
-			opts.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
-			opts.do_copy_in_default_stream = 0;
-			opts.arena_extend_strategy = 0;
-			session_options.AppendExecutionProvider_CUDA(opts);
-		}
-
-		this->timestamp = timestamp;
-		this->debug = debug;
-		this->batch_size = batch_size;
-		this->setup2(onnx_path, session_options);
-	}
-
-	void BaseHandler::setup2(const std::string & onnx_path, const Ort::SessionOptions & session_options)
-	{
-		std::string path = ofToDataPath(onnx_path, true);
-
-		std::wstring wpath(path.begin(), path.end());  // basic conversion
-
-		ort_session = std::make_shared<Ort::Session>(ort_env, wpath.c_str(), session_options);
-
-		setNames();
-	}
-
-	void BaseHandler::setNames()
-	{
-		Ort::AllocatorWithDefaultOptions allocator;
-
-		// 1. Gets Input Name/s & Shape ([1, 3, 28, 28]) -- In most cases this is usually just one
-		for (std::size_t i = 0; i < ort_session->GetInputCount(); i++) {
-			input_node_names.emplace_back(ort_session->GetInputNameAllocated(i, allocator).get());
-			input_node_dims = ort_session->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
-
-			// Some models might have negative shape values to indicate dynamic shape, e.g., for variable batch size. (?, 3, 28, 28) -> (1, 3, 28, 28)
-			for (auto& s : input_node_dims) if (s < 0) s = batch_size;
-
-			if (debug) std::cout << input_node_names.at(i) << " : " << PrintShape(input_node_dims) << std::endl;
-		}
-
-		// 2. Calculate the product of the dimensions
-		for (auto& f : input_node_dims) {
-			input_node_size *= f;
-		}
-
-		if (debug)  ofLog() << ofToString(input_node_size) + ", Batch Size:" + ofToString(input_node_dims[0]);
-
-		// 2. Clear up output values
-		output_node_dims.clear();
-		output_values.clear();
-		
-		// 3. Gets Output name/s & Shapes
-		for (std::size_t i = 0; i < ort_session->GetOutputCount(); i++) {
-			output_node_names.emplace_back(ort_session->GetOutputNameAllocated(i, allocator).get());
-			auto output_shapes = ort_session->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
-			
-			output_values.emplace_back(nullptr);
-
-			if (debug) std::cout << output_node_names.at(i) << " : " << PrintShape(output_shapes) << std::endl;
-		}
-	}
-
-	float* BaseHandler::run()
-	{
-		
-		auto start = std::chrono::high_resolution_clock::now(); // starting timestamp
-
-		std::vector<Ort::Value> input_tensors;
-
-		size_t num_images = input_imgs.size();
-
-		if(input_imgs.size() != batch_size) {
-			ofLog() << "Input images do not match batch size. Inference FAILED.";
-			return dummy_output_tensor.front().GetTensorMutableData<float>();
-		}
-
-		// transform std::string -> const char*
-		std::vector<const char*> input_names_char(input_node_names.size(), nullptr);
-		std::transform(std::begin(input_node_names), std::end(input_node_names), std::begin(input_names_char),
-			[&](const std::string& str) { return str.c_str(); });
-
-		std::vector<const char*> output_names_char(output_node_names.size(), nullptr);
-		std::transform(std::begin(output_node_names), std::end(output_node_names), std::begin(output_names_char),
-			[&](const std::string& str) { return str.c_str(); });
-
-		std::vector<float> batch_values_f;
-		std::vector<int32_t> batch_values_int32;
-		batch_values_f.reserve(input_node_size * batch_size); // Reserve space but don't initialize
-		batch_values_int32.reserve(input_node_size * batch_size); // Reserve space but don't initialize
-
-
-		if (input_dtype == ModelDataType::FLOAT32){
-			// I have a list of imgs, these need to be converted from images into input for the model (int or float)
-			for(size_t i = 0; i < batch_size; i++) {
-				convertImageToMatFloat(input_imgs[i], batch_values_f, i);
-			}
-
-			// 2. Create tensor with batch values { input data, input size, model input dims, model input size}
-			input_tensors.emplace_back(Ort::Value::CreateTensor<float>(
-				memory_info_handler, batch_values_f.data(), input_node_size,
-				input_node_dims.data(), input_node_dims.size()));
-		} 
-		else if (input_dtype == ModelDataType::INT32) {
-			// I have a list of imgs, these need to be converted from images into input for the model (int or float)
-			for(size_t i = 0; i < batch_size; i++) {
-				convertImageToMatInt32(input_imgs[i], batch_values_int32, i);
-			}
-
-			// 2. Create tensor with batch values { input data, input size, model input dims, model input size}
-			input_tensors.emplace_back(Ort::Value::CreateTensor<int32_t>(
-			memory_info_handler, batch_values_int32.data(), input_node_size,
-			input_node_dims.data(), input_node_dims.size()));
-		}
-
-		
-		
-
-		try {
-			// 3. Run inference, { in names, input data, num of inputs, output names, num of outputs }
-			ofLog() << "run";
-			output_values = ort_session->Run(Ort::RunOptions{ nullptr }, 
-				input_names_char.data(), input_tensors.data(),
-				input_names_char.size(), output_names_char.data(), 
-				output_names_char.size());
-			ofLog() << "ran";
-			
-			if (debug) {
-				// Gets the address of the first value
-				auto& out = output_values.front();
-				// Get tensor shape information
-				Ort::TensorTypeAndShapeInfo info = out.GetTensorTypeAndShapeInfo();
-				std::vector<int64_t> output_dims = info.GetShape();
-				
-				// Print the dimensions
-				std::cout << "Output tensor dimensions: [";
-				for (size_t i = 0; i < output_dims.size(); i++) {
-					std::cout << output_dims[i];
-					if (i < output_dims.size() - 1) {
-						std::cout << ", ";
-					}
-				}
-				std::cout << "]" << std::endl;
-				
-				// Optional: Print total number of elements
-				size_t total_elements = 1;
-				for (auto& dim : output_dims) {
-					if (dim > 0) {  // Handle dynamic dimensions
-						total_elements *= static_cast<size_t>(dim);
-					}
-				}
-				std::cout << "Total elements: " << total_elements << std::endl;
-			}
-			
-			// if (timestamp) {
-			// 	auto end = std::chrono::high_resolution_clock::now();
-			// 	std::chrono::duration<double, std::milli> elapsed = end - start;
-			// 	std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl;
-			// }
-
-			return output_values.front().GetTensorMutableData<float>();
-
-		} catch (const Ort::Exception& ex) {
-			std::cout << "ERROR running model inference: " << ex.what() << std::endl;
-			return dummy_output_tensor.front().GetTensorMutableData<float>();
-		}
-		
-	}
-
-	/*
-	*
-	*	Utilties (｡･∀･)ﾉﾞ
-	*
-	*/
-
-	// Add separate methods for float and int32 conversion
-    void BaseHandler::convertImageToMatFloat(ofImage* img, std::vector<float>& values, size_t& idx) {
-        // Your existing conversion code for float
-        ofPixels& pix = img->getPixels();
-        int width = img->getWidth();
-        int height = img->getHeight();
-        int channels = pix.getNumChannels();
-
-        cv::Mat cvImage = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData());
-        cv::InputArray inputArray(cvImage);
-        image_array = cv::dnn::blobFromImage(inputArray, 1 / 255.0, cv::Size(input_node_dims[2], input_node_dims[3]), (0, 0, 0), false, false);
-
-        std::memcpy(
-            values.data() + idx * channels * width * height,
-            image_array.data,
-            channels * width * height * sizeof(float)
-        );
-    }
-
-    void BaseHandler::convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx) {
-        // New conversion code for int32
-        ofPixels& pix = img->getPixels();
-        int width = img->getWidth();
-        int height = img->getHeight();
-        int channels = pix.getNumChannels();
-        
-        cv::Mat cvImage = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData());
-        cv::InputArray inputArray(cvImage);
-        cv::Mat intMat = cv::dnn::blobFromImage(inputArray, 1 / 255.0, cv::Size(height, width), (0, 0, 0), false, false);
-
-        intMat.convertTo(image_array, CV_32S);
-
-        std::memcpy(
-            values.data() + idx * channels * width * height,
-            image_array.data,
-            channels * width * height * sizeof(int32_t)
-        );
-    }
-
-	void BaseHandler::setInputs(std::vector<ofImage*>& in) {
-		this->input_imgs = in;
-	}
-
-	// Prints the shape of the given tensor (ex. input: (1, 1, 512, 512))
-	std::string BaseHandler::PrintShape(const std::vector<int64_t>& v) {
-		std::stringstream ss;
-		for (std::size_t i = 0; i < v.size() - 1; i++) ss << v[i] << "x";
-		ss << v[v.size() - 1];
-		return ss.str();
-	}
-
-	Ort::Value BaseHandler::GenerateTensor(int batch_size) {
-		// Random number generation setup
-		std::random_device rd;
-		std::mt19937 gen(rd());
-		std::uniform_real_distribution<float> dis(0.0f, 255.0f); // Random values between 0 and 255
-
-		// Calculate the total number of elements for a single tensor (without batch dimension) {?, 8} -> 8
-		int tensor_size = CalculateProduct(input_node_dims);
-
-		// Create a vector to hold all the values for the batch (8 * (4)batch_size) -> 32
-		std::vector<float> batch_values(batch_size * tensor_size); 
-
-		// Fill the batch with random values
-		std::generate(batch_values.begin(), batch_values.end(), [&]() {
-			return dis(gen);
-		});
-
-		// Fill the batch with random values
-		std::generate(batch_values.begin(), batch_values.end(), [&]() {
-			return dis(gen);
-		});
-
-		// Create the batched dimensions by inserting the batch size at the beginning of the original dimensions
-		std::vector<int64_t> batched_dims = {  };  // Start with batch size
-		batched_dims.insert(batched_dims.end(), input_node_dims.begin(), input_node_dims.end()); // Add the remaining dimensions
-		batched_dims[0] = batch_size;
-
-		return VectorToTensor(batch_values, batched_dims);
-	}
-
-	int BaseHandler::CalculateProduct(const std::vector<int64_t>& v) {
-		int total = 1;
-		for (auto& i : v) total *= i;
-		return total;
-	}
-
-	Ort::Value BaseHandler::VectorToTensor(std::vector<float>& data, const std::vector<int64_t>& shape) {
-		// Create a tensor from the provided data, shape, and memory info
-		auto tensor = Ort::Value::CreateTensor<float>(memory_info_handler, data.data(), data.size(), shape.data(), shape.size());
-
-		// Return the created tensor
-		return tensor;
-	}
-}