Add support for channels-first image processing and update run()/getResult() return types to std::vector<Ort::Value>*

3 weeks ago · 61546e8cbe
3 changed files with 53 additions and 18 deletions
--- a/src/ofxOnnxRuntime.cpp
+++ b/src/ofxOnnxRuntime.cpp
@ -21,6 +21,7 @@ namespace ofxOnnxRuntime
 		this->output_dtype = base_setting.output_dtype;
 		this->inputWidth = base_setting.width;
 		this->inputHeight = base_setting.height;
 		this->channelsFirst = base_setting.channelsFirst;
 		Ort::SessionOptions session_options;
 		session_options.SetIntraOpNumThreads(1);
@ -57,6 +58,9 @@ namespace ofxOnnxRuntime
 	{
 		Ort::AllocatorWithDefaultOptions allocator;
 		input_node_names.clear();
 		output_node_names.clear();
 		// 1. Get the input name(s) & shape (e.g. [1, 3, 28, 28]) -- in most cases there is just one input
 		for (std::size_t i = 0; i < ort_session->GetInputCount(); i++) {
 			input_node_names.emplace_back(ort_session->GetInputNameAllocated(i, allocator).get());
@ -90,7 +94,7 @@ namespace ofxOnnxRuntime
 		}
 	}
-	float* BaseHandler::run()
+	std::vector<Ort::Value>* BaseHandler::run()
 	{
 		auto start = std::chrono::high_resolution_clock::now(); // starting timestamp
@ -101,7 +105,7 @@ namespace ofxOnnxRuntime
 		if(input_imgs.size() != batch_size) {
 			ofLog() << "Input images do not match batch size. Inference FAILED.";
-			return dummy_output_tensor.front().GetTensorMutableData<float>();
+			return &dummy_output_tensor;
 		}
 		// 1. Create 1-D array for all values to create tensor & push all values from input_vals to batch_vals
@ -146,7 +150,7 @@ namespace ofxOnnxRuntime
 			// Before running the model, check if we have data
 		if (input_dtype == ModelDataType::INT32 && batch_values_int.empty()) {
 			ofLog() << "Error: INT32 batch values vector is empty";
-			return dummy_output_tensor.front().GetTensorMutableData<float>();
+			return &dummy_output_tensor;
 		}
 		try {
@ -171,7 +175,7 @@ namespace ofxOnnxRuntime
 						std::cout << ", ";
 					}
 				}
-				std::cout << "]" << std::endl;
+				std::cout << "]" << "| Length: " << output_values.size() << std::endl;
 				// Optional: Print total number of elements
 				size_t total_elements = 1;
@ -189,11 +193,11 @@ namespace ofxOnnxRuntime
 				std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl;
 			}
-			return output_values.front().GetTensorMutableData<float>();
+			return &output_values;
 		} catch (const Ort::Exception& ex) {
 			std::cout << "ERROR running model inference: " << ex.what() << std::endl;
-			return dummy_output_tensor.front().GetTensorMutableData<float>();
+			return &dummy_output_tensor;
 		}
 	}
@ -222,17 +226,32 @@ namespace ofxOnnxRuntime
 			resizedImage = cvImage;
 		}
 		cv::Mat reorderedImage;
 		cv::cvtColor(resizedImage, reorderedImage, cv::COLOR_BGR2RGB);  // Convert BGR to RGB
 		// Convert to float32 & normalise (each value is scaled by 1/255)
 		cv::Mat floatImage;
-		resizedImage.convertTo(floatImage, CV_32F, 1.0/255.0);
+		reorderedImage.convertTo(floatImage, CV_32F, 1.0/255.0);
 		// Calculate offset in destination array NEED TO CALC PRODUCT
 		int elementsPerImage = CalculateProduct(input_node_dims);
 		int startPos = idx * elementsPerImage;
-        // Copy directly
+		// If the model expects the channels (rgb) first, then we need to swap them around, if not, proceed as normal
-		float* floatPtr = reinterpret_cast<float*>(floatImage.data);
+		if (!channelsFirst) {
-		std::copy(floatPtr, floatPtr + elementsPerImage, values.begin() + startPos);
+			// Convert to float, and make a copy
 			float* floatPtr = reinterpret_cast<float*>(floatImage.data);
 			std::copy(floatPtr, floatPtr + elementsPerImage, values.begin() + startPos);
 		} else {
 			// Channels-first: split into 3 single-channel mats so the output is planar (all R values first, then all G, then all B) instead of interleaved RGB, RGB, ... per pixel
 			std::vector<cv::Mat> splitChannels(3);
 			cv::split(floatImage, splitChannels); // split into R, G, B
 			size_t planeSize = inputWidth * inputHeight;
 			for (int c = 0; c < 3; ++c) {
 				memcpy(&values[startPos + c * planeSize], splitChannels[c].ptr<float>(), planeSize * sizeof(float));
 			}
 		}
    }
    void BaseHandler::convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx) {
@ -251,18 +270,31 @@ namespace ofxOnnxRuntime
 		} else {
 			resizedImage = cvImage;
 		}
 		cv::Mat reorderedImage;
 		cv::cvtColor(resizedImage, reorderedImage, cv::COLOR_BGR2RGB);  // Convert BGR to RGB
 		// Convert uint8 image to int32 (keeping the 0-255 range)
 		cv::Mat intImage;
-		resizedImage.convertTo(intImage, CV_32SC3);
+		reorderedImage.convertTo(intImage, CV_32SC3);
 		// Calculate offset in destination array CALC PRODUCT
 		int elementsPerImage = CalculateProduct(input_node_dims);
 		int startPos = idx * elementsPerImage;
-		// Copy directly
+		if(!channelsFirst) {
-		int32_t* intPtr = reinterpret_cast<int32_t*>(intImage.data);
+			// Copy directly
-		std::copy(intPtr, intPtr + elementsPerImage, values.begin() + startPos);
+			int32_t* intPtr = reinterpret_cast<int32_t*>(intImage.data);
 			std::copy(intPtr, intPtr + elementsPerImage, values.begin() + startPos);
 		} else {
 			std::vector<cv::Mat> splitChannels(3);
 			cv::split(intImage, splitChannels); // split into R, G, B
 			size_t planeSize = inputWidth * inputHeight;
 			for (int c = 0; c < 3; ++c) {
 				memcpy(&values[startPos + c * planeSize], splitChannels[c].ptr<int32_t>(), planeSize * sizeof(int32_t));
 			}
 		}
 	}
 	void BaseHandler::setInputs(std::vector<ofImage*>& in) {
--- a/src/ofxOnnxRuntime.h
+++ b/src/ofxOnnxRuntime.h
@ -26,6 +26,7 @@ namespace ofxOnnxRuntime
 		ModelDataType output_dtype = FLOAT32;
 		int width;
 		int height;
 		bool channelsFirst = false;
 	};
 	class BaseHandler
@ -33,13 +34,13 @@ namespace ofxOnnxRuntime
 		public:
 			BaseHandler() {}
-			void setup(const std::string& onnx_path, const BaseSetting& base_setting = BaseSetting{ INFER_CPU, 0, FLOAT32, FLOAT32, 256, 256 }, const int& batch_size = 1, const bool debug = false, const bool timestamp = false);
+			void setup(const std::string& onnx_path, const BaseSetting& base_setting = BaseSetting{ INFER_CPU, 0, FLOAT32, FLOAT32, 256, 256, false }, const int& batch_size = 1, const bool debug = false, const bool timestamp = false);
 			void setup2(const std::string& onnx_path, const Ort::SessionOptions& session_options);
 			void setNames();
 			void setInputs(std::vector<ofImage*>& input_imgs);
 			void convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx);
 			void convertImageToMatFloat(ofImage* img, std::vector<float>& values, size_t& idx);
-			float* run();
+			std::vector<Ort::Value>* run();
 			// Utilities ╰（‵□′）╯
 			std::string PrintShape(const std::vector<int64_t>& v);
@ -80,5 +81,7 @@ namespace ofxOnnxRuntime
 			int inputWidth;
 			int inputHeight;
 			bool channelsFirst;
 	};
 }
--- a/src/ofxOnnxRuntimeThread.h
+++ b/src/ofxOnnxRuntimeThread.h
@ -8,7 +8,7 @@ namespace ofxOnnxRuntime {
    {
        public:
            ofxOnnxRuntime::BaseHandler* onnx;
-            float* result = nullptr;
+            std::vector<Ort::Value>* result = nullptr;
            bool isInferenceComplete = false;
            bool shouldRunInference = true;
@ -53,7 +53,7 @@ namespace ofxOnnxRuntime {
            }
            // Method to safely get the result
-            float* getResult() {
+            std::vector<Ort::Value>* getResult() {
                std::lock_guard<std::mutex> lock(mutex);
                return result;
            }