
Add support for channels-first image processing and update return types to std::vector<Ort::Value>

Branch: mingw
cailean, 3 weeks ago
commit 61546e8cbe
  1. src/ofxOnnxRuntime.cpp (60 lines changed)
  2. src/ofxOnnxRuntime.h (7 lines changed)
  3. src/ofxOnnxRuntimeThread.h (4 lines changed)
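Taken together, the changes alter how a sketch consumes the handler. Below is a minimal, hypothetical usage example (the model path, function and variable names are placeholders, and it assumes the enums and BaseSetting field order shown in the src/ofxOnnxRuntime.h diff further down):

#include "ofxOnnxRuntime.h"
#include <vector>

void runExample(std::vector<ofImage*>& frames) {
    ofxOnnxRuntime::BaseHandler onnx;

    // Last field is the new channelsFirst flag: true packs tensors as planar
    // NCHW (all R, then G, then B) instead of the default interleaved layout.
    onnx.setup("model.onnx",
               ofxOnnxRuntime::BaseSetting{ ofxOnnxRuntime::INFER_CPU, 0,
                                            ofxOnnxRuntime::FLOAT32, ofxOnnxRuntime::FLOAT32,
                                            256, 256, true });

    // batch_size defaults to 1, so `frames` should hold exactly one image here.
    onnx.setInputs(frames);

    // run() now returns a pointer to every output tensor instead of a raw float*.
    std::vector<Ort::Value>* outputs = onnx.run();
    float* first = outputs->front().GetTensorMutableData<float>();
    (void)first; // interpret according to the model's output shape
}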

src/ofxOnnxRuntime.cpp (60 lines changed)

@@ -21,6 +21,7 @@ namespace ofxOnnxRuntime
this->output_dtype = base_setting.output_dtype;
this->inputWidth = base_setting.width;
this->inputHeight = base_setting.height;
this->channelsFirst = base_setting.channelsFirst;
Ort::SessionOptions session_options;
session_options.SetIntraOpNumThreads(1);
@@ -57,6 +58,9 @@ namespace ofxOnnxRuntime
{
Ort::AllocatorWithDefaultOptions allocator;
input_node_names.clear();
output_node_names.clear();
// 1. Gets Input Name/s & Shape ([1, 3, 28, 28]) -- In most cases this is usually just one
for (std::size_t i = 0; i < ort_session->GetInputCount(); i++) {
input_node_names.emplace_back(ort_session->GetInputNameAllocated(i, allocator).get());
@@ -90,7 +94,7 @@ namespace ofxOnnxRuntime
}
}
float* BaseHandler::run()
std::vector<Ort::Value>* BaseHandler::run()
{
auto start = std::chrono::high_resolution_clock::now(); // starting timestamp
@@ -101,7 +105,7 @@ namespace ofxOnnxRuntime
if(input_imgs.size() != batch_size) {
ofLog() << "Input images do not match batch size. Inference FAILED.";
return dummy_output_tensor.front().GetTensorMutableData<float>();
return &dummy_output_tensor;
}
// 1. Create 1-D array for all values to create tensor & push all values from input_vals to batch_vals
@@ -146,7 +150,7 @@ namespace ofxOnnxRuntime
// Before running the model, check if we have data
if (input_dtype == ModelDataType::INT32 && batch_values_int.empty()) {
ofLog() << "Error: INT32 batch values vector is empty";
return dummy_output_tensor.front().GetTensorMutableData<float>();
return &dummy_output_tensor;
}
try {
@@ -171,7 +175,7 @@ namespace ofxOnnxRuntime
std::cout << ", ";
}
}
std::cout << "]" << std::endl;
std::cout << "]" << "| Length: " << output_values.size() << std::endl;
// Optional: Print total number of elements
size_t total_elements = 1;
@@ -189,11 +193,11 @@ namespace ofxOnnxRuntime
std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl;
}
return output_values.front().GetTensorMutableData<float>();
return &output_values;
} catch (const Ort::Exception& ex) {
std::cout << "ERROR running model inference: " << ex.what() << std::endl;
return dummy_output_tensor.front().GetTensorMutableData<float>();
return &dummy_output_tensor;
}
}
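Because run() now hands back the whole output vector, a caller can inspect every tensor rather than only the first buffer. A hedged sketch of that pattern (the helper name is illustrative and not part of this commit; the Ort calls are the standard ONNX Runtime C++ API):

#include <onnxruntime_cxx_api.h>
#include "ofxOnnxRuntime.h"

// Illustrative helper: walk every output tensor returned by run().
void inspectOutputs(ofxOnnxRuntime::BaseHandler& onnx) {
    std::vector<Ort::Value>* results = onnx.run();
    for (Ort::Value& v : *results) {
        auto info = v.GetTensorTypeAndShapeInfo();
        std::vector<int64_t> shape = info.GetShape();   // e.g. [1, 3, 256, 256]
        size_t count = info.GetElementCount();
        float* data = v.GetTensorMutableData<float>();  // same accessor the old API exposed
        (void)shape; (void)count; (void)data;
    }
}

Since run() returns the address of output_values (or dummy_output_tensor on failure), the pointer should be treated as valid only until the next call.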
@@ -222,17 +226,32 @@ namespace ofxOnnxRuntime
resizedImage = cvImage;
}
cv::Mat reorderedImage;
cv::cvtColor(resizedImage, reorderedImage, cv::COLOR_BGR2RGB); // Convert BGR to RGB
// Convert to float32 & normalise (keeping the 0-255 range)
cv::Mat floatImage;
resizedImage.convertTo(floatImage, CV_32F, 1.0/255.0);
reorderedImage.convertTo(floatImage, CV_32F, 1.0/255.0);
// Calculate offset in destination array NEED TO CALC PRODUCT
int elementsPerImage = CalculateProduct(input_node_dims);
int startPos = idx * elementsPerImage;
// Copy directly
float* floatPtr = reinterpret_cast<float*>(floatImage.data);
std::copy(floatPtr, floatPtr + elementsPerImage, values.begin() + startPos);
// If the model expects the channels (rgb) first, then we need to swap them around, if not, proceed as normal
if (!channelsFirst) {
// Convert to float, and make a copy
float* floatPtr = reinterpret_cast<float*>(floatImage.data);
std::copy(floatPtr, floatPtr + elementsPerImage, values.begin() + startPos);
} else {
// If we need to rearrange, split into 3 mats -> the output format should be, all R first, then G, then B. Instead of RGB, RGB, for each pixel
std::vector<cv::Mat> splitChannels(3);
cv::split(floatImage, splitChannels); // split into R, G, B
size_t planeSize = inputWidth * inputHeight;
for (int c = 0; c < 3; ++c) {
memcpy(&values[startPos + c * planeSize], splitChannels[c].ptr<float>(), planeSize * sizeof(float));
}
}
}
void BaseHandler::convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx) {
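The new else branch above repacks OpenCV's interleaved HWC layout into the planar CHW layout that channels-first models expect. The same transform, isolated as a standalone sketch (the function name and the assumption of a CV_32FC3 RGB input are mine, not the addon's):

#include <opencv2/core.hpp>
#include <cstring>
#include <vector>

// HWC (R,G,B interleaved per pixel) -> CHW (all R, then all G, then all B).
std::vector<float> toChannelsFirst(const cv::Mat& rgb) {  // expects CV_32FC3, already RGB
    const size_t planeSize = static_cast<size_t>(rgb.rows) * rgb.cols;
    std::vector<float> chw(3 * planeSize);

    std::vector<cv::Mat> planes(3);
    cv::split(rgb, planes);  // planes[0] = R, planes[1] = G, planes[2] = B
    for (int c = 0; c < 3; ++c) {
        // cv::split allocates fresh, contiguous single-channel Mats, so a flat copy is safe
        std::memcpy(chw.data() + c * planeSize, planes[c].ptr<float>(), planeSize * sizeof(float));
    }
    return chw;
}

The INT32 variant in the next hunk does the same thing with int32_t elements.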
@@ -251,18 +270,31 @@ namespace ofxOnnxRuntime
} else {
resizedImage = cvImage;
}
cv::Mat reorderedImage;
cv::cvtColor(resizedImage, reorderedImage, cv::COLOR_BGR2RGB); // Convert BGR to RGB
// Convert uint8 image to int32 (keeping the 0-255 range)
cv::Mat intImage;
resizedImage.convertTo(intImage, CV_32SC3);
reorderedImage.convertTo(intImage, CV_32SC3);
// Calculate offset in destination array CALC PRODUCT
int elementsPerImage = CalculateProduct(input_node_dims);
int startPos = idx * elementsPerImage;
// Copy directly
int32_t* intPtr = reinterpret_cast<int32_t*>(intImage.data);
std::copy(intPtr, intPtr + elementsPerImage, values.begin() + startPos);
if(!channelsFirst) {
// Copy directly
int32_t* intPtr = reinterpret_cast<int32_t*>(intImage.data);
std::copy(intPtr, intPtr + elementsPerImage, values.begin() + startPos);
} else {
std::vector<cv::Mat> splitChannels(3);
cv::split(intImage, splitChannels); // split into R, G, B
size_t planeSize = inputWidth * inputHeight;
for (int c = 0; c < 3; ++c) {
memcpy(&values[startPos + c * planeSize], splitChannels[c].ptr<int32_t>(), planeSize * sizeof(int32_t));
}
}
}
void BaseHandler::setInputs(std::vector<ofImage*>& in) {

src/ofxOnnxRuntime.h (7 lines changed)

@@ -26,6 +26,7 @@ namespace ofxOnnxRuntime
ModelDataType output_dtype = FLOAT32;
int width;
int height;
bool channelsFirst = false;
};
class BaseHandler
@@ -33,13 +34,13 @@ namespace ofxOnnxRuntime
public:
BaseHandler() {}
void setup(const std::string& onnx_path, const BaseSetting& base_setting = BaseSetting{ INFER_CPU, 0, FLOAT32, FLOAT32, 256, 256 }, const int& batch_size = 1, const bool debug = false, const bool timestamp = false);
void setup(const std::string& onnx_path, const BaseSetting& base_setting = BaseSetting{ INFER_CPU, 0, FLOAT32, FLOAT32, 256, 256, false }, const int& batch_size = 1, const bool debug = false, const bool timestamp = false);
void setup2(const std::string& onnx_path, const Ort::SessionOptions& session_options);
void setNames();
void setInputs(std::vector<ofImage*>& input_imgs);
void convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx);
void convertImageToMatFloat(ofImage* img, std::vector<float>& values, size_t& idx);
float* run();
std::vector<Ort::Value>* run();
// Utilities ╰(‵□′)╯
std::string PrintShape(const std::vector<int64_t>& v);
@@ -80,5 +81,7 @@ namespace ofxOnnxRuntime
int inputWidth;
int inputHeight;
bool channelsFirst;
};
}

src/ofxOnnxRuntimeThread.h (4 lines changed)

@@ -8,7 +8,7 @@ namespace ofxOnnxRuntime {
{
public:
ofxOnnxRuntime::BaseHandler* onnx;
float* result = nullptr;
std::vector<Ort::Value>* result = nullptr;
bool isInferenceComplete = false;
bool shouldRunInference = true;
@@ -53,7 +53,7 @@ namespace ofxOnnxRuntime {
}
// Method to safely get the result
float* getResult() {
std::vector<Ort::Value>* getResult() {
std::lock_guard<std::mutex> lock(mutex);
return result;
}
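The threaded wrapper changes in lockstep, so a consumer on the draw thread now receives the tensor vector as well. A sketch of that hand-off; the wrapper's class name is not visible in this hunk, so the sketch is templated on it rather than guessing:

#include <onnxruntime_cxx_api.h>
#include <vector>

// `ThreadedHandler` stands in for whatever class ofxOnnxRuntimeThread.h declares.
template <typename ThreadedHandler>
void drawLatest(ThreadedHandler& worker) {
    std::vector<Ort::Value>* latest = worker.getResult();  // mutex-guarded getter
    if (latest != nullptr && !latest->empty()) {
        float* data = latest->front().GetTensorMutableData<float>();
        // ... read `data` on the main/draw thread
        (void)data;
    }
}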
