From 61546e8cbebe84f88b6c8346eef3dfbda0e03e0b Mon Sep 17 00:00:00 2001
From: cailean
Date: Sun, 1 Jun 2025 12:32:55 +0100
Subject: [PATCH] Add support for channels-first image processing and update
 return types to std::vector

---
 src/ofxOnnxRuntime.cpp     | 60 +++++++++++++++++++++++++++++---------
 src/ofxOnnxRuntime.h       |  7 +++--
 src/ofxOnnxRuntimeThread.h |  4 +--
 3 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/src/ofxOnnxRuntime.cpp b/src/ofxOnnxRuntime.cpp
index a4451a2..148da37 100644
--- a/src/ofxOnnxRuntime.cpp
+++ b/src/ofxOnnxRuntime.cpp
@@ -21,6 +21,7 @@ namespace ofxOnnxRuntime
 		this->output_dtype = base_setting.output_dtype;
 		this->inputWidth = base_setting.width;
 		this->inputHeight = base_setting.height;
+		this->channelsFirst = base_setting.channelsFirst;
 
 		Ort::SessionOptions session_options;
 		session_options.SetIntraOpNumThreads(1);
@@ -57,6 +58,9 @@ namespace ofxOnnxRuntime
 	{
 		Ort::AllocatorWithDefaultOptions allocator;
 
+		input_node_names.clear();
+		output_node_names.clear();
+
 		// 1. Gets Input Name/s & Shape ([1, 3, 28, 28]) -- In most cases this is usually just one
 		for (std::size_t i = 0; i < ort_session->GetInputCount(); i++) {
 			input_node_names.emplace_back(ort_session->GetInputNameAllocated(i, allocator).get());
@@ -90,7 +94,7 @@ namespace ofxOnnxRuntime
 		}
 	}
 
-	float* BaseHandler::run()
+	std::vector<Ort::Value>* BaseHandler::run()
 	{
 		auto start = std::chrono::high_resolution_clock::now(); // starting timestamp
@@ -101,7 +105,7 @@ namespace ofxOnnxRuntime
 
 		if(input_imgs.size() != batch_size) {
 			ofLog() << "Input images do not match batch size. Inference FAILED.";
-			return dummy_output_tensor.front().GetTensorMutableData<float>();
+			return &dummy_output_tensor;
 		}
 
 		// 1. Create 1-D array for all values to create tensor & push all values from input_vals to batch_vals
@@ -146,7 +150,7 @@ namespace ofxOnnxRuntime
 		// Before running the model, check if we have data
 		if (input_dtype == ModelDataType::INT32 && batch_values_int.empty()) {
 			ofLog() << "Error: INT32 batch values vector is empty";
-			return dummy_output_tensor.front().GetTensorMutableData<float>();
+			return &dummy_output_tensor;
 		}
 
 		try {
@@ -171,7 +175,7 @@ namespace ofxOnnxRuntime
 					std::cout << ", ";
 				}
 			}
-			std::cout << "]" << std::endl;
+			std::cout << "]" << "| Length: " << output_values.size() << std::endl;
 
 			// Optional: Print total number of elements
 			size_t total_elements = 1;
@@ -189,11 +193,11 @@ namespace ofxOnnxRuntime
 			std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl;
 		}
 
-		return output_values.front().GetTensorMutableData<float>();
+		return &output_values;
 	}
 	catch (const Ort::Exception& ex) {
 		std::cout << "ERROR running model inference: " << ex.what() << std::endl;
-		return dummy_output_tensor.front().GetTensorMutableData<float>();
+		return &dummy_output_tensor;
 	}
 	}
 
@@ -222,17 +226,32 @@ namespace ofxOnnxRuntime
 			resizedImage = cvImage;
 		}
 
+		cv::Mat reorderedImage;
+		cv::cvtColor(resizedImage, reorderedImage, cv::COLOR_BGR2RGB); // Convert BGR to RGB
+
 		// Convert to float32 & normalise (keeping the 0-255 range)
 		cv::Mat floatImage;
-		resizedImage.convertTo(floatImage, CV_32F, 1.0/255.0);
+		reorderedImage.convertTo(floatImage, CV_32F, 1.0/255.0);
 
 		// Calculate offset in destination array NEED TO CALC PRODUCT
 		int elementsPerImage = CalculateProduct(input_node_dims);
 		int startPos = idx * elementsPerImage;
 
-		// Copy directly
-		float* floatPtr = reinterpret_cast<float*>(floatImage.data);
-		std::copy(floatPtr, floatPtr + elementsPerImage, values.begin() + startPos);
+		// If the model expects the channels (rgb) first, then we need to swap them around, if not, proceed as normal
+		if (!channelsFirst) {
+			// Convert to float, and make a copy
+			float* floatPtr = reinterpret_cast<float*>(floatImage.data);
+			std::copy(floatPtr, floatPtr + elementsPerImage, values.begin() + startPos);
+		} else {
+			// If we need to rearrange, split into 3 mats -> the output format should be, all R first, then G, then B. Instead of RGB, RGB, for each pixel
+			std::vector<cv::Mat> splitChannels(3);
+			cv::split(floatImage, splitChannels); // split into R, G, B
+
+			size_t planeSize = inputWidth * inputHeight;
+			for (int c = 0; c < 3; ++c) {
+				memcpy(&values[startPos + c * planeSize], splitChannels[c].ptr<float>(), planeSize * sizeof(float));
+			}
+		}
 	}
 
 	void BaseHandler::convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx) {
@@ -251,18 +270,31 @@ namespace ofxOnnxRuntime
 		} else {
 			resizedImage = cvImage;
 		}
 
+		cv::Mat reorderedImage;
+		cv::cvtColor(resizedImage, reorderedImage, cv::COLOR_BGR2RGB); // Convert BGR to RGB
 
 		// Convert uint8 image to int32 (keeping the 0-255 range)
 		cv::Mat intImage;
-		resizedImage.convertTo(intImage, CV_32SC3);
+		reorderedImage.convertTo(intImage, CV_32SC3);
 
 		// Calculate offset in destination array CALC PRODUCT
 		int elementsPerImage = CalculateProduct(input_node_dims);
 		int startPos = idx * elementsPerImage;
 
-		// Copy directly
-		int32_t* intPtr = reinterpret_cast<int32_t*>(intImage.data);
-		std::copy(intPtr, intPtr + elementsPerImage, values.begin() + startPos);
+		if(!channelsFirst) {
+			// Copy directly
+			int32_t* intPtr = reinterpret_cast<int32_t*>(intImage.data);
+			std::copy(intPtr, intPtr + elementsPerImage, values.begin() + startPos);
+		} else {
+			std::vector<cv::Mat> splitChannels(3);
+			cv::split(intImage, splitChannels); // split into R, G, B
+
+			size_t planeSize = inputWidth * inputHeight;
+			for (int c = 0; c < 3; ++c) {
+				memcpy(&values[startPos + c * planeSize], splitChannels[c].ptr<int32_t>(), planeSize * sizeof(int32_t));
+			}
+		}
+
 	}
 
 	void BaseHandler::setInputs(std::vector<ofImage*>& in) {
diff --git a/src/ofxOnnxRuntime.h b/src/ofxOnnxRuntime.h
index 774a425..882546a 100644
--- a/src/ofxOnnxRuntime.h
+++ b/src/ofxOnnxRuntime.h
@@ -26,6 +26,7 @@ namespace ofxOnnxRuntime
 		ModelDataType output_dtype = FLOAT32;
 		int width;
 		int height;
+		bool channelsFirst = false;
 	};
 
 	class BaseHandler
@@ -33,13 +34,13 @@ namespace ofxOnnxRuntime
 	public:
 		BaseHandler() {}
 
-		void setup(const std::string& onnx_path, const BaseSetting& base_setting = BaseSetting{ INFER_CPU, 0, FLOAT32, FLOAT32, 256, 256 }, const int& batch_size = 1, const bool debug = false, const bool timestamp = false);
+		void setup(const std::string& onnx_path, const BaseSetting& base_setting = BaseSetting{ INFER_CPU, 0, FLOAT32, FLOAT32, 256, 256, false }, const int& batch_size = 1, const bool debug = false, const bool timestamp = false);
 		void setup2(const std::string& onnx_path, const Ort::SessionOptions& session_options);
 		void setNames();
 		void setInputs(std::vector<ofImage*>& input_imgs);
 		void convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx);
 		void convertImageToMatFloat(ofImage* img, std::vector<float>& values, size_t& idx);
-		float* run();
+		std::vector<Ort::Value>* run();
 
 		// Utilities ╰(‵□′)╯
 		std::string PrintShape(const std::vector<int64_t>& v);
@@ -80,5 +81,7 @@ namespace ofxOnnxRuntime
 
 		int inputWidth;
 		int inputHeight;
+
+		bool channelsFirst;
 	};
 }
diff --git a/src/ofxOnnxRuntimeThread.h b/src/ofxOnnxRuntimeThread.h
index 1caa862..9f0b136 100644
--- a/src/ofxOnnxRuntimeThread.h
+++ b/src/ofxOnnxRuntimeThread.h
@@ -8,7 +8,7 @@ namespace ofxOnnxRuntime {
 	{
 	public:
 		ofxOnnxRuntime::BaseHandler* onnx;
-		float* result = nullptr;
+		std::vector<Ort::Value>* result = nullptr;
 
 		bool isInferenceComplete = false;
 		bool shouldRunInference = true;
@@ -53,7 +53,7 @@ namespace ofxOnnxRuntime {
 		}
 
 		// Method to safely get the result
-		float* getResult() {
+		std::vector<Ort::Value>* getResult() {
			std::lock_guard<std::mutex> lock(mutex);
 			return result;
 		}