diff --git a/.gitignore b/.gitignore index 00bfb09..d8e6bb4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ # Ignoring onnxruntime libs -# /libs/onnxruntime/lib/* +/libs/onnxruntime/lib/msys2/* example-*/config.make example-*/*.sln diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..1fc710b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,58 @@ +{ + "files.associations": { + "xiosbase": "cpp", + "algorithm": "cpp", + "array": "cpp", + "atomic": "cpp", + "bit": "cpp", + "cctype": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "compare": "cpp", + "concepts": "cpp", + "cstddef": "cpp", + "cstdint": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "exception": "cpp", + "functional": "cpp", + "initializer_list": "cpp", + "ios": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "iterator": "cpp", + "limits": "cpp", + "list": "cpp", + "memory": "cpp", + "new": "cpp", + "numeric": "cpp", + "optional": "cpp", + "ostream": "cpp", + "stdexcept": "cpp", + "streambuf": "cpp", + "string": "cpp", + "system_error": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "typeinfo": "cpp", + "unordered_map": "cpp", + "unordered_set": "cpp", + "utility": "cpp", + "variant": "cpp", + "vector": "cpp", + "xfacet": "cpp", + "xhash": "cpp", + "xlocale": "cpp", + "xlocinfo": "cpp", + "xlocnum": "cpp", + "xmemory": "cpp", + "xstddef": "cpp", + "xstring": "cpp", + "xtr1common": "cpp", + "xutility": "cpp" + } +} \ No newline at end of file diff --git a/addon_config.mk b/addon_config.mk index 704f4d8..4e09896 100644 --- a/addon_config.mk +++ b/addon_config.mk @@ -1,11 +1,12 @@ meta: ADDON_NAME = ofxOnnxRuntime ADDON_DESCRIPTION = "ONNX Runtime addon for OpenFrameworks" - ADDON_AUTHOR = Yuya Hanai + ADDON_AUTHOR = Cailean Finn ADDON_TAGS = "ONNX" - ADDON_URL = https://github.com/hanasaan/ofxOnnxRuntime + ADDON_URL = https://github.com/caileannn/ofxOnnxRuntime common: + ADDON_DEPENDENCIES = ofxOpenCv ADDON_INCLUDES = libs/onnxruntime/include ADDON_INCLUDES += src osx: diff --git a/src/ofxOnnxRuntime.cpp b/src/ofxOnnxRuntime.cpp index e26e3a2..3648728 100644 --- a/src/ofxOnnxRuntime.cpp +++ b/src/ofxOnnxRuntime.cpp @@ -14,9 +14,12 @@ namespace ofxOnnxRuntime return wstr; } #endif - void BaseHandler::setup(const std::string & onnx_path, const BaseSetting & base_setting, const int & batch_size, const bool debug, const bool timestamp) { + // Store data types + this->input_dtype = base_setting.input_dtype; + this->output_dtype = base_setting.output_dtype; + Ort::SessionOptions session_options; session_options.SetIntraOpNumThreads(1); session_options.SetIntraOpNumThreads(1); @@ -70,13 +73,6 @@ namespace ofxOnnxRuntime if (debug) ofLog() << ofToString(input_node_size) + ", Batch Size:" + ofToString(input_node_dims[0]); - // 3. Resize input values array to match input tensor/s - input_values_handler.resize(batch_size); - - for (auto& tensor : input_values_handler) { - tensor.resize(input_node_size); - } - // 2. Clear up output values output_node_dims.clear(); output_values.clear(); @@ -99,20 +95,42 @@ namespace ofxOnnxRuntime std::vector input_tensors; + size_t num_images = input_imgs.size(); + + if(input_imgs.size() != batch_size) { + ofLog() << "Input images do not match batch size. Inference FAILED."; + return dummy_output_tensor.front().GetTensorMutableData(); + } + // 1. Create 1-D array for all values to create tensor & push all values from input_vals to batch_vals std::vector batch_values; - batch_values.reserve(input_node_size * batch_size); // Reserve space but don't initialize + batch_values.resize(input_node_size * batch_size); // Reserve space but don't initialize - for (const auto& inner_vec : input_values_handler) { - for (float value : inner_vec) { - batch_values.push_back(value); + std::vector batch_values_int; + batch_values_int.resize(input_node_size * batch_size); // Reserve space but don't initialize + + if (input_dtype == ModelDataType::FLOAT32){ + // I have a list of imgs, these need to be converted from images into input for the model (int or float) + for(size_t i = 0; i < batch_size; i++) { + convertImageToMatFloat(input_imgs[i], batch_values, i); } - } - // 2. Create tensor with batch values { input data, input size, model input dims, model input size} - input_tensors.emplace_back(Ort::Value::CreateTensor( - memory_info_handler, batch_values.data(), input_node_size, + // 2. Create tensor with batch values { input data, input size, model input dims, model input size} + input_tensors.emplace_back(Ort::Value::CreateTensor( + memory_info_handler, batch_values.data(), input_node_size, + input_node_dims.data(), input_node_dims.size())); + } + else if (input_dtype == ModelDataType::INT32) { + // I have a list of imgs, these need to be converted from images into input for the model (int or float) + for(size_t i = 0; i < batch_size; i++) { + convertImageToMatInt32(input_imgs[i], batch_values_int, i); + } + + // 2. Create tensor with batch values { input data, input size, model input dims, model input size} + input_tensors.emplace_back(Ort::Value::CreateTensor( + memory_info_handler, batch_values_int.data(), input_node_size, input_node_dims.data(), input_node_dims.size())); + } // transform std::string -> const char* std::vector input_names_char(input_node_names.size(), nullptr); @@ -123,6 +141,11 @@ namespace ofxOnnxRuntime std::transform(std::begin(output_node_names), std::end(output_node_names), std::begin(output_names_char), [&](const std::string& str) { return str.c_str(); }); + // Before running the model, check if we have data + if (input_dtype == ModelDataType::INT32 && batch_values_int.empty()) { + ofLog() << "Error: INT32 batch values vector is empty"; + return dummy_output_tensor.front().GetTensorMutableData(); + } try { // 3. Run inference, { in names, input data, num of inputs, output names, num of outputs } @@ -179,9 +202,55 @@ namespace ofxOnnxRuntime * */ - // Fills the tensor (selected by the idx) with input values from ofFloatPixels array - void BaseHandler::setInput(ofFloatPixels &pixels, int tensor_idx, int width, int height, int channels) { - pixels.setFromExternalPixels(getInputTensorData()->at(tensor_idx).data(), width, height, channels); + // Add separate methods for float and int32 conversion + void BaseHandler::convertImageToMatFloat(ofImage* img, std::vector& values, size_t& idx) { + // Your existing conversion code for float + ofPixels& pix = img->getPixels(); + int width = img->getWidth(); + int height = img->getHeight(); + int channels = pix.getNumChannels(); + + cv::Mat cvImage = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData()); + cv::InputArray inputArray(cvImage); + image_array = cv::dnn::blobFromImage(inputArray, 1 / 255.0, cv::Size(input_node_dims[2], input_node_dims[3]), (0, 0, 0), false, false); + + std::memcpy( + values.data() + idx * channels * width * height, + image_array.data, + channels * width * height * sizeof(float) + ); + } + + void BaseHandler::convertImageToMatInt32(ofImage* img, std::vector& values, size_t& idx) { + ofPixels& pix = img->getPixels(); + int width = img->getWidth(); + int height = img->getHeight(); + int channels = pix.getNumChannels(); + + cv::Mat cvImage = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData()); + + // Create blob with the correct dimensions + cv::Mat floatMat = cv::dnn::blobFromImage(cvImage, 1/255.0, + cv::Size(256, 256), + cv::Scalar(0, 0, 0), false, false); + + // Convert float blob to int32 + cv::Mat intMat; + floatMat.convertTo(intMat, CV_32S); + + // Calculate how many values we need to add + size_t elementsPerImage = channels * 256* 256; + size_t startPos = idx * elementsPerImage; + + // Copy data from intMat to values + int32_t* intData = (int32_t*)intMat.data; + for (size_t i = 0; i < elementsPerImage; i++) { + values[startPos + i] = intData[i]; + } + } + + void BaseHandler::setInputs(std::vector& in) { + this->input_imgs = in; } // Prints the shape of the given tensor (ex. input: (1, 1, 512, 512)) diff --git a/src/ofxOnnxRuntime.h b/src/ofxOnnxRuntime.h index a513a7c..c62fc51 100644 --- a/src/ofxOnnxRuntime.h +++ b/src/ofxOnnxRuntime.h @@ -2,6 +2,7 @@ #include #include "ofMain.h" +#include "ofxOpenCv.h" namespace ofxOnnxRuntime { @@ -12,10 +13,17 @@ namespace ofxOnnxRuntime INFER_TENSORRT }; + enum ModelDataType { + FLOAT32, + INT32 + }; + struct BaseSetting { InferType infer_type; int device_id; + ModelDataType input_dtype = FLOAT32; + ModelDataType output_dtype = FLOAT32; }; class BaseHandler @@ -23,10 +31,12 @@ namespace ofxOnnxRuntime public: BaseHandler() {} - void setup(const std::string& onnx_path, const BaseSetting& base_setting = BaseSetting{ INFER_CPU, 0 }, const int& batch_size = 1, const bool debug = false, const bool timestamp = false); + void setup(const std::string& onnx_path, const BaseSetting& base_setting = BaseSetting{ INFER_CPU, 0, FLOAT32, FLOAT32 }, const int& batch_size = 1, const bool debug = false, const bool timestamp = false); void setup2(const std::string& onnx_path, const Ort::SessionOptions& session_options); void setNames(); - void setInput(ofFloatPixels &pixels, int tensor_idx, int width, int height, int channels); + void setInputs(std::vector& input_imgs); + void convertImageToMatInt32(ofImage* img, std::vector& values, size_t& idx); + void convertImageToMatFloat(ofImage* img, std::vector& values, size_t& idx); float* run(); // Utilities ╰(‵□′)╯ @@ -34,10 +44,6 @@ namespace ofxOnnxRuntime Ort::Value GenerateTensor(int batch_size); int CalculateProduct(const std::vector& v); Ort::Value VectorToTensor(std::vector& data, const std::vector& shape); - - std::vector>* getInputTensorData() { - return &this->input_values_handler; - } protected: bool debug = false; @@ -63,5 +69,11 @@ namespace ofxOnnxRuntime std::vector batched_dims; int batch_size; int num_outputs = 1; + + std::vector input_imgs; + cv::Mat image_array; + + ModelDataType input_dtype; + ModelDataType output_dtype; }; } diff --git a/temp.cpp b/temp.cpp new file mode 100644 index 0000000..4744494 --- /dev/null +++ b/temp.cpp @@ -0,0 +1,297 @@ +#include "ofxOnnxRuntime.h" + +namespace ofxOnnxRuntime +{ +#ifdef _MSC_VER + static std::wstring to_wstring(const std::string &str) + { + unsigned len = str.size() * 2; + setlocale(LC_CTYPE, ""); + wchar_t *p = new wchar_t[len]; + mbstowcs(p, str.c_str(), len); + std::wstring wstr(p); + delete[] p; + return wstr; + } +#endif + void BaseHandler::setup(const std::string & onnx_path, const BaseSetting & base_setting, const int & batch_size, const bool debug, const bool timestamp) + { + // Store data types + this->input_dtype = base_setting.input_dtype; + this->output_dtype = base_setting.output_dtype; + + Ort::SessionOptions session_options; + session_options.SetIntraOpNumThreads(1); + session_options.SetIntraOpNumThreads(1); + session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); + + if (base_setting.infer_type == INFER_CUDA) { + OrtCUDAProviderOptions opts; + opts.device_id = 0; + opts.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive; + opts.do_copy_in_default_stream = 0; + opts.arena_extend_strategy = 0; + session_options.AppendExecutionProvider_CUDA(opts); + } + + this->timestamp = timestamp; + this->debug = debug; + this->batch_size = batch_size; + this->setup2(onnx_path, session_options); + } + + void BaseHandler::setup2(const std::string & onnx_path, const Ort::SessionOptions & session_options) + { + std::string path = ofToDataPath(onnx_path, true); + + std::wstring wpath(path.begin(), path.end()); // basic conversion + + ort_session = std::make_shared(ort_env, wpath.c_str(), session_options); + + setNames(); + } + + void BaseHandler::setNames() + { + Ort::AllocatorWithDefaultOptions allocator; + + // 1. Gets Input Name/s & Shape ([1, 3, 28, 28]) -- In most cases this is usually just one + for (std::size_t i = 0; i < ort_session->GetInputCount(); i++) { + input_node_names.emplace_back(ort_session->GetInputNameAllocated(i, allocator).get()); + input_node_dims = ort_session->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape(); + + // Some models might have negative shape values to indicate dynamic shape, e.g., for variable batch size. (?, 3, 28, 28) -> (1, 3, 28, 28) + for (auto& s : input_node_dims) if (s < 0) s = batch_size; + + if (debug) std::cout << input_node_names.at(i) << " : " << PrintShape(input_node_dims) << std::endl; + } + + // 2. Calculate the product of the dimensions + for (auto& f : input_node_dims) { + input_node_size *= f; + } + + if (debug) ofLog() << ofToString(input_node_size) + ", Batch Size:" + ofToString(input_node_dims[0]); + + // 2. Clear up output values + output_node_dims.clear(); + output_values.clear(); + + // 3. Gets Output name/s & Shapes + for (std::size_t i = 0; i < ort_session->GetOutputCount(); i++) { + output_node_names.emplace_back(ort_session->GetOutputNameAllocated(i, allocator).get()); + auto output_shapes = ort_session->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape(); + + output_values.emplace_back(nullptr); + + if (debug) std::cout << output_node_names.at(i) << " : " << PrintShape(output_shapes) << std::endl; + } + } + + float* BaseHandler::run() + { + + auto start = std::chrono::high_resolution_clock::now(); // starting timestamp + + std::vector input_tensors; + + size_t num_images = input_imgs.size(); + + if(input_imgs.size() != batch_size) { + ofLog() << "Input images do not match batch size. Inference FAILED."; + return dummy_output_tensor.front().GetTensorMutableData(); + } + + // transform std::string -> const char* + std::vector input_names_char(input_node_names.size(), nullptr); + std::transform(std::begin(input_node_names), std::end(input_node_names), std::begin(input_names_char), + [&](const std::string& str) { return str.c_str(); }); + + std::vector output_names_char(output_node_names.size(), nullptr); + std::transform(std::begin(output_node_names), std::end(output_node_names), std::begin(output_names_char), + [&](const std::string& str) { return str.c_str(); }); + + std::vector batch_values_f; + std::vector batch_values_int32; + batch_values_f.reserve(input_node_size * batch_size); // Reserve space but don't initialize + batch_values_int32.reserve(input_node_size * batch_size); // Reserve space but don't initialize + + + if (input_dtype == ModelDataType::FLOAT32){ + // I have a list of imgs, these need to be converted from images into input for the model (int or float) + for(size_t i = 0; i < batch_size; i++) { + convertImageToMatFloat(input_imgs[i], batch_values_f, i); + } + + // 2. Create tensor with batch values { input data, input size, model input dims, model input size} + input_tensors.emplace_back(Ort::Value::CreateTensor( + memory_info_handler, batch_values_f.data(), input_node_size, + input_node_dims.data(), input_node_dims.size())); + } + else if (input_dtype == ModelDataType::INT32) { + // I have a list of imgs, these need to be converted from images into input for the model (int or float) + for(size_t i = 0; i < batch_size; i++) { + convertImageToMatInt32(input_imgs[i], batch_values_int32, i); + } + + // 2. Create tensor with batch values { input data, input size, model input dims, model input size} + input_tensors.emplace_back(Ort::Value::CreateTensor( + memory_info_handler, batch_values_int32.data(), input_node_size, + input_node_dims.data(), input_node_dims.size())); + } + + + + + try { + // 3. Run inference, { in names, input data, num of inputs, output names, num of outputs } + ofLog() << "run"; + output_values = ort_session->Run(Ort::RunOptions{ nullptr }, + input_names_char.data(), input_tensors.data(), + input_names_char.size(), output_names_char.data(), + output_names_char.size()); + ofLog() << "ran"; + + if (debug) { + // Gets the address of the first value + auto& out = output_values.front(); + // Get tensor shape information + Ort::TensorTypeAndShapeInfo info = out.GetTensorTypeAndShapeInfo(); + std::vector output_dims = info.GetShape(); + + // Print the dimensions + std::cout << "Output tensor dimensions: ["; + for (size_t i = 0; i < output_dims.size(); i++) { + std::cout << output_dims[i]; + if (i < output_dims.size() - 1) { + std::cout << ", "; + } + } + std::cout << "]" << std::endl; + + // Optional: Print total number of elements + size_t total_elements = 1; + for (auto& dim : output_dims) { + if (dim > 0) { // Handle dynamic dimensions + total_elements *= static_cast(dim); + } + } + std::cout << "Total elements: " << total_elements << std::endl; + } + + // if (timestamp) { + // auto end = std::chrono::high_resolution_clock::now(); + // std::chrono::duration elapsed = end - start; + // std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl; + // } + + return output_values.front().GetTensorMutableData(); + + } catch (const Ort::Exception& ex) { + std::cout << "ERROR running model inference: " << ex.what() << std::endl; + return dummy_output_tensor.front().GetTensorMutableData(); + } + + } + + /* + * + * Utilties (。・∀・)ノ゙ + * + */ + + // Add separate methods for float and int32 conversion + void BaseHandler::convertImageToMatFloat(ofImage* img, std::vector& values, size_t& idx) { + // Your existing conversion code for float + ofPixels& pix = img->getPixels(); + int width = img->getWidth(); + int height = img->getHeight(); + int channels = pix.getNumChannels(); + + cv::Mat cvImage = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData()); + cv::InputArray inputArray(cvImage); + image_array = cv::dnn::blobFromImage(inputArray, 1 / 255.0, cv::Size(input_node_dims[2], input_node_dims[3]), (0, 0, 0), false, false); + + std::memcpy( + values.data() + idx * channels * width * height, + image_array.data, + channels * width * height * sizeof(float) + ); + } + + void BaseHandler::convertImageToMatInt32(ofImage* img, std::vector& values, size_t& idx) { + // New conversion code for int32 + ofPixels& pix = img->getPixels(); + int width = img->getWidth(); + int height = img->getHeight(); + int channels = pix.getNumChannels(); + + cv::Mat cvImage = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData()); + cv::InputArray inputArray(cvImage); + cv::Mat intMat = cv::dnn::blobFromImage(inputArray, 1 / 255.0, cv::Size(height, width), (0, 0, 0), false, false); + + intMat.convertTo(image_array, CV_32S); + + std::memcpy( + values.data() + idx * channels * width * height, + image_array.data, + channels * width * height * sizeof(int32_t) + ); + } + + void BaseHandler::setInputs(std::vector& in) { + this->input_imgs = in; + } + + // Prints the shape of the given tensor (ex. input: (1, 1, 512, 512)) + std::string BaseHandler::PrintShape(const std::vector& v) { + std::stringstream ss; + for (std::size_t i = 0; i < v.size() - 1; i++) ss << v[i] << "x"; + ss << v[v.size() - 1]; + return ss.str(); + } + + Ort::Value BaseHandler::GenerateTensor(int batch_size) { + // Random number generation setup + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(0.0f, 255.0f); // Random values between 0 and 255 + + // Calculate the total number of elements for a single tensor (without batch dimension) {?, 8} -> 8 + int tensor_size = CalculateProduct(input_node_dims); + + // Create a vector to hold all the values for the batch (8 * (4)batch_size) -> 32 + std::vector batch_values(batch_size * tensor_size); + + // Fill the batch with random values + std::generate(batch_values.begin(), batch_values.end(), [&]() { + return dis(gen); + }); + + // Fill the batch with random values + std::generate(batch_values.begin(), batch_values.end(), [&]() { + return dis(gen); + }); + + // Create the batched dimensions by inserting the batch size at the beginning of the original dimensions + std::vector batched_dims = { }; // Start with batch size + batched_dims.insert(batched_dims.end(), input_node_dims.begin(), input_node_dims.end()); // Add the remaining dimensions + batched_dims[0] = batch_size; + + return VectorToTensor(batch_values, batched_dims); + } + + int BaseHandler::CalculateProduct(const std::vector& v) { + int total = 1; + for (auto& i : v) total *= i; + return total; + } + + Ort::Value BaseHandler::VectorToTensor(std::vector& data, const std::vector& shape) { + // Create a tensor from the provided data, shape, and memory info + auto tensor = Ort::Value::CreateTensor(memory_info_handler, data.data(), data.size(), shape.data(), shape.size()); + + // Return the created tensor + return tensor; + } +}