3 changed files with 113 additions and 322 deletions
@@ -0,0 +1,74 @@
#pragma once

#include "ofMain.h"
#include "ofxOnnxRuntime.h"

namespace ofxOnnxRuntime {

class OnnxThread : public ofThread
{
public:
    ofxOnnxRuntime::BaseHandler* onnx = nullptr;
    float* result = nullptr;
    bool isInferenceComplete = false;
    bool shouldRunInference = true;

    ~OnnxThread() {
        stop();
        waitForThread(false);
    }

    void setup(ofxOnnxRuntime::BaseHandler* onnx) {
        std::lock_guard<std::mutex> lock(mutex);
        this->onnx = onnx;
    }

    void start() {
        startThread();
    }

    void stop() {
        stopThread();
        condition.notify_all(); // wake the worker so it can observe the stop request
    }

    void threadedFunction() {
        while (isThreadRunning()) {
            std::unique_lock<std::mutex> lock(mutex);
            runOnnx();
            // Sleep until update() signals that a new frame is ready; a spurious
            // wakeup is harmless because shouldRunInference gates runOnnx()
            condition.wait(lock);
        }
    }

    void update() {
        std::lock_guard<std::mutex> lock(mutex);
        condition.notify_one();
    }

    void runOnnx() {
        if (shouldRunInference) {
            result = onnx->run();
            isInferenceComplete = true;
            shouldRunInference = false;
        }
    }

    // Method to safely get the result
    float* getResult() {
        std::lock_guard<std::mutex> lock(mutex);
        return result;
    }

    bool checkInferenceComplete() {
        std::lock_guard<std::mutex> lock(mutex);
        return isInferenceComplete;
    }

    void resetInferenceFlag() {
        std::lock_guard<std::mutex> lock(mutex);
        isInferenceComplete = false;
    }

protected:
    std::condition_variable condition;
};
}
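
A minimal sketch of how an app might drive OnnxThread from the main loop (the BaseHandler member is assumed to be set up elsewhere; the member names below are illustrative, not part of the addon):

class ofApp : public ofBaseApp {
public:
    ofxOnnxRuntime::BaseHandler model;   // assumed configured via model.setup(...)
    ofxOnnxRuntime::OnnxThread worker;

    void setup() {
        worker.setup(&model);   // hand the handler to the worker
        worker.start();         // spawns threadedFunction()
    }

    void update() {
        if (worker.checkInferenceComplete()) {
            float* out = worker.getResult(); // mutex-guarded read
            // ... consume `out` here ...
            worker.resetInferenceFlag();
            worker.shouldRunInference = true; // request the next run
        }
        worker.update(); // wake the worker via the condition variable
    }
};
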
@@ -1,297 +0,0 @@
#include "ofxOnnxRuntime.h"

namespace ofxOnnxRuntime
{
#ifdef _MSC_VER
static std::wstring to_wstring(const std::string &str)
{
    unsigned len = str.size() * 2;
    setlocale(LC_CTYPE, "");
    wchar_t *p = new wchar_t[len];
    mbstowcs(p, str.c_str(), len);
    std::wstring wstr(p);
    delete[] p;
    return wstr;
}
#endif

void BaseHandler::setup(const std::string & onnx_path, const BaseSetting & base_setting, const int & batch_size, const bool debug, const bool timestamp)
{
    // Store data types
    this->input_dtype = base_setting.input_dtype;
    this->output_dtype = base_setting.output_dtype;

    Ort::SessionOptions session_options;
    session_options.SetIntraOpNumThreads(1);
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

    if (base_setting.infer_type == INFER_CUDA) {
        OrtCUDAProviderOptions opts;
        opts.device_id = 0;
        opts.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
        opts.do_copy_in_default_stream = 0;
        opts.arena_extend_strategy = 0;
        session_options.AppendExecutionProvider_CUDA(opts);
    }

    this->timestamp = timestamp;
    this->debug = debug;
    this->batch_size = batch_size;
    this->setup2(onnx_path, session_options);
}

void BaseHandler::setup2(const std::string & onnx_path, const Ort::SessionOptions & session_options)
{
    std::string path = ofToDataPath(onnx_path, true);

#ifdef _MSC_VER
    // ONNX Runtime expects a wide-character path on Windows
    ort_session = std::make_shared<Ort::Session>(ort_env, to_wstring(path).c_str(), session_options);
#else
    ort_session = std::make_shared<Ort::Session>(ort_env, path.c_str(), session_options);
#endif

    setNames();
}

void BaseHandler::setNames()
{
    Ort::AllocatorWithDefaultOptions allocator;

    // 1. Get input name(s) & shape (e.g. [1, 3, 28, 28]) -- in most cases there is just one input
    for (std::size_t i = 0; i < ort_session->GetInputCount(); i++) {
        input_node_names.emplace_back(ort_session->GetInputNameAllocated(i, allocator).get());
        input_node_dims = ort_session->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();

        // Some models use negative shape values to mark a dynamic dimension,
        // e.g. a variable batch size: (?, 3, 28, 28) -> (1, 3, 28, 28)
        for (auto& s : input_node_dims) if (s < 0) s = batch_size;

        if (debug) std::cout << input_node_names.at(i) << " : " << PrintShape(input_node_dims) << std::endl;
    }

    // 2. Calculate the product of the input dimensions
    for (auto& f : input_node_dims) {
        input_node_size *= f;
    }

    if (debug) ofLog() << ofToString(input_node_size) + ", Batch Size: " + ofToString(input_node_dims[0]);

    // 3. Clear out any stale output values
    output_node_dims.clear();
    output_values.clear();

    // 4. Get output name(s) & shapes
    for (std::size_t i = 0; i < ort_session->GetOutputCount(); i++) {
        output_node_names.emplace_back(ort_session->GetOutputNameAllocated(i, allocator).get());
        auto output_shapes = ort_session->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();

        output_values.emplace_back(nullptr);

        if (debug) std::cout << output_node_names.at(i) << " : " << PrintShape(output_shapes) << std::endl;
    }
}

float* BaseHandler::run()
{
    auto start = std::chrono::high_resolution_clock::now(); // starting timestamp

    std::vector<Ort::Value> input_tensors;

    size_t num_images = input_imgs.size();

    if (num_images != static_cast<size_t>(batch_size)) {
        ofLog() << "Input images do not match batch size. Inference FAILED.";
        return dummy_output_tensor.front().GetTensorMutableData<float>();
    }

    // Transform std::string -> const char*
    std::vector<const char*> input_names_char(input_node_names.size(), nullptr);
    std::transform(std::begin(input_node_names), std::end(input_node_names), std::begin(input_names_char),
        [&](const std::string& str) { return str.c_str(); });

    std::vector<const char*> output_names_char(output_node_names.size(), nullptr);
    std::transform(std::begin(output_node_names), std::end(output_node_names), std::begin(output_names_char),
        [&](const std::string& str) { return str.c_str(); });

    // Size the staging buffers up front. input_node_size already includes the
    // batch dimension, and the converters memcpy into per-image offsets, so
    // reserve() alone would leave those writes past the vector's logical size.
    std::vector<float> batch_values_f;
    std::vector<int32_t> batch_values_int32;
    batch_values_f.resize(input_node_size);
    batch_values_int32.resize(input_node_size);

    if (input_dtype == ModelDataType::FLOAT32) {
        // 1. Convert each input image into the model's float input layout
        for (size_t i = 0; i < num_images; i++) {
            convertImageToMatFloat(input_imgs[i], batch_values_f, i);
        }

        // 2. Create tensor with batch values { input data, input size, model input dims, model input size }
        input_tensors.emplace_back(Ort::Value::CreateTensor<float>(
            memory_info_handler, batch_values_f.data(), input_node_size,
            input_node_dims.data(), input_node_dims.size()));
    }
    else if (input_dtype == ModelDataType::INT32) {
        // 1. Convert each input image into the model's int32 input layout
        for (size_t i = 0; i < num_images; i++) {
            convertImageToMatInt32(input_imgs[i], batch_values_int32, i);
        }

        // 2. Create tensor with batch values { input data, input size, model input dims, model input size }
        input_tensors.emplace_back(Ort::Value::CreateTensor<int32_t>(
            memory_info_handler, batch_values_int32.data(), input_node_size,
            input_node_dims.data(), input_node_dims.size()));
    }

    try {
        // 3. Run inference: { run options, input names, input data, num of inputs, output names, num of outputs }
        output_values = ort_session->Run(Ort::RunOptions{ nullptr },
            input_names_char.data(), input_tensors.data(),
            input_names_char.size(), output_names_char.data(),
            output_names_char.size());

        if (debug) {
            // Get the first output value
            auto& out = output_values.front();
            // Get tensor shape information
            Ort::TensorTypeAndShapeInfo info = out.GetTensorTypeAndShapeInfo();
            std::vector<int64_t> output_dims = info.GetShape();

            // Print the dimensions
            std::cout << "Output tensor dimensions: [";
            for (size_t i = 0; i < output_dims.size(); i++) {
                std::cout << output_dims[i];
                if (i < output_dims.size() - 1) {
                    std::cout << ", ";
                }
            }
            std::cout << "]" << std::endl;

            // Optional: print the total number of elements
            size_t total_elements = 1;
            for (auto& dim : output_dims) {
                if (dim > 0) { // Skip dynamic dimensions
                    total_elements *= static_cast<size_t>(dim);
                }
            }
            std::cout << "Total elements: " << total_elements << std::endl;
        }

        if (timestamp) {
            auto end = std::chrono::high_resolution_clock::now();
            std::chrono::duration<double, std::milli> elapsed = end - start;
            std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl;
        }

        return output_values.front().GetTensorMutableData<float>();

    } catch (const Ort::Exception& ex) {
        std::cout << "ERROR running model inference: " << ex.what() << std::endl;
        return dummy_output_tensor.front().GetTensorMutableData<float>();
    }
}

/*
 * Utilities (。・∀・)ノ゙
 */

// Separate conversion methods for float and int32 model inputs

void BaseHandler::convertImageToMatFloat(ofImage* img, std::vector<float>& values, size_t& idx) {
    // Wrap the ofImage pixels in a cv::Mat (no copy), then normalize into a float blob
    ofPixels& pix = img->getPixels();
    int width = img->getWidth();
    int height = img->getHeight();
    int channels = pix.getNumChannels();

    cv::Mat cvImage = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData());
    // NCHW dims are [N, C, H, W], and cv::Size takes (width, height)
    image_array = cv::dnn::blobFromImage(cvImage, 1 / 255.0, cv::Size(input_node_dims[3], input_node_dims[2]), cv::Scalar(0, 0, 0), false, false);

    // Copy this image's blob into its slot in the batch buffer. The blob now has
    // the model's dims, so the slot size comes from those, not the source image.
    size_t plane_size = input_node_dims[1] * input_node_dims[2] * input_node_dims[3]; // C * H * W
    std::memcpy(
        values.data() + idx * plane_size,
        image_array.data,
        plane_size * sizeof(float)
    );
}

void BaseHandler::convertImageToMatInt32(ofImage* img, std::vector<int32_t>& values, size_t& idx) {
    // Same as the float path, but converts the blob to int32
    ofPixels& pix = img->getPixels();
    int width = img->getWidth();
    int height = img->getHeight();
    int channels = pix.getNumChannels();

    cv::Mat cvImage = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pix.getData());
    // Scale by 1.0 here: scaling by 1/255 and then truncating to int32 would zero out the data
    cv::Mat intMat = cv::dnn::blobFromImage(cvImage, 1.0, cv::Size(input_node_dims[3], input_node_dims[2]), cv::Scalar(0, 0, 0), false, false);

    intMat.convertTo(image_array, CV_32S);

    size_t plane_size = input_node_dims[1] * input_node_dims[2] * input_node_dims[3]; // C * H * W
    std::memcpy(
        values.data() + idx * plane_size,
        image_array.data,
        plane_size * sizeof(int32_t)
    );
}

void BaseHandler::setInputs(std::vector<ofImage*>& in) {
    this->input_imgs = in;
}

// Prints the shape of the given tensor dims (e.g. 1x1x512x512)
std::string BaseHandler::PrintShape(const std::vector<int64_t>& v) {
    if (v.empty()) return "";
    std::stringstream ss;
    for (std::size_t i = 0; i < v.size() - 1; i++) ss << v[i] << "x";
    ss << v[v.size() - 1];
    return ss.str();
}

Ort::Value BaseHandler::GenerateTensor(int batch_size) {
    // Random number generation setup
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> dis(0.0f, 255.0f); // Random values between 0 and 255

    // Copy the model dims and overwrite the first entry with the requested batch size
    std::vector<int64_t> batched_dims(input_node_dims.begin(), input_node_dims.end());
    batched_dims[0] = batch_size;

    // Total number of elements across the whole batch, e.g. {4, 8} -> 32
    size_t total_size = static_cast<size_t>(CalculateProduct(batched_dims));

    // Fill the batch with random values
    std::vector<float> batch_values(total_size);
    std::generate(batch_values.begin(), batch_values.end(), [&]() {
        return dis(gen);
    });

    // Use an allocator-owned tensor and copy the values in: a tensor referencing
    // batch_values' buffer would dangle once this function returns.
    Ort::AllocatorWithDefaultOptions allocator;
    Ort::Value tensor = Ort::Value::CreateTensor<float>(allocator, batched_dims.data(), batched_dims.size());
    std::memcpy(tensor.GetTensorMutableData<float>(), batch_values.data(), total_size * sizeof(float));
    return tensor;
}

int BaseHandler::CalculateProduct(const std::vector<int64_t>& v) {
    int total = 1;
    for (auto& i : v) total *= i;
    return total;
}

Ort::Value BaseHandler::VectorToTensor(std::vector<float>& data, const std::vector<int64_t>& shape) {
    // Create a tensor from the provided data, shape, and memory info.
    // Note: CreateTensor does not copy -- the tensor references `data`'s buffer,
    // so the vector must outlive the returned tensor.
    auto tensor = Ort::Value::CreateTensor<float>(memory_info_handler, data.data(), data.size(), shape.data(), shape.size());

    // Return the created tensor
    return tensor;
}
}
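
For reference, a minimal end-to-end sketch of using BaseHandler directly, without the thread wrapper. The BaseSetting field assignments and the INFER_CPU value are assumptions inferred from this file, not confirmed by it:

#include "ofxOnnxRuntime.h"

void runOnce(ofImage& frame) {
    ofxOnnxRuntime::BaseHandler model;

    ofxOnnxRuntime::BaseSetting setting;
    setting.infer_type   = ofxOnnxRuntime::INFER_CPU;  // assumption: CPU enum value (INFER_CUDA is confirmed above)
    setting.input_dtype  = ofxOnnxRuntime::ModelDataType::FLOAT32;
    setting.output_dtype = ofxOnnxRuntime::ModelDataType::FLOAT32;

    // batch_size = 1, debug on, timestamps off
    model.setup("model.onnx", setting, 1, true, false);

    std::vector<ofImage*> inputs = { &frame };  // size must equal batch_size
    model.setInputs(inputs);

    float* out = model.run();  // raw pointer into the first output tensor
}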