cailean
2 months ago
30 changed files with 1197 additions and 38 deletions
@ -0,0 +1,41 @@
OF_GLSL_SHADER_HEADER
precision mediump float;

uniform sampler2DRect tex0; // Main texture
uniform sampler2D tex1;     // Bayer texture
uniform vec2 resolution;
uniform float time;
uniform int frame;

in vec2 varyingtexcoord;
out vec4 fragColor;

const float SIZE = 256.0;

const mat4 ditherMatrix = mat4(
    1.0/17.0,  9.0/17.0,  3.0/17.0, 11.0/17.0,
    13.0/17.0, 5.0/17.0, 15.0/17.0,  7.0/17.0,
    4.0/17.0,  12.0/17.0, 2.0/17.0, 10.0/17.0,
    16.0/17.0, 8.0/17.0, 14.0/17.0,  6.0/17.0
);
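// Note: ditherMatrix is the standard 4x4 Bayer matrix normalized by 1/17,
// but it is not referenced below (main() samples the Bayer texture in tex1
// instead). A sketch of how it could be indexed, if wanted:
//   ivec2 p = ivec2(mod(gl_FragCoord.xy, 4.0));
//   float d = ditherMatrix[p.x][p.y];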
void main() {
    vec2 uv = varyingtexcoord / resolution;
    vec3 t = texture(tex0, varyingtexcoord).rgb;

    vec2 fragCoord = gl_FragCoord.xy;

    // Color to display; keep floating-point precision here
    vec3 col = t;

    // Sample the 8x8 Bayer texture for the dither pattern
    // (assumes tex1 wraps with GL_REPEAT so the pattern tiles)
    float ditherValue = texture(tex1, fragCoord / 8.0).r;

    // Apply dithering (the original "/ 2.0 * 2.0" cancels out to a no-op)
    col += ditherValue - 0.5;

    // Quantize the color to 8 levels per channel
    col = (floor(col * 8.0) + 0.5) / 8.0;

    fragColor = vec4(col, 1.0);
}
@ -0,0 +1,16 @@
OF_GLSL_SHADER_HEADER

uniform mat4 modelViewMatrix;
uniform mat4 projectionMatrix;
uniform mat4 textureMatrix;
uniform mat4 modelViewProjectionMatrix;

in vec4 position;
in vec2 texcoord;

out vec2 varyingtexcoord;

void main()
{
    varyingtexcoord = texcoord;
    gl_Position = modelViewProjectionMatrix * position;
}
@ -0,0 +1,25 @@
#version 150

uniform sampler2D tex0;

in vec2 tex_c;

out vec4 outputColor;

float hash13(vec3 p3) {
    p3 = fract(p3 * .1031);
    p3 += dot(p3, p3.yzx + 19.19);
    return fract((p3.x + p3.y) * p3.z);
}
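// hash13 (from the "hash without sine" family) is currently unused in this
// shader; it returns a pseudo-random float in [0, 1). Example use:
//   float n = hash13(vec3(gl_FragCoord.xy, 0.0));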
void main()
{
    vec4 texColor = texture(tex0, tex_c);

    // Discard fragments with alpha below a threshold
    if (texColor.a < 0.1) {
        discard;
    }

    outputColor = texColor;
}
@ -0,0 +1,36 @@
#version 150

uniform mat4 modelViewProjectionMatrix;
uniform float gridSize;

in vec2 texcoord;
in vec4 position;

out vec2 tex_c;

void main()
{
    tex_c = texcoord;

    // Note: the render resolution is hardcoded here; it must match the
    // actual viewport for the grid snap to line up with screen pixels.
    vec2 resolution = vec2(1280.0, 960.0);

    // Transform the vertex position to clip space
    vec4 clipPos = modelViewProjectionMatrix * position;

    // Perform perspective division
    vec3 ndcPos = clipPos.xyz / clipPos.w;

    // Convert to screen space
    vec2 screenPos = (ndcPos.xy + 1.0) * 0.5 * resolution;

    // Snap to grid (gridSize is in pixels and must be > 0)
    screenPos = floor(screenPos / gridSize) * gridSize;

    // Convert back to NDC space
    ndcPos.xy = (screenPos / resolution) * 2.0 - 1.0;

    // Reconstruct the clip-space position
    clipPos = vec4(ndcPos * clipPos.w, clipPos.w);

    gl_Position = clipPos;
}
Binary file not shown.
Binary file not shown.
@ -0,0 +1,40 @@
#include "Request.h"

/* Set up the HTTP client (the request template for vp_server) */
void Request::setup(std::string ip, int port, std::string page){
    std::cout << "Initialising HTTP client" << std::endl;

    req.method = ofHttpRequest::POST;
    req.url = "http://" + ip + ":" + ofToString(port) + "/" + page;
    req.headers["Content-Type"] = "application/json";
}

/* Send a request to vp_server & return frame/video/folder */
VPResp Request::query(Embedding& in){
    VPResp vp_resp;
    try {
        // Build the JSON body by hand; the field order must match what
        // vp_server expects
        req.body = "{\"vector\": [" +
            ofToString(in.emotions["angry"]) + "," +
            ofToString(in.emotions["disgust"]) + "," +
            ofToString(in.emotions["fear"]) + "," +
            ofToString(in.emotions["happy"]) + "," +
            ofToString(in.emotions["sad"]) + "," +
            ofToString(in.emotions["surprise"]) + "," +
            ofToString(in.emotions["neutral"]) + "]}";

        auto resp = http.handleRequest(req);
        json_resp = ofJson::parse(resp.data.getText());
        vp_resp.folder = json_resp["folder"];
        vp_resp.image = json_resp["image"];
        vp_resp.video = json_resp["video"];
        vp_resp.frame = json_resp["frame"];
        vp_resp.lost = json_resp["lost"];

        past_vp_resp = vp_resp;

        return vp_resp;
    } catch (const std::exception& e) {
        // Some issue happening here when plugging in controllers, or when
        // they initially connect; fall back to the last good response
        return past_vp_resp;
    }
}
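/* A minimal usage sketch (hypothetical values, not part of this commit):
     Request req;
     req.setup("127.0.0.1", 8000, "search");
     Embedding e;                  // defaults to all zeros
     e.emotions["happy"] = 0.9f;
     VPResp r = req.query(e);      // r.video / r.frame identify the match
*/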
@ -0,0 +1,44 @@
#pragma once

#include "ofMain.h"
#include <string>
#include <unordered_map>

struct Embedding {
    std::unordered_map<std::string, float> emotions;

    Embedding() {
        // Initialize with default values
        emotions["angry"] = 0.0f;
        emotions["disgust"] = 0.0f;
        emotions["fear"] = 0.0f;
        emotions["happy"] = 0.0f;
        emotions["sad"] = 0.0f;
        emotions["surprise"] = 0.0f;
        emotions["neutral"] = 0.0f;
    }

    bool operator!=(const Embedding &other) const {
        return emotions != other.emotions;
    }
};

/* Response from a vantage-point (VP) tree query */
struct VPResp{
    std::string folder;
    std::string video;
    std::string image;
    std::string frame;
    int lost;
};

class Request{
    public:
        void setup(std::string ip, int port, std::string page);
        VPResp query(Embedding& in);

        ofHttpRequest req;
        ofURLFileLoader http;
        ofJson json_resp;
        VPResp past_vp_resp;
};
@ -0,0 +1,152 @@
#include "Server.h"

void Server::start(){
    std::cout << "Initialising TCP server" << std::endl;
    server.setup(port);
    osc_sender.setup(OSC_HOST, OSC_PORT);
    http.setup(http_ip, http_port, http_page);
    is_active = true;
    previous_embedding = embedding;
    last_change_time = std::chrono::steady_clock::now();
}
void Server::update(){

    for (int i = 0; i < server.getLastID(); i++){
        if (server.isClientConnected(i)) {
            const int buffer_size = 8;
            char buffer[buffer_size];
            int bytes_received = server.receiveRawBytes(i, buffer, buffer_size);

            if (bytes_received == buffer_size){
                float value;
                int id;
                memcpy(&value, buffer, sizeof(float));
                memcpy(&id, buffer + sizeof(float), sizeof(int));
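                // Wire format (assumed from the memcpy layout above): each
                // 8-byte packet is a native-endian float `value` followed by
                // an int `id`, so sizeof(float) + sizeof(int) must equal 8.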
                std::string ip_address = server.getClientIP(i);

                addOrUpdateClient(id, value, ip_address);
            }
        }
    }

    updateEmbedding();
    checkActivity();
    sendOSCMessage();

    if(debug){
        printClients();
    }
}
void Server::addOrUpdateClient(int client_id, float value, const std::string& ip_address){
    ClientInfo client;

    client.ip_address = ip_address;
    client.value = value;

    clients[client_id] = client;
}

void Server::updateEmbedding() {
    // Client ids 0..6 map onto these emotion channels, in order
    std::vector<std::string> emotionNames = {
        "angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"
    };

    for (const auto& c : clients) {
        const ClientInfo& info = c.second;
        float val = std::round(info.value * 100.0f) / 100.0f;

        if (c.first >= 0 && c.first < static_cast<int>(emotionNames.size())) {
            embedding.emotions[emotionNames[c.first]] = val;
        }

        // Special case for neutral (index 6): randomise a few of the
        // other channels as well
        if (c.first == 6) {
            embedding.emotions["fear"] = ofRandom(0.1, 0.6);
            embedding.emotions["angry"] = ofRandom(0.01, 0.99);
            embedding.emotions["happy"] = ofRandom(0.01, 0.99);
        }
    }
}
void Server::printClients(){
    for (const auto& c : clients){
        int id = c.first;
        const ClientInfo& info = c.second;
        std::cout << "id: " << id
                  << ", value: " << info.value
                  << ", IP: " << info.ip_address << std::endl;
    }

    std::cout << is_active << std::endl;
}
/* Check if the controllers are in use. Note: only the neutral channel is
   compared here, so changes on the other channels alone will not register
   as activity. */
void Server::checkActivity(){
    if (previous_embedding.emotions["neutral"] != embedding.emotions["neutral"]) {
        last_change_time = std::chrono::steady_clock::now(); // Reset the timer on change
        previous_embedding = embedding; // Update the previous embedding to the current one
        is_active = true;
        sendHttpRequest();
    } else {
        // Calculate the time since the last change
        auto now = std::chrono::steady_clock::now();
        auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - last_change_time).count();

        if (duration >= 2) {
            is_active = false;
        }
    }
}
/* Send OSC messages; the audio filename is only sent when the file exists
   and differs from the previously sent one */
void Server::sendOSCMessage(){
    std::vector<ofxOscMessage> messages;

    ofxOscMessage me_0;
    ofxOscMessage me_1;
    ofxOscMessage me_2;
    ofxOscMessage me_3;
    ofxOscMessage me_file;

    std::string audio_file = vp_resp.folder;

    // Check if the file exists in the given dir
    ofFile file("/home/cailean/Desktop/rave/all_wav_files/" + audio_file + ".wav");

    if(!is_active && (audio_file != past_audio_file) && file.exists()){
        me_file.setAddress("/emote/filename");
        me_file.addStringArg(audio_file + ".wav");
        messages.push_back(me_file);
        past_audio_file = audio_file;
    }

    // Note: all four channels currently send the neutral value
    me_0.setAddress("/emote/0");
    me_0.addFloatArg(embedding.emotions["neutral"]);
    messages.push_back(me_0);
    me_1.setAddress("/emote/1");
    me_1.addFloatArg(embedding.emotions["neutral"]);
    messages.push_back(me_1);
    me_2.setAddress("/emote/2");
    me_2.addFloatArg(embedding.emotions["neutral"]);
    messages.push_back(me_2);
    me_3.setAddress("/emote/3");
    me_3.addFloatArg(embedding.emotions["neutral"]);
    messages.push_back(me_3);

    for (auto& msg : messages){
        osc_sender.sendMessage(msg, false);
    }
}
/* Sends a request to the HTTP server when activity is detected */
void Server::sendHttpRequest(){
    vp_resp = http.query(embedding);
}

std::vector<std::vector<double>> Server::generateRandomVectors(int count, int dimension){
    return tree.generateRandomVectors(count, dimension);
}
@ -0,0 +1,58 @@
#pragma once

#include "ofMain.h"
#include "ofxNetwork.h"
#include "Request.h"
#include <unordered_map>
#include <chrono>
#include "ofxOsc.h"
#include "../vp/VP.h"

#define OSC_HOST "127.0.0.1"
#define OSC_PORT 9002

struct ClientInfo {
    float value;
    std::string ip_address;
};

class Server{
    public:
        Server(int _port, Embedding _embedding, VP _tree, bool debug, std::string _http_ip, int _http_port, std::string _http_page)
            : port(_port), embedding(_embedding), tree(_tree), debug(debug), http_ip(_http_ip), http_port(_http_port), http_page(_http_page) {}

        void start();
        void update();
        void addOrUpdateClient(int client_id, float value, const std::string& ip_address);
        void printClients();
        void updateEmbedding();
        void checkActivity();
        void sendHttpRequest();
        void sendOSCMessage();
        std::vector<std::vector<double>> generateRandomVectors(int count, int dimension);

        int port;
        ofxTCPServer server;
        std::unordered_map<int, ClientInfo> clients;
        bool debug;
        bool is_active;
        Embedding embedding;

        Request http;
        std::string http_ip;
        int http_port;
        std::string http_page;
        VPResp vp_resp;

    private:
        Embedding previous_embedding;
        std::chrono::time_point<std::chrono::steady_clock> last_change_time;

        ofxOscSender osc_sender;
        std::string past_audio_file;

        /* vp tree */
        VP tree;
};
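/* A minimal wiring sketch (hypothetical values; assumes VP is
   default-constructible):
     VP tree;
     Server server(11999, Embedding{}, tree, false, "127.0.0.1", 8000, "search");
     server.start();    // e.g. in ofApp::setup()
     server.update();   // once per frame in ofApp::update()
*/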
@ -0,0 +1,99 @@
#include "ofMain.h"
#include "Onnx.h"
#include "Yolo.h"

class ModelThread : public ofThread
{
    public:
        ofImage* img;
        ofFbo* fbo;
        Onnx* model;
        Yolo* yolo;
        std::vector<types::BoxfWithLandmarks>* detected_faces;
        std::string model_type;

        // emotion recognition model
        std::vector<ofImage>* croppedFaces;
        float* emotional_data;

        ~ModelThread(){
            stop();
            waitForThread(false);
        }

        void setup(ofImage* _img, ofFbo* _fbo, Onnx* _model){
            std::lock_guard<std::mutex> lock(mutex);
            this->img = _img;
            this->fbo = _fbo;
            this->model = _model;
            this->model_type = "depth";
        }

        void setupYolo(ofImage* _img, std::vector<types::BoxfWithLandmarks>* _detected_faces, Onnx* _model, Yolo* _yolo){
            std::lock_guard<std::mutex> lock(mutex);
            this->img = _img;
            this->detected_faces = _detected_faces;
            this->model_type = "yolo";
            this->model = _model;
            this->yolo = _yolo;
        }

        void start(){
            startThread();
        }

        void stop(){
            stopThread();
            condition.notify_all();
        }

        void threadedFunction(){
            while(isThreadRunning()){
                if(model_type == "depth"){
                    std::unique_lock<std::mutex> lock(mutex);
                    inferDepthImage(fbo, img, model);
                    condition.wait(lock);
                } else if(model_type == "yolo") {
                    std::unique_lock<std::mutex> lock(mutex);
                    inferYolo();
                    condition.wait(lock);
                }
            }
        }

        void update(){
            std::lock_guard<std::mutex> lock(mutex);
            condition.notify_one();
        }
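        // Usage pattern (assumed): the owning app calls start() once and
        // then update() every frame. threadedFunction() runs one inference,
        // then blocks on `condition` until update() calls notify_one(), so
        // at most one inference is in flight per frame.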
        void inferYolo(){
            auto output_tensors_face = model->Run(*img);

            auto output_faces = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();

            unsigned int num_anchors = output_faces[1]; // Number of anchors

            float* output_face_ptr = output_tensors_face.front().GetTensorMutableData<float>();

            yolo->ParseOutput(output_face_ptr, *detected_faces, num_anchors);
        }

        void inferDepthImage(ofFbo* fbo, ofImage* img, Onnx* model){
            auto output_tensors = model->Run(*img);
            float* output_ptr = output_tensors.front().GetTensorMutableData<float>();
            size_t num_elements = output_tensors.front().GetTensorTypeAndShapeInfo().GetElementCount();

            float min_value = model->ReduceMin(output_ptr, num_elements);
            float max_value = model->ReduceMax(output_ptr, num_elements);

            model->Normalize(output_ptr, num_elements, min_value, max_value);

            // 518x518 is assumed to be the depth model's output resolution
            model->DataToFbo(output_ptr, 518, 518, *fbo);
        }

    protected:
        std::condition_variable condition;
};
@ -0,0 +1,272 @@
#include "Onnx.h"
#include <cmath>

// Sets up the model. CUDA is enabled by default
void Onnx::Setup(ORTCHAR_T* modelPath, bool isLog, bool useCuda){
    session_options.SetIntraOpNumThreads(1);
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
    log = isLog;

    // cuda setup
    if(useCuda){
        OrtCUDAProviderOptions opts;
        opts.device_id = 0;
        opts.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
        opts.do_copy_in_default_stream = 0;
        opts.arena_extend_strategy = 0;
        session_options.AppendExecutionProvider_CUDA(opts);
    }

    ort_session = std::make_shared<Ort::Session>(ort_env, modelPath, session_options);

    Ort::AllocatorWithDefaultOptions allocator;
    if(log)
        std::cout << "Input Node Name/Shape (" << ort_session->GetInputCount() << "):" << std::endl;
    for (std::size_t i = 0; i < ort_session->GetInputCount(); i++) {
        input_node_names.emplace_back(ort_session->GetInputNameAllocated(i, allocator).get());
        input_node_dims = ort_session->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
        if(log)
            std::cout << "\t" << input_node_names.at(i) << " : " << PrintShape(input_node_dims) << std::endl;
    }

    // some models have negative shape values to indicate a dynamic shape,
    // e.g. for a variable batch size; pin those dimensions to 1
    for (auto& s : input_node_dims) {
        if (s < 0) {
            s = 1;
            if(log)
                std::cout << "dynamic dimension pinned to 1" << std::endl;
        }
    }

    if(log)
        std::cout << "Output Node Name/Shape (" << ort_session->GetOutputCount() << "):" << std::endl;
    for (std::size_t i = 0; i < ort_session->GetOutputCount(); i++) {
        output_node_names.emplace_back(ort_session->GetOutputNameAllocated(i, allocator).get());
        auto output_shapes = ort_session->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
        if(log)
            std::cout << "\t" << output_node_names.at(i) << " : " << PrintShape(output_shapes) << std::endl;
    }
}
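/* A minimal call sketch (hypothetical model path; note the signature takes
   a mutable ORTCHAR_T*, so a string literal cannot be passed directly):
     static ORTCHAR_T path[] = ORT_TSTR("bin/data/yolov5n-face.onnx");
     Onnx model;
     model.Setup(path, true, true);   // log on, CUDA on
*/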
// Runs the model, given an image
std::vector<Ort::Value> Onnx::Run(ofImage &img){
    auto start = std::chrono::high_resolution_clock::now();

    TransformImage(img);
    size_t input_tensor_size = image_array.total();
    std::vector<Ort::Value> input_tensors;
    Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    input_tensors.emplace_back(Ort::Value::CreateTensor<float>(mem_info, (float*)image_array.data,
        input_tensor_size, input_node_dims.data(), input_node_dims.size()));

    // double-check the dimensions of the input tensor
    assert(input_tensors[0].IsTensor() && input_tensors[0].GetTensorTypeAndShapeInfo().GetShape() == input_node_dims);

    // collect input/output names as C strings for the Run() call
    std::vector<const char*> input_names_char(input_node_names.size(), nullptr);
    std::transform(std::begin(input_node_names), std::end(input_node_names), std::begin(input_names_char),
        [&](const std::string& str) { return str.c_str(); });

    std::vector<const char*> output_names_char(output_node_names.size(), nullptr);
    std::transform(std::begin(output_node_names), std::end(output_node_names), std::begin(output_names_char),
        [&](const std::string& str) { return str.c_str(); });

    try {
        auto output_tensors = ort_session->Run(Ort::RunOptions{nullptr}, input_names_char.data(), input_tensors.data(),
            input_names_char.size(), output_names_char.data(), output_names_char.size());

        if (timeStamp) {
            auto end = std::chrono::high_resolution_clock::now();
            std::chrono::duration<double, std::milli> elapsed = end - start;
            std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl;
        }

        return output_tensors;

    } catch (const Ort::Exception& exception) {
        std::cout << "ERROR running model inference: " << exception.what() << std::endl;
        // On failure the input tensors are returned, so callers should not
        // assume the result is a model output
        return input_tensors;
    }
}
/*
    Runs the model with a batch of images as input (emotion.onnx):
    (1) Creates a 1-D float array sized batch * channels * width * height
    (2) Transforms each image into a cv::Mat blob and copies it into the array
    (3) Sends that data to the model to be processed
*/
std::vector<Ort::Value> Onnx::RunBatch(std::vector<ofImage>& images){

    auto start = std::chrono::high_resolution_clock::now();

    // Number of images in the batch
    size_t batchSize = images.size();

    std::vector<int64_t> batch_node_dims = {static_cast<int64_t>(batchSize), 3, 260, 260};

    std::vector<float> batch_image_array(static_cast<int64_t>(batchSize) * 3 * 260 * 260);

    for(size_t i = 0; i < batchSize; i++){
        TransformImage(images[i]);
        // Copy the image data into batch_image_array
        std::memcpy(
            batch_image_array.data() + i * 3 * 260 * 260, // Destination: this image's slot in the batch array
            image_array.data,                             // Source: pixel data in the cv::Mat blob
            3 * 260 * 260 * sizeof(float)                 // Size: bytes per image (3 channels * 260 * 260)
        );
    }

    size_t input_tensor_size = batch_image_array.size();
    std::vector<Ort::Value> input_tensors;
    Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    input_tensors.emplace_back(Ort::Value::CreateTensor<float>(mem_info, batch_image_array.data(),
        input_tensor_size, batch_node_dims.data(), batch_node_dims.size()));

    // double-check the dimensions of the input tensor
    assert(input_tensors[0].IsTensor() && input_tensors[0].GetTensorTypeAndShapeInfo().GetShape() == batch_node_dims);

    if(log)
        std::cout << "\ninput_tensor shape: " << PrintShape(input_tensors[0].GetTensorTypeAndShapeInfo().GetShape()) << std::endl;

    // collect input/output names as C strings for the Run() call
    std::vector<const char*> input_names_char(input_node_names.size(), nullptr);
    std::transform(std::begin(input_node_names), std::end(input_node_names), std::begin(input_names_char),
        [&](const std::string& str) { return str.c_str(); });

    std::vector<const char*> output_names_char(output_node_names.size(), nullptr);
    std::transform(std::begin(output_node_names), std::end(output_node_names), std::begin(output_names_char),
        [&](const std::string& str) { return str.c_str(); });

    try {
        auto output_tensors = ort_session->Run(Ort::RunOptions{nullptr}, input_names_char.data(), input_tensors.data(),
            input_names_char.size(), output_names_char.data(), output_names_char.size());

        if (timeStamp) {
            auto end = std::chrono::high_resolution_clock::now();
            std::chrono::duration<double, std::milli> elapsed = end - start;
            std::cout << "Batch inference took " << elapsed.count() << " ms" << std::endl;
        }

        return output_tensors;

    } catch (const Ort::Exception& exception) {
        std::cout << "ERROR running model inference: " << exception.what() << std::endl;
        return input_tensors;
    }
}
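/* A usage sketch (hypothetical; assumes an emotion model with 7 output
   classes per face, as used elsewhere in this commit):
     auto out = emotion_model.RunBatch(croppedFaces);
     float* logits = out.front().GetTensorMutableData<float>();
     emotion_model.Softmax(logits, 7);   // normalise the first face's logits in place
*/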
// Transforms an ofImage into a normalized NCHW float blob (cv::Mat)
void Onnx::TransformImage(ofImage &img){
    // Convert ofImage to cv::Mat
    ofPixels &pixels = img.getPixels(); // Get pixels from ofImage
    int width = img.getWidth();
    int height = img.getHeight();
    int channels = img.getPixels().getNumChannels();

    // Create a cv::Mat wrapping the pixel data (no copy)
    cv::Mat cvImg = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pixels.getData());

    // blobFromImage rescales by 1/255 and resizes to the model input.
    // Note: the original passed (0, 0, 0) for the mean, which the comma
    // operator collapses to the scalar 0 -- cv::Scalar makes it explicit.
    // cv::Size takes (width, height), i.e. dims[3], dims[2] for NCHW.
    image_array = cv::dnn::blobFromImage(cvImg, 1 / 255.0, cv::Size(input_node_dims[3], input_node_dims[2]), cv::Scalar(0, 0, 0), false, false);
}
/* Generates a random 1-dimensional float array with values in 0..254.
   Returns a tensor. */
Ort::Value Onnx::GenerateTensor(){
    std::vector<float> random_input_tensor_values(CalculateProduct(input_node_dims));
    std::generate(random_input_tensor_values.begin(), random_input_tensor_values.end(), [&] { return rand() % 255; });
    return VectorToTensor(random_input_tensor_values, input_node_dims);
}

// Calculates the product of the vector's values, i.e. the element count
int Onnx::CalculateProduct(const std::vector<std::int64_t>& v){
    int total = 1;
    for (auto& i : v) total *= i;
    return total;
}

// Creates a tensor from a given vector input.
Ort::Value Onnx::VectorToTensor(std::vector<float>& data, const std::vector<std::int64_t>& shape){
    Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    auto tensor = Ort::Value::CreateTensor<float>(mem_info, data.data(), data.size(), shape.data(), shape.size());
    return tensor;
}

// Prints the shape of the given tensor (ex. (1, 1, 512, 512) -> "1x1x512x512")
std::string Onnx::PrintShape(const std::vector<std::int64_t>& v){
    if (v.empty()) return "";
    std::stringstream ss("");
    for (std::size_t i = 0; i < v.size() - 1; i++) ss << v[i] << "x";
    ss << v[v.size() - 1];
    return ss.str();
}

// Returns the minimum value in an array
float Onnx::ReduceMin(const float* data, size_t size) {
    return *std::min_element(data, data + size);
}

// Returns the maximum value in an array
float Onnx::ReduceMax(const float* data, size_t size) {
    return *std::max_element(data, data + size);
}

// Normalizes an array to the range 0..1
void Onnx::Normalize(float* data, size_t size, float min_value, float max_value) {
    if (max_value == min_value) return; // avoid division by zero on flat data
    for (size_t i = 0; i < size; ++i) {
        data[i] = (data[i] - min_value) / (max_value - min_value);
    }
}
// Converts the output tensor data to the texture of a given ofFbo
void Onnx::DataToFbo(float* data, size_t width, size_t height, ofFbo& fbo){
    // Wrap the data in an OpenCV Mat (32-bit float, single channel)
    cv::Mat inputMat(height, width, CV_32FC1);
    memcpy(inputMat.data, data, width * height * sizeof(float));

    // Convert to 8-bit grayscale
    cv::Mat inputMat8U;
    inputMat.convertTo(inputMat8U, CV_8UC1, 255.0);

    // Resize to the fbo's dimensions using OpenCV
    cv::Mat resizedMat;
    cv::resize(inputMat8U, resizedMat, cv::Size(fbo.getWidth(), fbo.getHeight()), 0, 0, cv::INTER_LINEAR);

    // Copy the resized data into ofPixels
    pixels.allocate(fbo.getWidth(), fbo.getHeight(), OF_PIXELS_GRAY);
    memcpy(pixels.getData(), resizedMat.data, fbo.getWidth() * fbo.getHeight());
}

void Onnx::SetPixels(ofFbo& fbo){
    fbo.begin();
    ofTexture& texture = fbo.getTexture();
    texture.loadData(pixels);
    fbo.end();
}

// Numerically stable softmax over `size` logits, in place
void Onnx::Softmax(float* data, size_t size) {
    std::vector<float> logits(data, data + size);
    std::vector<float> expValues(size);
    float maxLogit = *std::max_element(logits.begin(), logits.end());

    // Calculate exp(logit - maxLogit) for numerical stability
    std::transform(logits.begin(), logits.end(), expValues.begin(),
        [maxLogit](float logit) { return std::exp(logit - maxLogit); });

    float sumExp = std::accumulate(expValues.begin(), expValues.end(), 0.0f);

    // Normalize to get probabilities
    std::transform(expValues.begin(), expValues.end(), data,
        [sumExp](float expValue) { return expValue / sumExp; });
}
@ -0,0 +1,56 @@
#pragma once

#include <onnxruntime_cxx_api.h>
#include <algorithm> // std::generate
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <cstdlib> // For std::rand and std::srand
#include "ofMain.h"
#include "ofxOpenCv.h"
#include <numeric>
#include <cmath>

class Onnx {

    public:
        Onnx() {}
        void Setup(ORTCHAR_T* modelPath, bool isLog, bool useCuda);
        std::vector<Ort::Value> Run(ofImage &img);
        std::vector<Ort::Value> RunBatch(std::vector<ofImage> &images);
        std::string PrintShape(const std::vector<std::int64_t>& v);
        Ort::Value VectorToTensor(std::vector<float>& data, const std::vector<std::int64_t>& shape);
        Ort::Value GenerateTensor();
        int CalculateProduct(const std::vector<std::int64_t>& v);
        void TransformImage(ofImage &img);
        float ReduceMin(const float* data, size_t size);
        float ReduceMax(const float* data, size_t size);
        void Normalize(float* data, size_t size, float min_value, float max_value);
        void DataToFbo(float* data, size_t width, size_t height, ofFbo& fbo);
        void Softmax(float* data, size_t size);
        void SetPixels(ofFbo& fbo);
        bool timeStamp = false;
        bool log = false;
        ofPixels pixels;

    protected:
        Ort::Env ort_env;
        Ort::SessionOptions session_options;
        cv::Mat image_array;
        std::shared_ptr<Ort::Session> ort_session;
        std::vector<std::string> input_node_names;
        std::vector<int64_t> input_node_dims; // 1 input only
        std::size_t input_tensor_size = 1;
        std::vector<float> input_values_handler;
        Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        std::vector<std::string> output_node_names;
        std::vector<std::vector<int64_t>> output_node_dims; // >=1 outputs
        std::vector<Ort::Value> output_values;
        Ort::Value dummy_tensor{ nullptr };
        std::vector<ofImage> imageBatch;
        int num_outputs = 1;
};
@ -0,0 +1,157 @@
#include "Yolo.h"

// Takes output tensor data, processes it, and fills an array of
// BoxfWithLandmarks (sorted_faces)
void Yolo::ParseOutput(float* &output_tensors, std::vector<types::BoxfWithLandmarks> &sorted_faces, unsigned int num_anchors) {

    std::vector<types::BoxfWithLandmarks> detected_faces;

    for (unsigned int i = 0; i < num_anchors; ++i) {
        // Row layout (16 floats, inferred from the indices used below):
        // [cx, cy, w, h, obj_conf, 5 landmark (x, y) pairs, cls_conf]
        const float *row_ptr = output_tensors + i * 16;
        float obj_conf = row_ptr[4]; // Objectness confidence

        if (obj_conf < 0.5) continue; // Filter out low-confidence detections

        // Extract bounding box, confidence, and landmarks
        float cx = row_ptr[0];
        float cy = row_ptr[1];
        float w = row_ptr[2];
        float h = row_ptr[3];
        float cls_conf = row_ptr[15]; // Face confidence score

        if (cls_conf < 0.5) continue; // Filter by class confidence

        types::BoxfWithLandmarks face;
        face.box.x1 = cx - w / 2;
        face.box.y1 = cy - h / 2;
        face.box.x2 = cx + w / 2;
        face.box.y2 = cy + h / 2;
        face.box.score = cls_conf;
        face.box.label_text = "face";

        // Extract the five landmark points
        for (int j = 0; j < 10; j += 2) {
            face.landmarks.points.push_back(cv::Point2f(row_ptr[5 + j], row_ptr[5 + j + 1]));
        }

        detected_faces.push_back(face); // Store the detected face with landmarks
    }

    // Apply NMS to the detected faces to remove overlapping bounding boxes
    NonMaximumSuppression(detected_faces, sorted_faces, 0.5);

    // Sort faces by confidence value
    SortDetectedFaces(sorted_faces);
}
// Simple helper for drawing boxes given x1, y1, x2, y2 coordinates.
void Yolo::DrawBox(std::vector<types::BoxfWithLandmarks> &detected_faces){
    for (const auto &face : detected_faces) {
        ofNoFill();
        float w = ofGetWindowWidth() / 2;
        ofDrawRectangle(face.box.x1 + w, face.box.y1, ((face.box.x2 + w) - (face.box.x1 + w)), face.box.y2 - face.box.y1);
    }
}

// Simple helper to draw a circle at the center of the first detected face.
void Yolo::DrawCenter(std::vector<types::BoxfWithLandmarks> &detected_faces){
    if (detected_faces.empty()) return; // guard against indexing an empty list
    ofNoFill();
    glm::vec2 position = detected_faces[0].box.center;
    ofDrawCircle(position.x + ofGetWindowWidth() / 2, position.y, 5);
}
// Applies NMS to an array of BoxfWithLandmarks boxes, removing overlapping bounding boxes.
void Yolo::NonMaximumSuppression(std::vector<types::BoxfWithLandmarks> &input_faces, std::vector<types::BoxfWithLandmarks> &output_faces, float iou_threshold)
{
    output_faces.clear(); // start fresh; callers may reuse this vector across frames

    // Sort the boxes by their confidence scores (highest to lowest)
    std::sort(input_faces.begin(), input_faces.end(),
        [](const types::BoxfWithLandmarks &a, const types::BoxfWithLandmarks &b) {
            return a.box.score > b.box.score;
        });

    std::vector<int> suppressed(input_faces.size(), 0); // Suppression mask

    // Iterate through the boxes
    for (size_t i = 0; i < input_faces.size(); ++i) {
        if (suppressed[i]) continue; // Skip already suppressed boxes

        // Add this box to the output
        output_faces.push_back(input_faces[i]);

        for (size_t j = i + 1; j < input_faces.size(); ++j) {
            if (suppressed[j]) continue;

            // Calculate IoU between box i and box j
            float iou = input_faces[i].box.iou_of(input_faces[j].box);

            // Suppress box j if IoU is greater than the threshold
            if (iou > iou_threshold) {
                suppressed[j] = 1;
            }
        }
    }
}
// Scales detected face coordinates from the model output dimensions to the original input image dimensions.
void Yolo::ConvertBoxCoordsToOriginalSize(std::vector<types::BoxfWithLandmarks> &detected_faces, size_t original_width, size_t original_height){
    float width_scale = static_cast<float>(original_width) / modelSize;
    float height_scale = static_cast<float>(original_height) / modelSize;

    for (auto &face : detected_faces) {
        // Convert bounding box coordinates
        face.box.x1 *= width_scale;
        face.box.y1 *= height_scale;
        face.box.x2 *= width_scale;
        face.box.y2 *= height_scale;
        face.box.UpdateCenter();

        // Convert landmarks
        for (size_t j = 0; j < face.landmarks.points.size(); ++j) {
            face.landmarks.points[j].x *= width_scale;
            face.landmarks.points[j].y *= height_scale;
        }
    }
}
void Yolo::CropFaceToImage(ofImage &inputImage, types::BoxfWithLandmarks &face, ofxCvColorImage &colorImage){

    colorImage.resetROI();

    // Calculate the coordinates and dimensions of the face box
    float x1 = face.box.x1;
    float y1 = face.box.y1;
    float x2 = face.box.x2;
    float y2 = face.box.y2;

    // Ensure coordinates are within the input image bounds
    x1 = ofClamp(x1, 0.0f, (float)inputImage.getWidth());
    y1 = ofClamp(y1, 0.0f, (float)inputImage.getHeight());
    x2 = ofClamp(x2, 0.0f, (float)inputImage.getWidth());
    y2 = ofClamp(y2, 0.0f, (float)inputImage.getHeight());

    // Calculate width and height of the cropped area
    float cropWidth = x2 - x1;
    float cropHeight = y2 - y1;

    // Draw the cropped section, defined by the box coords, into a temporary fbo
    ofFbo tempFbo;
    tempFbo.allocate(cropWidth, cropHeight, GL_RGB);

    tempFbo.begin();
    ofClear(0);
    inputImage.getTexture().drawSubsection(0, 0, cropWidth, cropHeight, x1, y1);
    tempFbo.end();

    // Read back as 8-bit pixels (ofxCvColorImage expects unsigned char data,
    // so ofPixels is used here rather than ofFloatPixels)
    ofPixels pix;
    tempFbo.readToPixels(pix);
    colorImage.setFromPixels(pix);

    // Resize to the emotion model's 260x260 input
    colorImage.resize(260, 260);
}
void Yolo::SortDetectedFaces(std::vector<types::BoxfWithLandmarks> &detectedFaces){
    std::sort(detectedFaces.begin(), detectedFaces.end(),
        [](const types::BoxfWithLandmarks &a, const types::BoxfWithLandmarks &b) {
            return a.box.score > b.box.score; // Sort in descending order
        });
}
@ -0,0 +1,81 @@
#ifndef YOLO
#define YOLO

#include "ofMain.h"
#include "ofxOpenCv.h"
#include <onnxruntime_cxx_api.h>
#include <algorithm>

struct Emotef{
    float emotions[7];
};

namespace types {

    /*
        Struct for storing information about detected faces.
    */
    struct Boxf {
        float x1, y1, x2, y2;   // Coordinates of the bounding box
        float score;            // Confidence score
        glm::vec2 center;
        int label;              // Numeric class label
        std::string label_text; // Class label text (e.g., "face")
        Emotef emotional_state;

        // Calculate Intersection over Union (IoU) with another box
        float iou_of(const Boxf &other) const {
            float intersection_x1 = std::max(x1, other.x1);
            float intersection_y1 = std::max(y1, other.y1);
            float intersection_x2 = std::min(x2, other.x2);
            float intersection_y2 = std::min(y2, other.y2);

            float intersection_area = std::max(0.0f, intersection_x2 - intersection_x1) *
                                      std::max(0.0f, intersection_y2 - intersection_y1);

            float this_area = (x2 - x1) * (y2 - y1);
            float other_area = (other.x2 - other.x1) * (other.y2 - other.y1);

            float union_area = this_area + other_area - intersection_area;

            return intersection_area / union_area;
        }

        void UpdateCenter(){
            center.x = (x1 + x2) / 2;
            center.y = (y1 + y2) / 2;
        }

        void SetEmotionState(float* emotional_data){
            std::copy(emotional_data, emotional_data + 7, emotional_state.emotions);
        }
    };

    struct Landmarks {
        std::vector<cv::Point2f> points; // Facial landmark points (e.g., eyes, nose, mouth)
        bool flag = false;               // Indicator if landmarks are available
    };

    struct BoxfWithLandmarks {
        Boxf box;            // Bounding box for the face
        Landmarks landmarks; // Landmark points for the face
        bool flag = false;   // Indicator if this detection is valid
    };
}

class Yolo{
    public:
        Yolo(){};
        void ParseOutput(float* &out_ptr, std::vector<types::BoxfWithLandmarks> &sorted_faces, unsigned int num_anchors);
        void DrawBox(std::vector<types::BoxfWithLandmarks> &detected_faces);
        void DrawCenter(std::vector<types::BoxfWithLandmarks> &detected_faces);
        void NonMaximumSuppression(std::vector<types::BoxfWithLandmarks> &input_faces, std::vector<types::BoxfWithLandmarks> &output_faces, float iou_threshold);
        void ConvertBoxCoordsToOriginalSize(std::vector<types::BoxfWithLandmarks> &detected_faces, size_t original_width, size_t original_height);
        void CropFaceToImage(ofImage &inputImage, types::BoxfWithLandmarks &face, ofxCvColorImage &colorImage);
        void SortDetectedFaces(std::vector<types::BoxfWithLandmarks> &detectedFaces);
    private:
        // Input dimensions of the model -- used for coordinate scaling.
        size_t modelSize = 640;
};

#endif