cailean
2 months ago
30 changed files with 1197 additions and 38 deletions
After Width: | Height: | Size: 3.2 KiB |
@ -0,0 +1,41 @@ |
|||||
|
OF_GLSL_SHADER_HEADER

precision mediump float;

// Main scene texture (sampler2DRect: sampled with pixel coordinates).
uniform sampler2DRect tex0;
// 8x8 Bayer pattern texture used for ordered dithering.
// NOTE(review): sampled with fragCoord / 8.0 on a normalized sampler2D, so
// the texture's wrap mode must be GL_REPEAT for the pattern to tile.
uniform sampler2D tex1;
uniform vec2 resolution;
uniform float time;   // currently unused
uniform int frame;    // currently unused

in vec2 varyingtexcoord;

out vec4 fragColor;

const float SIZE = 256.0; // currently unused

// 4x4 Bayer matrix — kept for reference; main() samples tex1 instead.
const mat4 ditherMatrix = mat4(
    1.0/17.0,  9.0/17.0,  3.0/17.0, 11.0/17.0,
   13.0/17.0,  5.0/17.0, 15.0/17.0,  7.0/17.0,
    4.0/17.0, 12.0/17.0,  2.0/17.0, 10.0/17.0,
   16.0/17.0,  8.0/17.0, 14.0/17.0,  6.0/17.0
);

void main() {
    vec2 uv = varyingtexcoord / resolution; // normalized coords (unused below)
    vec3 t = texture(tex0, varyingtexcoord).rgb;

    vec2 fragCoord = gl_FragCoord.xy;

    // color to display, use floating point precision here
    vec3 col = t;

    // Sample the Bayer 8x8 texture for the dither pattern
    float ditherValue = texture(tex1, fragCoord / 8.0).r;

    // Apply the dither offset.
    // Fixed: was "(ditherValue - 0.5) / 2.0 * 2.0" — the divide and multiply
    // by 2.0 cancel exactly in IEEE floats, so the dead arithmetic is removed.
    col += (ditherValue - 0.5);

    // Quantize the color to 8 levels per channel
    col = (floor(col * 8.0) + 0.5) / 8.0;

    fragColor = vec4(col, 1.0);
}
@ -0,0 +1,16 @@ |
|||||
|
OF_GLSL_SHADER_HEADER

// Standard matrices supplied by the openFrameworks renderer.
uniform mat4 modelViewMatrix;       // unused here
uniform mat4 projectionMatrix;      // unused here
uniform mat4 textureMatrix;         // unused here
uniform mat4 modelViewProjectionMatrix;

in vec4 position;
in vec2 texcoord;

// Passed through to the fragment shader.
out vec2 varyingtexcoord;

// Pass-through vertex shader: forwards the texture coordinate and
// transforms the vertex into clip space.
void main()
{
    varyingtexcoord = texcoord;
    gl_Position = modelViewProjectionMatrix * position;
}
@ -0,0 +1,25 @@ |
|||||
|
#version 150

uniform sampler2D tex0;

in vec2 tex_c;

out vec4 outputColor;

// Hash producing a pseudo-random float in [0,1) from a vec3.
// Currently unused by main(); kept for experimentation.
float hash13(vec3 p3) {
    p3 = fract(p3 * .1031);
    p3 += dot(p3, p3.yzx + 19.19);
    return fract((p3.x + p3.y) * p3.z);
}

// Draws the texture, discarding nearly-transparent fragments so they
// write neither color nor depth.
void main()
{
    vec4 texColor = texture(tex0, tex_c);

    // Discard fragments with alpha below a threshold
    if (texColor.a < 0.1) {
        discard;
    }

    outputColor = texColor;
}
@ -0,0 +1,36 @@ |
|||||
|
#version 150

uniform mat4 modelViewProjectionMatrix;
uniform float gridSize; // snap step, in screen pixels

in vec2 texcoord;
in vec4 position;

out vec2 tex_c;

// Snaps each vertex to a fixed screen-space grid (PS1-style vertex jitter):
// project to screen space, floor to the grid, then reproject to clip space.
void main()
{
    tex_c = texcoord;

    // NOTE(review): hard-coded target resolution — must match the actual
    // framebuffer size or the snapping grid will be scaled. TODO confirm.
    vec2 resolution = vec2(1280.0, 960.0);

    // Transform the vertex position to clip space
    vec4 clipPos = modelViewProjectionMatrix * position;

    // Perform perspective division
    vec3 ndcPos = clipPos.xyz / clipPos.w;

    // Convert to screen space
    vec2 screenPos = (ndcPos.xy + 1.0) * 0.5 * resolution;

    // Snap to grid
    screenPos = floor(screenPos / gridSize) * gridSize;

    // Convert back to NDC space
    ndcPos.xy = (screenPos / resolution) * 2.0 - 1.0;

    // Reconstruct the clip space position (re-apply the perspective divide)
    clipPos = vec4(ndcPos * clipPos.w, clipPos.w);

    gl_Position = clipPos;
}
Binary file not shown.
Binary file not shown.
@ -0,0 +1,40 @@ |
|||||
|
#include "Request.h" |
||||
|
|
||||
|
/* setup http server */ |
||||
|
void Request::setup(std::string ip, int port, std::string page){ |
||||
|
std::cout << "Initialising HTTP Server" << std::endl; |
||||
|
|
||||
|
req.method = ofHttpRequest::POST; |
||||
|
req.url = "http://" + ip + ":" + ofToString(port) + "/" + page; |
||||
|
req.headers["Content-Type"] = "application/json"; |
||||
|
} |
||||
|
|
||||
|
/* send a request to vp_server & return frame/video/folder */ |
||||
|
VPResp Request::query(Embedding& in){ |
||||
|
VPResp vp_resp; |
||||
|
try { |
||||
|
req.body = "{\"vector\": [" + |
||||
|
ofToString(in.emotions["angry"]) + "," + |
||||
|
ofToString(in.emotions["disgust"]) + "," + |
||||
|
ofToString(in.emotions["fear"]) + "," + |
||||
|
ofToString(in.emotions["happy"]) + "," + |
||||
|
ofToString(in.emotions["sad"]) + "," + |
||||
|
ofToString(in.emotions["surprise"]) + "," + |
||||
|
ofToString(in.emotions["neutral"]) + "]}"; |
||||
|
|
||||
|
auto resp = http.handleRequest(req); |
||||
|
json_resp = ofJson::parse(resp.data.getText()); |
||||
|
vp_resp.folder = json_resp["folder"]; |
||||
|
vp_resp.image = json_resp["image"]; |
||||
|
vp_resp.video = json_resp["video"]; |
||||
|
vp_resp.frame = json_resp["frame"]; |
||||
|
vp_resp.lost = json_resp["lost"]; |
||||
|
|
||||
|
past_vp_resp = vp_resp; |
||||
|
|
||||
|
return vp_resp; |
||||
|
} catch (exception e) { |
||||
|
// Some issue happening here when plugging in controllers, or when they initially connect
|
||||
|
return past_vp_resp; |
||||
|
} |
||||
|
} |
@ -0,0 +1,44 @@ |
|||||
|
#pragma once |
||||
|
|
||||
|
#include "ofMain.h" |
||||
|
#include <string> |
||||
|
#include <unordered_map> |
||||
|
|
||||
|
// Seven-channel emotion embedding shared between the TCP server and the
// HTTP client. Keys are fixed at construction; values are floats
// (typically rounded to 2 decimals by the server).
struct Embedding {
    std::unordered_map<std::string, float> emotions;

    Embedding() {
        // Initialize with default values
        emotions["angry"] = 0.0f;
        emotions["disgust"] = 0.0f;
        emotions["fear"] = 0.0f;
        emotions["happy"] = 0.0f;
        emotions["sad"] = 0.0f;
        emotions["surprise"] = 0.0f;
        emotions["neutral"] = 0.0f;
    }

    // Two embeddings differ when any emotion value differs.
    bool operator!=(const Embedding &other) const {
        return emotions != other.emotions;
    }
};
||||
|
|
||||
|
/* Vantage point query structure */
// Response payload parsed from vp_server's JSON reply.
struct VPResp{
    std::string folder; // folder name; also used as the audio-file stem for OSC
    std::string video;
    std::string image;
    std::string frame;
    int lost;           // NOTE(review): semantics unclear from here — presumably a lost/tracking flag; confirm against vp_server
};
||||
|
|
||||
|
// Thin HTTP client that POSTs emotion embeddings to vp_server and parses
// the JSON response into a VPResp.
class Request{
    public:
        void setup(std::string ip, int port, std::string page);
        VPResp query(Embedding& in);

        ofHttpRequest req;     // reusable request; method/url/headers set once in setup()
        ofURLFileLoader http;
        ofJson json_resp;      // last parsed server response
        VPResp past_vp_resp;   // last successful response, returned when query() fails
};
@ -0,0 +1,152 @@ |
|||||
|
#include "Server.h" |
||||
|
|
||||
|
/* Start the TCP server, OSC sender and HTTP client, and prime the
   activity-tracking state so checkActivity() has a valid baseline. */
void Server::start(){
    std::cout << "Initialising TCP server" << std::endl; // fixed typo: "sever" -> "server"
    server.setup(port);
    osc_sender.setup(OSC_HOST, OSC_PORT);
    http.setup(http_ip, http_port, http_page);
    is_active = true;
    previous_embedding = embedding;
    last_change_time = std::chrono::steady_clock::now();
}
||||
|
|
||||
|
/* Per-frame tick: poll every connected TCP client for one fixed-size
   packet (4-byte float value + 4-byte int id), then refresh the
   embedding, the activity state and the OSC output. */
void Server::update(){

    for ( int i = 0; i < server.getLastID(); i++){
        if (server.isClientConnected(i)) {
            // Wire format: little 8-byte packet, float then int.
            const int buffer_size = 8;
            char buffer[buffer_size];
            int bytes_recieved = server.receiveRawBytes(i, buffer, buffer_size);

            // Only act on complete packets; partial reads are dropped.
            // NOTE(review): assumes sender and receiver share endianness
            // and 4-byte float/int — fine for same-machine controllers.
            if (bytes_recieved == buffer_size){
                float value;
                int id;
                memcpy(&value, buffer, sizeof(float));
                memcpy(&id, buffer + sizeof(float), sizeof(int));

                std::string ip_address = server.getClientIP(i);

                addOrUpdateClient(id, value, ip_address);
            }
        }
    }

    updateEmbedding();
    checkActivity();
    sendOSCMessage();

    if(debug){
        printClients();
    }
}
||||
|
|
||||
|
void Server::addOrUpdateClient(int client_id, float value, const std::string& ip_address){ |
||||
|
ClientInfo client; |
||||
|
|
||||
|
client.ip_address = ip_address; |
||||
|
client.value = value; |
||||
|
|
||||
|
clients[client_id] = client; |
||||
|
} |
||||
|
|
||||
|
void Server::updateEmbedding() { |
||||
|
std::vector<std::string> emotionNames = { |
||||
|
"angry", "disgust", "fear", "happy", "sad", "surprise", "neutral" |
||||
|
}; |
||||
|
|
||||
|
for (const auto& c : clients) { |
||||
|
const ClientInfo& info = c.second; |
||||
|
float val = std::round(info.value * 100.0f) / 100.0f; |
||||
|
|
||||
|
if (c.first >= 0 && c.first < emotionNames.size()) { |
||||
|
embedding.emotions[emotionNames[c.first]] = val; |
||||
|
} |
||||
|
|
||||
|
// Special case for neutral (index 6)
|
||||
|
if (c.first == 6) { |
||||
|
embedding.emotions["fear"] = ofRandom(0.1, 0.6); |
||||
|
embedding.emotions["angry"] = ofRandom(0.01, 0.99); |
||||
|
embedding.emotions["happy"] = ofRandom(0.01, 0.99); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
void Server::printClients(){ |
||||
|
for( const auto& c : clients){ |
||||
|
int id = c.first; |
||||
|
const ClientInfo& info = c.second; |
||||
|
std::cout << "id: " << id |
||||
|
<< ", value: " << info.value |
||||
|
<< ", IP: " << info.ip_address << std::endl; |
||||
|
} |
||||
|
|
||||
|
std::cout << is_active << std::endl; |
||||
|
} |
||||
|
|
||||
|
/* check if the controllers are in use */
// Activity is inferred from the "neutral" channel only: any change marks
// the system active (and fires an HTTP query); 2+ seconds without change
// marks it idle. NOTE(review): changes on channels other than "neutral"
// never reset the timer — confirm that is intended.
void Server::checkActivity(){
    if (previous_embedding.emotions["neutral"] != embedding.emotions["neutral"]) { // Check if embedding has changed
        last_change_time = std::chrono::steady_clock::now(); // Reset the timer if there is a change
        previous_embedding = embedding; // Update the previous embedding to the current one
        is_active = true;
        sendHttpRequest();
    } else {
        // Calculate the time since the last change
        auto now = std::chrono::steady_clock::now();
        auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - last_change_time).count();

        // Idle threshold: 2 seconds without movement.
        if (duration >= 2) {
            is_active = false;
        }
    }
}
||||
|
|
||||
|
/* send osc msg, check if audio file exists and it is different to the past audiofile */
void Server::sendOSCMessage(){
    std::vector<ofxOscMessage> messages;

    ofxOscMessage me_0;
    ofxOscMessage me_1;
    ofxOscMessage me_2;
    ofxOscMessage me_3;
    ofxOscMessage me_file;

    // The vp_server folder name doubles as the audio-file stem.
    std::string audio_file = vp_resp.folder;

    // Check if file exists in a given dir
    // NOTE(review): hard-coded absolute path — breaks on any other machine.
    ofFile file("/home/cailean/Desktop/rave/all_wav_files/" + audio_file + ".wav");

    // Announce a new file only when idle, the file changed, and it exists.
    if(!is_active && (audio_file != past_audio_file) && file.exists()){
        me_file.setAddress("/emote/filename");
        me_file.addStringArg(audio_file + ".wav");
        messages.push_back(me_file);
        past_audio_file = audio_file;
    }

    // NOTE(review): all four channels send the "neutral" value — confirm
    // whether /emote/1../emote/3 were meant to carry other emotions.
    me_0.setAddress("/emote/0");
    me_0.addFloatArg(embedding.emotions["neutral"]);
    messages.push_back(me_0);
    me_1.setAddress("/emote/1");
    me_1.addFloatArg(embedding.emotions["neutral"]);
    messages.push_back(me_1);
    me_2.setAddress("/emote/2");
    me_2.addFloatArg(embedding.emotions["neutral"]);
    messages.push_back(me_2);
    me_3.setAddress("/emote/3");
    me_3.addFloatArg(embedding.emotions["neutral"]);
    messages.push_back(me_3);

    for (auto& msg : messages){
        osc_sender.sendMessage(msg, false);
    }

}
||||
|
|
||||
|
/* sends request to http server if is_active = true */
// NOTE(review): despite the comment, no is_active check happens here —
// the gating is done by the caller (checkActivity fires this on change).
void Server::sendHttpRequest(){
    vp_resp = http.query(embedding);
}
||||
|
|
||||
|
// Thin forwarder to the VP tree: produce `count` random vectors of the
// given dimension.
std::vector<std::vector<double>> Server::generateRandomVectors(int count, int dimension){
    return tree.generateRandomVectors(count, dimension);
}
@ -0,0 +1,58 @@ |
|||||
|
#pragma once |
||||
|
|
||||
|
#include "ofMain.h" |
||||
|
#include "ofxNetwork.h" |
||||
|
#include "Request.h" |
||||
|
#include <unordered_map> |
||||
|
#include <chrono> |
||||
|
#include "ofxOsc.h" |
||||
|
#include "../vp/VP.h" |
||||
|
|
||||
|
#define OSC_HOST "127.0.0.1" |
||||
|
#define OSC_PORT 9002 |
||||
|
|
||||
|
// Latest reading received from one TCP controller client.
struct ClientInfo {
    float value;            // most recent emotion value sent by the client
    std::string ip_address; // client address, kept for debugging output
};
||||
|
|
||||
|
// TCP server that aggregates controller input into an emotion Embedding,
// forwards it to vp_server over HTTP, and broadcasts it over OSC.
class Server{
    public:
        Server(int _port, Embedding _embedding, VP _tree, bool debug, std::string _http_ip, int _http_port, std::string _http_page)
        : port(_port), embedding(_embedding), tree(_tree), debug(debug), http_ip(_http_ip), http_port(_http_port), http_page(_http_page) {}

        void start();
        void update();
        void addOrUpdateClient(int client_id, float value, const std::string& ip_address);
        void printClients();
        void updateEmbedding();
        void checkActivity();
        void sendHttpRequest();
        void sendOSCMessage();
        std::vector<std::vector<double>> generateRandomVectors(int count, int dimension);

        int port;
        ofxTCPServer server;
        std::unordered_map<int, ClientInfo> clients; // keyed by client-supplied id
        bool debug;     // when true, update() prints the client table each frame
        bool is_active; // true while controllers are moving (see checkActivity)
        Embedding embedding;

        Request http;           // HTTP client used to query vp_server
        std::string http_ip;
        int http_port;
        std::string http_page;
        VPResp vp_resp;         // latest vp_server response

    private:
        Embedding previous_embedding; // snapshot used to detect changes
        std::chrono::time_point<std::chrono::steady_clock> last_change_time;

        ofxOscSender osc_sender;
        std::string past_audio_file;  // last filename announced over OSC

        /* vp tree */
        VP tree;
};
@ -0,0 +1,99 @@ |
|||||
|
#include "ofMain.h" |
||||
|
#include "Onnx.h" |
||||
|
#include "Yolo.h" |
||||
|
|
||||
|
// Worker thread that runs ONNX inference (depth estimation or YOLO face
// detection) off the main loop. The main thread calls update() each frame
// to wake the worker; the worker blocks on `condition` between runs.
class ModelThread : public ofThread
{
    public:
        // Non-owning pointers to data shared with the main thread; the
        // owners must outlive this thread.
        ofImage* img;
        ofFbo* fbo;
        Onnx* model;
        Yolo* yolo;
        std::vector<types::BoxfWithLandmarks>* detected_faces;
        std::string model_type; // "depth" or "yolo" — selects the inference path

        // emotional recognition model
        std::vector<ofImage>* croppedFaces;
        float* emotional_data;

        // Wake the worker so it can observe the stop flag, then join.
        ~ModelThread(){
            stop();
            waitForThread(false);
        }

        // Configure for depth inference. Call before start().
        void setup(ofImage* _img, ofFbo* _fbo, Onnx* _model){
            std::lock_guard<std::mutex> lock(mutex);
            this->img = _img;
            this->fbo = _fbo;
            this->model = _model;
            this->model_type = "depth";
        }

        // Configure for YOLO face detection. Call before start().
        void setupYolo(ofImage* _img, std::vector<types::BoxfWithLandmarks>* _detected_faces, Onnx* _model, Yolo* _yolo){
            std::lock_guard<std::mutex> lock(mutex);
            this->img = _img;
            this->detected_faces = _detected_faces;
            this->model_type = "yolo";
            this->model = _model;
            this->yolo = _yolo;
        }

        void start(){
            startThread();
        }

        void stop(){
            stopThread();
            condition.notify_all();
        }

        // Worker loop: run one inference, then sleep until update() signals.
        // NOTE(review): condition.wait(lock) has no predicate, so a spurious
        // wakeup triggers one extra inference — wasteful but harmless here.
        void threadedFunction(){
            while(isThreadRunning()){
                if(model_type == "depth"){
                    std::unique_lock<std::mutex> lock(mutex);
                    inferDepthImage(fbo, img, model);
                    condition.wait(lock);
                } else if(model_type == "yolo") {
                    std::unique_lock<std::mutex> lock(mutex);
                    inferYolo();
                    condition.wait(lock);
                }

            }
        }

        // Called from the main thread once per frame to kick the next run.
        void update(){
            std::lock_guard<std::mutex> lock(mutex);
            condition.notify_one();
        }

        // Run the YOLO model on *img and parse its anchors into detected_faces.
        void inferYolo(){
            auto output_tensors_face = model->Run(*img);

            auto output_faces = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();

            unsigned int num_anchors = output_faces[1]; // Number of anchors

            float* output_face_ptr = output_tensors_face.front().GetTensorMutableData<float>();

            yolo->ParseOutput(output_face_ptr, *detected_faces, num_anchors);
        }

        // Run the depth model on *img, normalize its output to [0,1], and
        // stage it into the fbo's pixel buffer (model output is 518x518).
        void inferDepthImage(ofFbo* fbo, ofImage* img, Onnx* model){
            auto output_tensors = model->Run(*img);
            float* output_ptr = output_tensors.front().GetTensorMutableData<float>();
            size_t num_elements = output_tensors.front().GetTensorTypeAndShapeInfo().GetElementCount();

            float min_value = model->ReduceMin(output_ptr, num_elements);
            float max_value = model->ReduceMax(output_ptr, num_elements);

            model->Normalize(output_ptr, num_elements, min_value, max_value);

            model->DataToFbo(output_ptr, 518, 518, *fbo);
        }

    protected:
        std::condition_variable condition;
};
@ -0,0 +1,272 @@ |
|||||
|
#include "Onnx.h" |
||||
|
#include <cmath> |
||||
|
|
||||
|
// Setups the model. CUDA is enabled by default
|
||||
|
void Onnx::Setup(ORTCHAR_T* modelPath, bool isLog, bool useCuda){ |
||||
|
session_options.SetIntraOpNumThreads(1); |
||||
|
session_options.SetIntraOpNumThreads(1); |
||||
|
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); |
||||
|
log = isLog; |
||||
|
|
||||
|
// cuda setup
|
||||
|
if(useCuda){ |
||||
|
OrtCUDAProviderOptions opts; |
||||
|
opts.device_id = 0; |
||||
|
opts.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive; |
||||
|
opts.do_copy_in_default_stream = 0; |
||||
|
opts.arena_extend_strategy = 0; |
||||
|
session_options.AppendExecutionProvider_CUDA(opts); |
||||
|
} |
||||
|
|
||||
|
ort_session = std::make_shared<Ort::Session>(ort_env, modelPath, session_options); |
||||
|
|
||||
|
Ort::AllocatorWithDefaultOptions allocator; |
||||
|
for (std::size_t i = 0; i < ort_session->GetInputCount(); i++) { |
||||
|
input_node_names.emplace_back(ort_session->GetInputNameAllocated(i, allocator).get()); |
||||
|
input_node_dims = ort_session->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape(); |
||||
|
if(log) |
||||
|
std::cout << "\t" << input_node_names.at(i) << " : " << PrintShape(input_node_dims) << std::endl; |
||||
|
} |
||||
|
|
||||
|
// some models might have negative shape values to indicate dynamic shape, e.g., for variable batch size.
|
||||
|
for (auto& s : input_node_dims) { |
||||
|
if (s < 0) { |
||||
|
s = 1; |
||||
|
if(log) |
||||
|
std::cout << "transfromed!" << std::endl; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
|
||||
|
|
||||
|
if(log) |
||||
|
std::cout << "Output Node Name/Shape (" << output_node_names.size() << "):" << std::endl; |
||||
|
for (std::size_t i = 0; i < ort_session->GetOutputCount(); i++) { |
||||
|
output_node_names.emplace_back(ort_session->GetOutputNameAllocated(i, allocator).get()); |
||||
|
auto output_shapes = ort_session->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape(); |
||||
|
if(log) |
||||
|
std::cout << "\t" << output_node_names.at(i) << " : " << PrintShape(output_shapes) << std::endl; |
||||
|
} |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// Runs the model, given an image.
// Preprocesses the image into a blob, wraps it in an input tensor and
// executes the session. On an ORT failure the (still-valid) input tensors
// are returned instead — callers must be prepared for that.
std::vector<Ort::Value> Onnx::Run(ofImage &img){
    auto start = std::chrono::high_resolution_clock::now();

    TransformImage(img);
    size_t input_tensor_size = image_array.total();
    std::vector<Ort::Value> input_tensors;
    Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    // Zero-copy: the tensor views image_array's buffer, so image_array must
    // stay alive (and unmodified) until the Run call completes.
    input_tensors.emplace_back(Ort::Value::CreateTensor<float>(mem_info, (float*)image_array.data,
        input_tensor_size, input_node_dims.data(), input_node_dims.size()));

    // double-check the dimensions of the input tensor
    assert(input_tensors[0].IsTensor() && input_tensors[0].GetTensorTypeAndShapeInfo().GetShape() == input_node_dims);

    //std::cout << "\ninput_tensor shape: " << PrintShape(input_tensors[0].GetTensorTypeAndShapeInfo().GetShape()) << std::endl;

    // pass data through model; ORT's API expects raw char* name arrays
    std::vector<const char*> input_names_char(input_node_names.size(), nullptr);
    std::transform(std::begin(input_node_names), std::end(input_node_names), std::begin(input_names_char),
        [&](const std::string& str) { return str.c_str(); });

    std::vector<const char*> output_names_char(output_node_names.size(), nullptr);
    std::transform(std::begin(output_node_names), std::end(output_node_names), std::begin(output_names_char),
        [&](const std::string& str) { return str.c_str(); });

    try {
        auto output_tensors = ort_session->Run(Ort::RunOptions{nullptr}, input_names_char.data(), input_tensors.data(),
            input_names_char.size(), output_names_char.data(), output_names_char.size());

        if (timeStamp) {
            auto end = std::chrono::high_resolution_clock::now();
            std::chrono::duration<double, std::milli> elapsed = end - start;
            std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl;
        }

        return output_tensors;

    } catch (const Ort::Exception& exception) {
        std::cout << "ERROR running model inference: " << exception.what() << std::endl;
        return input_tensors;

    }

}
||||
|
|
||||
|
/*
|
||||
|
Runs a model, with a batch of images as input (emotion.onnx) |
||||
|
(1) Creates a 1-dim float array based on the batch * channels * width * height) |
||||
|
(2) Transforms each image into a cv::Mat, and appends it to the array |
||||
|
(3) Sends that information to the model to be processed. |
||||
|
*/ |
||||
|
std::vector<Ort::Value> Onnx::RunBatch(std::vector<ofImage>& images){ |
||||
|
|
||||
|
auto start = std::chrono::high_resolution_clock::now(); |
||||
|
|
||||
|
// Number of images in the batch
|
||||
|
size_t batchSize = images.size(); |
||||
|
|
||||
|
std::vector<int64_t> batch_node_dims = {static_cast<int64_t>(batchSize), 3, 260, 260}; |
||||
|
|
||||
|
std::vector<float> batch_image_array(static_cast<int64_t>(batchSize) * 3 * 260 * 260); |
||||
|
|
||||
|
for(size_t i = 0; i < batchSize; i++){ |
||||
|
TransformImage(images[i]); |
||||
|
// Copy the image data into the batch_image_array
|
||||
|
std::memcpy( |
||||
|
batch_image_array.data() + i * 3 * 260 * 260, // Destination: starting point for this image in the batch array
|
||||
|
image_array.data, // Source: pixel data in the cv::Mat
|
||||
|
3 * 260 * 260 * sizeof(float) // Size: number of bytes to copy (3 channels * 260 * 260 pixels)
|
||||
|
); |
||||
|
} |
||||
|
|
||||
|
size_t input_tensor_size = batch_image_array.size(); |
||||
|
std::vector<Ort::Value> input_tensors; |
||||
|
Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); |
||||
|
input_tensors.emplace_back(Ort::Value::CreateTensor<float>(mem_info, batch_image_array.data(), |
||||
|
input_tensor_size, batch_node_dims.data(), batch_node_dims.size())); |
||||
|
|
||||
|
// double-check the dimensions of the input tensor
|
||||
|
assert(input_tensors[0].IsTensor() && input_tensors[0].GetTensorTypeAndShapeInfo().GetShape() == batch_node_dims); |
||||
|
|
||||
|
std::cout << "\ninput_tensor shape: " << PrintShape(input_tensors[0].GetTensorTypeAndShapeInfo().GetShape()) << std::endl; |
||||
|
|
||||
|
// pass data through model
|
||||
|
std::vector<const char*> input_names_char(input_node_names.size(), nullptr); |
||||
|
std::transform(std::begin(input_node_names), std::end(input_node_names), std::begin(input_names_char), |
||||
|
[&](const std::string& str) { return str.c_str(); }); |
||||
|
|
||||
|
std::vector<const char*> output_names_char(output_node_names.size(), nullptr); |
||||
|
std::transform(std::begin(output_node_names), std::end(output_node_names), std::begin(output_names_char), |
||||
|
[&](const std::string& str) { return str.c_str(); }); |
||||
|
|
||||
|
try { |
||||
|
auto output_tensors = ort_session->Run(Ort::RunOptions{nullptr}, input_names_char.data(), input_tensors.data(), |
||||
|
input_names_char.size(), output_names_char.data(), output_names_char.size()); |
||||
|
|
||||
|
if (timeStamp) { |
||||
|
auto end = std::chrono::high_resolution_clock::now(); |
||||
|
std::chrono::duration<double, std::milli> elapsed = end - start; |
||||
|
std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl; |
||||
|
} |
||||
|
|
||||
|
return output_tensors; |
||||
|
|
||||
|
} catch (const Ort::Exception& exception) { |
||||
|
std::cout << "ERROR running model inference: " << exception.what() << std::endl; |
||||
|
return input_tensors; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// Transforms an ofImage into a cv::ImageArray
|
||||
|
void Onnx::TransformImage(ofImage &img){ |
||||
|
// Convert ofImage to cv::Mat
|
||||
|
ofPixels &pixels = img.getPixels(); // Get pixels from ofImage
|
||||
|
int width = img.getWidth(); |
||||
|
int height = img.getHeight(); |
||||
|
int channels = img.getPixels().getNumChannels(); |
||||
|
|
||||
|
// Create a cv::Mat from the pixel data
|
||||
|
cv::Mat cvImg = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pixels.getData()); |
||||
|
|
||||
|
// Convert cv::Mat to cv::InputArray
|
||||
|
cv::InputArray inputArray(cvImg); |
||||
|
|
||||
|
image_array = cv::dnn::blobFromImage(inputArray, 1 / 255.0, cv::Size(input_node_dims[2] , input_node_dims[3] ), (0, 0, 0), false, false); |
||||
|
} |
||||
|
|
||||
|
/* Generates a random 1-dimensional float array sized to the model's input
   shape and returns it as a tensor. Values are in [0, 254] (note:
   rand() % 255 never yields 255). Useful for smoke-testing inference. */
Ort::Value Onnx::GenerateTensor(){
    std::vector<float> random_input_tensor_values(CalculateProduct(input_node_dims));
    std::generate(random_input_tensor_values.begin(), random_input_tensor_values.end(), [&] { return rand() % 255; });
    return VectorToTensor(random_input_tensor_values, input_node_dims);
}
||||
|
|
||||
|
// Calculates the product of the vector, how many values.
|
||||
|
int Onnx::CalculateProduct(const std::vector<std::int64_t>& v){ |
||||
|
int total = 1; |
||||
|
for (auto& i : v) total *= i; |
||||
|
return total; |
||||
|
} |
||||
|
|
||||
|
// Creates a tensor from a given vector input.
// NOTE: the tensor references `data`'s memory — the vector must outlive
// the returned Ort::Value.
Ort::Value Onnx::VectorToTensor(std::vector<float>& data, const std::vector<std::int64_t>& shape){
    Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    auto tensor = Ort::Value::CreateTensor<float>(mem_info, data.data(), data.size(), shape.data(), shape.size());
    return tensor;
}
||||
|
|
||||
|
// Prints the shape of the given tensor (ex. input: (1, 1, 512, 512))
|
||||
|
std::string Onnx::PrintShape(const std::vector<std::int64_t>& v){ |
||||
|
std::stringstream ss(""); |
||||
|
for (std::size_t i = 0; i < v.size() - 1; i++) ss << v[i] << "x"; |
||||
|
ss << v[v.size() - 1]; |
||||
|
return ss.str(); |
||||
|
} |
||||
|
|
||||
|
// Function to calculate the minimum value in an array.
// Precondition: size > 0 (the result iterator is dereferenced directly).
float Onnx::ReduceMin(const float* data, size_t size) {
    return *std::min_element(data, data + size);
}
||||
|
|
||||
|
// Function to calculate the maximum value in an array.
// Precondition: size > 0 (the result iterator is dereferenced directly).
float Onnx::ReduceMax(const float* data, size_t size) {
    return *std::max_element(data, data + size);
}
||||
|
|
||||
|
// Function to normalize an array between 0 and 1
|
||||
|
void Onnx::Normalize(float* data, size_t size, float min_value, float max_value) { |
||||
|
for (size_t i = 0; i < size; ++i) { |
||||
|
data[i] = (data[i] - min_value) / (max_value - min_value); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Converts the output tensor data to a texture-sized pixel buffer for the
// given ofFbo. `data` must hold width*height floats already normalized to
// [0,1] (see Normalize); the result lands in `pixels` — call SetPixels()
// afterwards to upload it to the fbo's texture.
void Onnx::DataToFbo(float* data, size_t width, size_t height, ofFbo& fbo){
    // Convert data into opencv mat
    cv::Mat inputMat(height, width, CV_32FC1);
    memcpy(inputMat.data, data, width * height * sizeof(float));
    // Convert to 8-bit grayscale Mat
    cv::Mat inputMat8U;
    inputMat.convertTo(inputMat8U, CV_8UC1, 255.0); // Convert float to 8-bit grayscale

    // Resize the image to the fbo's dimensions using OpenCV
    cv::Mat resizedMat;
    cv::resize(inputMat8U, resizedMat, cv::Size(fbo.getWidth(), fbo.getHeight()), 0, 0, cv::INTER_LINEAR);

    // Convert OpenCV Mat to ofPixels (single-channel grayscale)
    pixels.allocate(fbo.getWidth(), fbo.getHeight(), OF_PIXELS_GRAY);

    // Copy data from resizedMat to ofPixels (1 byte per pixel)
    memcpy(pixels.getData(), resizedMat.data, fbo.getWidth() * fbo.getHeight());
}
||||
|
|
||||
|
// Uploads the pixel buffer staged by DataToFbo into the fbo's texture.
void Onnx::SetPixels(ofFbo& fbo){
    fbo.begin();
    ofTexture& texture = fbo.getTexture();
    texture.loadData(pixels);
    fbo.end();
}
||||
|
|
||||
|
// In-place softmax over `data`, using the max-subtraction trick for
// numerical stability.
// Fixed: calling max_element on an empty range was UB; guard size == 0.
void Onnx::Softmax(float* data, size_t size) {
    if (size == 0) return;
    std::vector<float> logits(data, data + size);
    std::vector<float> expValues(size);
    float maxLogit = *std::max_element(logits.begin(), logits.end());

    // Calculate exp(logit - maxLogit) for numerical stability
    std::transform(logits.begin(), logits.end(), expValues.begin(),
        [maxLogit](float logit) { return std::exp(logit - maxLogit); });

    float sumExp = std::accumulate(expValues.begin(), expValues.end(), 0.0f);

    // Normalize to get probabilities
    std::transform(expValues.begin(), expValues.end(), data,
        [sumExp](float expValue) { return expValue / sumExp; });
}
||||
|
|
||||
|
|
@ -0,0 +1,56 @@ |
|||||
|
#pragma once |
||||
|
|
||||
|
#include <onnxruntime_cxx_api.h> |
||||
|
#include <algorithm> // std::generate |
||||
|
#include <cassert> |
||||
|
#include <cstddef> |
||||
|
#include <cstdint> |
||||
|
#include <iostream> |
||||
|
#include <sstream> |
||||
|
#include <string> |
||||
|
#include <vector> |
||||
|
#include <cstdlib> // For std::rand and std::srand |
||||
|
#include "ofMain.h" |
||||
|
#include "ofxOpenCv.h" |
||||
|
#include <numeric> |
||||
|
#include <cmath> |
||||
|
|
||||
|
// Thin wrapper around an ONNX Runtime session for openFrameworks:
// session setup (optionally CUDA), image-to-blob preprocessing, single
// and batched inference, and tensor post-processing helpers.
class Onnx {

    public:
        Onnx() {}
        void Setup(ORTCHAR_T* modelPath, bool isLog, bool useCuda);
        std::vector<Ort::Value> Run(ofImage &img);
        std::vector<Ort::Value> RunBatch(std::vector<ofImage> &images);
        std::string PrintShape(const std::vector<std::int64_t>& v);
        Ort::Value VectorToTensor(std::vector<float>& data, const std::vector<std::int64_t>& shape);
        Ort::Value GenerateTensor();
        int CalculateProduct(const std::vector<std::int64_t>& v);
        void TransformImage(ofImage &img);
        float ReduceMin(const float* data, size_t size);
        float ReduceMax(const float* data, size_t size);
        void Normalize(float* data, size_t size, float min_value, float max_value);
        void DataToFbo(float* data, size_t width, size_t height, ofFbo& fbo);
        void Softmax(float* data, size_t size);
        void SetPixels(ofFbo& fbo);
        bool timeStamp = false; // when true, Run/RunBatch print elapsed time
        bool log = false;       // when true, Setup prints node names/shapes
        ofPixels pixels;        // staging buffer filled by DataToFbo

    protected:
        Ort::Env ort_env;
        Ort::SessionOptions session_options;
        cv::Mat image_array;    // preprocessed blob produced by TransformImage
        std::shared_ptr<Ort::Session> ort_session;
        std::vector<std::string> input_node_names;
        std::vector<int64_t> input_node_dims; // 1 input only.
        std::size_t input_tensor_size = 1;
        std::vector<float> input_values_handler;
        Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        std::vector<std::string> output_node_names;
        std::vector<std::vector<int64_t>> output_node_dims; // >=1 outputs
        std::vector<Ort::Value> output_values;
        Ort::Value dummy_tensor{ nullptr };
        std::vector<ofImage> imageBatch;
        int num_outputs = 1;
};
@ -0,0 +1,157 @@ |
|||||
|
#include "Yolo.h" |
||||
|
|
||||
|
// Takes output tensor data, processes that data, and returns an array of BoxfWithLandmarks (detected_faces)
|
||||
|
void Yolo::ParseOutput(float* &output_tensors, std::vector<types::BoxfWithLandmarks> &sorted_faces, unsigned int num_anchors) { |
||||
|
|
||||
|
std::vector<types::BoxfWithLandmarks> detected_faces; |
||||
|
|
||||
|
for (unsigned int i = 0; i < num_anchors; ++i) { |
||||
|
const float *row_ptr = output_tensors + i * 16; // Each row contains 16 values
|
||||
|
float obj_conf = row_ptr[4]; // Objectness confidence
|
||||
|
|
||||
|
if (obj_conf < 0.5) continue; // Filter out low-confidence detections
|
||||
|
|
||||
|
// Extract bounding box, confidence, and landmarks
|
||||
|
float cx = row_ptr[0]; |
||||
|
float cy = row_ptr[1]; |
||||
|
float w = row_ptr[2]; |
||||
|
float h = row_ptr[3]; |
||||
|
float cls_conf = row_ptr[15]; // Face confidence score
|
||||
|
|
||||
|
if (cls_conf < 0.5) continue; // Filter by class confidence
|
||||
|
|
||||
|
types::BoxfWithLandmarks face; |
||||
|
face.box.x1 = cx - w / 2; |
||||
|
face.box.y1 = cy - h / 2; |
||||
|
face.box.x2 = cx + w / 2; |
||||
|
face.box.y2 = cy + h / 2; |
||||
|
face.box.score = cls_conf; |
||||
|
face.box.label_text = "face"; |
||||
|
|
||||
|
// Extract landmarks
|
||||
|
for (int j = 0; j < 10; j += 2) { |
||||
|
face.landmarks.points.push_back(cv::Point2f(row_ptr[5 + j], row_ptr[5 + j + 1])); |
||||
|
} |
||||
|
|
||||
|
detected_faces.push_back(face); // Store the detected face with landmarks
|
||||
|
} |
||||
|
|
||||
|
// Apply NMS to detected faces list to remove any overlapping bounding boxes.
|
||||
|
NonMaximumSuppression(detected_faces, sorted_faces, 0.5); |
||||
|
|
||||
|
// Sort faces based on confidence value
|
||||
|
SortDetectedFaces(sorted_faces); |
||||
|
} |
||||
|
|
||||
|
// Simple helper for drawing boxes given x1, y1, x2, y2 coordinates.
|
||||
|
void Yolo::DrawBox(std::vector<types::BoxfWithLandmarks> &detected_faces){ |
||||
|
for (const auto &face : detected_faces) { |
||||
|
ofNoFill(); |
||||
|
float w = ofGetWindowWidth() / 2; |
||||
|
ofDrawRectangle(face.box.x1 + w, face.box.y1, ((face.box.x2 + w) - (face.box.x1 + w)), face.box.y2 - face.box.y1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Simple helper to draw boxes at the center of the detected face.
|
||||
|
void Yolo::DrawCenter(std::vector<types::BoxfWithLandmarks> &detected_faces){ |
||||
|
ofNoFill(); |
||||
|
glm::vec2 position = detected_faces[0].box.center; |
||||
|
ofDrawCircle(position.x + ofGetWindowWidth() / 2, position.y, 5); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// Applies NMS to an array of BoxfWithLandmarks.face.boxes, to remove any overlapping bounding boxes.
|
||||
|
void Yolo::NonMaximumSuppression(std::vector<types::BoxfWithLandmarks> &input_faces, std::vector<types::BoxfWithLandmarks> &output_faces, float iou_threshold) |
||||
|
{ |
||||
|
// Sort the boxes by their confidence scores (highest to lowest)
|
||||
|
std::sort(input_faces.begin(), input_faces.end(), |
||||
|
[](const types::BoxfWithLandmarks &a, const types::BoxfWithLandmarks &b) { |
||||
|
return a.box.score > b.box.score; |
||||
|
}); |
||||
|
|
||||
|
std::vector<int> suppressed(input_faces.size(), 0); // Suppression mask
|
||||
|
|
||||
|
// Iterate through the boxes
|
||||
|
for (size_t i = 0; i < input_faces.size(); ++i) { |
||||
|
if (suppressed[i]) continue; // Skip already suppressed boxes
|
||||
|
|
||||
|
// Add this box to the output
|
||||
|
output_faces.push_back(input_faces[i]); |
||||
|
|
||||
|
for (size_t j = i + 1; j < input_faces.size(); ++j) { |
||||
|
if (suppressed[j]) continue; |
||||
|
|
||||
|
// Calculate IoU between box i and box j
|
||||
|
float iou = input_faces[i].box.iou_of(input_faces[j].box); |
||||
|
|
||||
|
// Suppress box j if IoU is greater than the threshold
|
||||
|
if (iou > iou_threshold) { |
||||
|
suppressed[j] = 1; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Scales the coordinates of detected faces from the model output dimensions -> the original input image dimensions.
|
||||
|
void Yolo::ConvertBoxCoordsToOriginalSize(std::vector<types::BoxfWithLandmarks> &detected_faces, size_t original_width, size_t original_height){ |
||||
|
float width_scale = static_cast<float>(original_width) / modelSize; |
||||
|
float height_scale = static_cast<float>(original_height) / modelSize; |
||||
|
|
||||
|
for (auto &face : detected_faces) { |
||||
|
// Convert bounding box coordinates
|
||||
|
face.box.x1 *= width_scale; |
||||
|
face.box.y1 *= height_scale; |
||||
|
face.box.x2 *= width_scale; |
||||
|
face.box.y2 *= height_scale; |
||||
|
face.box.UpdateCenter(); |
||||
|
|
||||
|
// Convert landmarks
|
||||
|
for (size_t j = 0; j < face.landmarks.points.size(); ++j) { |
||||
|
face.landmarks.points[j].x *= width_scale; |
||||
|
face.landmarks.points[j].y *= height_scale; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Crops the region described by face.box out of inputImage via an offscreen
// FBO, writes it into colorImage, and resizes the result to 260x260
// (presumably the downstream emotion model's input size -- TODO confirm).
void Yolo::CropFaceToImage(ofImage &inputImage, types::BoxfWithLandmarks &face, ofxCvColorImage &colorImage){

    colorImage.resetROI();

    // Face box coordinates, clamped to the input image bounds.
    float x1 = ofClamp(face.box.x1, 0.0f, (float)inputImage.getWidth());
    float y1 = ofClamp(face.box.y1, 0.0f, (float)inputImage.getHeight());
    float x2 = ofClamp(face.box.x2, 0.0f, (float)inputImage.getWidth());
    float y2 = ofClamp(face.box.y2, 0.0f, (float)inputImage.getHeight());

    // Dimensions of the cropped area.
    float cropWidth = x2 - x1;
    float cropHeight = y2 - y1;

    // Fix: a box lying entirely outside the image (or degenerate) clamps to
    // a zero/negative size; allocating an FBO with such dimensions is
    // invalid, so leave colorImage untouched and bail out.
    if (cropWidth <= 0.0f || cropHeight <= 0.0f) return;

    // Render the cropped subsection into an offscreen buffer.
    ofFbo tempFbo;
    tempFbo.allocate(cropWidth, cropHeight, GL_RGB);

    tempFbo.begin();
    ofClear(0);
    inputImage.getTexture().drawSubsection(0, 0, cropWidth, cropHeight, x1, y1);
    tempFbo.end();

    // NOTE(review): reading back as ofFloatPixels and feeding an 8-bit
    // ofxCvColorImage relies on an implicit depth conversion -- confirm the
    // resulting pixel range is what the emotion model expects.
    ofFloatPixels pix;
    tempFbo.readToPixels(pix);
    colorImage.setFromPixels(pix);

    colorImage.resize(260, 260);
}
||||
|
|
||||
|
// Orders detectedFaces so the highest-confidence face comes first.
void Yolo::SortDetectedFaces(std::vector<types::BoxfWithLandmarks> &detectedFaces){
    auto byScoreDescending = [](const types::BoxfWithLandmarks &lhs,
                                const types::BoxfWithLandmarks &rhs) {
        return lhs.box.score > rhs.box.score;
    };
    std::sort(detectedFaces.begin(), detectedFaces.end(), byScoreDescending);
}
@ -0,0 +1,81 @@ |
|||||
|
#ifndef YOLO |
||||
|
#define YOLO |
||||
|
|
||||
|
#include "ofMain.h" |
||||
|
#include "ofxOpenCv.h" |
||||
|
#include <onnxruntime_cxx_api.h> |
||||
|
#include <algorithm> |
||||
|
|
||||
|
// Raw scores for the 7 emotion classes produced by the emotion model.
// Class order is defined by the model -- TODO confirm against its docs.
struct Emotef{
    float emotions[7];
};
||||
|
|
||||
|
namespace types { |
||||
|
|
||||
|
/*
|
||||
|
Struct for storing information about detetced faces. |
||||
|
*/ |
||||
|
struct Boxf { |
||||
|
float x1, y1, x2, y2; // Coordinates of the bounding box
|
||||
|
float score; // Confidence score
|
||||
|
glm::vec2 center; |
||||
|
int label; // Class label (e.g., "face")
|
||||
|
std::string label_text; |
||||
|
Emotef emotional_state; |
||||
|
|
||||
|
// Calculate Intersection over Union (IoU) with another box
|
||||
|
float iou_of(const Boxf &other) const { |
||||
|
float intersection_x1 = std::max(x1, other.x1); |
||||
|
float intersection_y1 = std::max(y1, other.y1); |
||||
|
float intersection_x2 = std::min(x2, other.x2); |
||||
|
float intersection_y2 = std::min(y2, other.y2); |
||||
|
|
||||
|
float intersection_area = std::max(0.0f, intersection_x2 - intersection_x1) * |
||||
|
std::max(0.0f, intersection_y2 - intersection_y1); |
||||
|
|
||||
|
float this_area = (x2 - x1) * (y2 - y1); |
||||
|
float other_area = (other.x2 - other.x1) * (other.y2 - other.y1); |
||||
|
|
||||
|
float union_area = this_area + other_area - intersection_area; |
||||
|
|
||||
|
return intersection_area / union_area; |
||||
|
} |
||||
|
|
||||
|
void UpdateCenter(){ |
||||
|
center.x = (x1 + x2) / 2; |
||||
|
center.y = (y1 + y2) / 2; |
||||
|
} |
||||
|
|
||||
|
void SetEmotionState(float* emotional_data){ |
||||
|
std::copy(emotional_data, emotional_data + 7, emotional_state.emotions); |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
struct Landmarks { |
||||
|
std::vector<cv::Point2f> points; // Facial landmarks points (e.g., eyes, nose, mouth)
|
||||
|
bool flag = false; // Indicator if landmarks are available
|
||||
|
}; |
||||
|
|
||||
|
struct BoxfWithLandmarks { |
||||
|
Boxf box; // Bounding box for the face
|
||||
|
Landmarks landmarks; // Landmark points for the face
|
||||
|
bool flag = false; // Indicator if this detection is valid
|
||||
|
}; |
||||
|
} |
||||
|
|
||||
|
// Post-processing helpers for a YOLO face-detection model: parsing raw
// output tensors, non-maximum suppression, coordinate rescaling, drawing,
// and cropping detected faces (bodies live in Yolo.cpp).
class Yolo{
public:
    Yolo(){};

    // Converts raw model output (num_anchors rows) into an NMS-filtered,
    // confidence-sorted list of faces.
    void ParseOutput(float* &out_ptr, std::vector<types::BoxfWithLandmarks> &sorted_faces, unsigned int num_anchors);

    // Draws each face's bounding box (offset by half the window width).
    void DrawBox(std::vector<types::BoxfWithLandmarks> &detected_faces);

    // Draws a marker at the first face's center (offset like DrawBox).
    void DrawCenter(std::vector<types::BoxfWithLandmarks> &detected_faces);

    // Drops detections overlapping a better-scoring box by more than
    // iou_threshold; survivors are appended to output_faces.
    void NonMaximumSuppression(std::vector<types::BoxfWithLandmarks> &input_faces, std::vector<types::BoxfWithLandmarks> &output_faces, float iou_threshold);

    // Rescales box/landmark coordinates from model space (modelSize) to the
    // original image dimensions.
    void ConvertBoxCoordsToOriginalSize(std::vector<types::BoxfWithLandmarks> &detected_faces, size_t original_width, size_t original_height);

    // Crops a detected face out of inputImage into colorImage.
    void CropFaceToImage(ofImage &inputImage, types::BoxfWithLandmarks &face, ofxCvColorImage &colorImage);

    // Sorts faces by confidence, highest first.
    void SortDetectedFaces(std::vector<types::BoxfWithLandmarks> &detectedFaces);

private:
    // Input dimensions of the model -- used for coordinate scaling.
    size_t modelSize = 640;
};
||||
|
|
||||
|
#endif |
Loading…
Reference in new issue