Browse Source

added vp_tree server onnx

master
cailean 2 months ago
parent
commit
2b845eac0f
  1. 4
      .gitignore
  2. 21
      .vscode/c_cpp_properties.json
  3. 8
      addons.make
  4. 0
      bin/data/.gitkeep
  5. BIN
      bin/data/debug_binaryMat.png
  6. 41
      bin/data/shaders/dither.frag
  7. 16
      bin/data/shaders/dither.vert
  8. 25
      bin/data/shaders/vertex_snap.frag
  9. 36
      bin/data/shaders/vertex_snap.vert
  10. BIN
      bin/data/vp-tree.bin
  11. BIN
      bin/image-to-mesh
  12. 8
      image-to-mesh.code-workspace
  13. 2
      src/Bullet.cpp
  14. 2
      src/Bullet.h
  15. 40
      src/network/Request.cpp
  16. 44
      src/network/Request.h
  17. 152
      src/network/Server.cpp
  18. 58
      src/network/Server.h
  19. 81
      src/ofApp.cpp
  20. 30
      src/ofApp.h
  21. 99
      src/onnx/ModelThread.h
  22. 272
      src/onnx/Onnx.cpp
  23. 56
      src/onnx/Onnx.h
  24. 157
      src/onnx/Yolo.cpp
  25. 81
      src/onnx/Yolo.h
  26. 0
      src/utils/MeshGenerator.cpp
  27. 0
      src/utils/MeshGenerator.h
  28. 0
      src/vp/VP.cpp
  29. 0
      src/vp/VP.h
  30. 0
      src/vp/VpTree.hpp

4
.gitignore

@ -11,6 +11,8 @@
/bin/data/images /bin/data/images
/bin/data/recordings /bin/data/recordings
/bin/data/models
/bin/libs/
######### #########
# general # general
@ -87,5 +89,3 @@ Desktop.ini
# Android # Android
.csettings .csettings
/bin/data/

21
.vscode/c_cpp_properties.json

@ -107,9 +107,6 @@
"${workspaceRoot}", "${workspaceRoot}",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxOpenCv/libs/opencv/include/opencv4", "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxOpenCv/libs/opencv/include/opencv4",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxOpenCv/src", "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxOpenCv/src",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBox2d/src",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBox2d/libs/Box2D",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBox2d/libs",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/libs", "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/libs",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/src", "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/src",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/src/shapes", "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/src/shapes",
@ -118,8 +115,17 @@
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/libs/bullet2.8.2/include/BulletCollision/CollisionShapes", "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/libs/bullet2.8.2/include/BulletCollision/CollisionShapes",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/libs/bullet2.8.2/include", "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/libs/bullet2.8.2/include",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/src/joints", "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/src/joints",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxFFmpeg/src", "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxVideoRecorder/src",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxVideoRecorder/src" "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxOsc/*",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxNetwork/src/*",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxTSNE/src/*",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxPiMapper/src/*",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxXmlSettings/src",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxXmlSettings/libs",
"/usr/local/onnxruntime-linux-x64-gpu-1.19.2/include",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxOsc/src",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxOsc/libs/oscpack/src/osc",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxOsc/libs/oscpack/src/ip"
], ],
"browse": { "browse": {
"limitSymbolsToIncludedHeaders": true, "limitSymbolsToIncludedHeaders": true,
@ -131,8 +137,9 @@
"${workspaceRoot}/../../../addons", "${workspaceRoot}/../../../addons",
"${workspaceRoot}/../../../libs/openFrameworks", "${workspaceRoot}/../../../libs/openFrameworks",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxOpenCv/src", "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxOpenCv/src",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBox2d/src", "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/src",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxBullet/src" "/usr/local/onnxruntime-linux-x64-gpu-1.19.2/include",
"/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/addons/ofxOsc/*"
] ]
}, },
"intelliSenseMode": "clang-x64", "intelliSenseMode": "clang-x64",

8
addons.make

@ -2,3 +2,11 @@ ofxOpenCv
ofxOpenCv ofxOpenCv
ofxBullet ofxBullet
ofxBullet ofxBullet
ofxOsc
ofxOsc
ofxNetwork
ofxNetwork
ofxTSNE
ofxTSNE
ofxPiMapper
ofxPiMapper

0
bin/data/.gitkeep

BIN
bin/data/debug_binaryMat.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.2 KiB

41
bin/data/shaders/dither.frag

@ -0,0 +1,41 @@
OF_GLSL_SHADER_HEADER
precision mediump float;
uniform sampler2DRect tex0; // Main texture
uniform sampler2D tex1; // Bayer texture
uniform vec2 resolution;
uniform float time;
uniform int frame;
in vec2 varyingtexcoord;
out vec4 fragColor;
const float SIZE = 256.0;
const mat4 ditherMatrix = mat4(
1.0/17.0, 9.0/17.0, 3.0/17.0, 11.0/17.0,
13.0/17.0, 5.0/17.0, 15.0/17.0, 7.0/17.0,
4.0/17.0, 12.0/17.0, 2.0/17.0, 10.0/17.0,
16.0/17.0, 8.0/17.0, 14.0/17.0, 6.0/17.0
);
void main() {
vec2 uv = varyingtexcoord / resolution;
vec3 t = texture(tex0, varyingtexcoord).rgb;
vec2 fragCoord = gl_FragCoord.xy;
// color to display, use floating point precision here
vec3 col = t;
// Sample the Bayer 8x8 texture for the dither pattern
float ditherValue = texture(tex1, fragCoord / 8.0).r;
// Apply dithering with increased effect
col += (ditherValue - 0.5) / 2.0 * 2.0;
// Quantize the color
col = (floor(col * 8.0) + 0.5) / 8.0;
fragColor = vec4(col, 1.0);
}

16
bin/data/shaders/dither.vert

@ -0,0 +1,16 @@
OF_GLSL_SHADER_HEADER
uniform mat4 modelViewMatrix;
uniform mat4 projectionMatrix;
uniform mat4 textureMatrix;
uniform mat4 modelViewProjectionMatrix;
in vec4 position;
in vec2 texcoord;
out vec2 varyingtexcoord;
void main()
{
varyingtexcoord = texcoord;
gl_Position = modelViewProjectionMatrix * position;
}

25
bin/data/shaders/vertex_snap.frag

@ -0,0 +1,25 @@
#version 150
uniform sampler2D tex0;
in vec2 tex_c;
out vec4 outputColor;
float hash13(vec3 p3) {
p3 = fract(p3 * .1031);
p3 += dot(p3, p3.yzx + 19.19);
return fract((p3.x + p3.y) * p3.z);
}
void main()
{
vec4 texColor = texture(tex0, tex_c);
// Discard fragments with alpha below a threshold
if (texColor.a < 0.1) {
discard;
}
outputColor = texColor;
}

36
bin/data/shaders/vertex_snap.vert

@ -0,0 +1,36 @@
#version 150
uniform mat4 modelViewProjectionMatrix;
uniform float gridSize;
in vec2 texcoord;
in vec4 position;
out vec2 tex_c;
void main()
{
tex_c = texcoord;
vec2 resolution = vec2(1280.0, 960.0);
// Transform the vertex position to clip space
vec4 clipPos = modelViewProjectionMatrix * position;
// Perform perspective division
vec3 ndcPos = clipPos.xyz / clipPos.w;
// Convert to screen space
vec2 screenPos = (ndcPos.xy + 1.0) * 0.5 * resolution;
// Snap to grid
screenPos = floor(screenPos / gridSize) * gridSize;
// Convert back to NDC space
ndcPos.xy = (screenPos / resolution) * 2.0 - 1.0;
// Reconstruct the clip space position
clipPos = vec4(ndcPos * clipPos.w, clipPos.w);
gl_Position = clipPos;
}

BIN
bin/data/vp-tree.bin

Binary file not shown.

BIN
bin/image-to-mesh

Binary file not shown.

8
image-to-mesh.code-workspace

@ -101,7 +101,13 @@
"__split_buffer": "cpp", "__split_buffer": "cpp",
"__tree": "cpp", "__tree": "cpp",
"queue": "cpp", "queue": "cpp",
"stack": "cpp" "stack": "cpp",
"internet": "cpp",
"io_context": "cpp",
"__bit_reference": "cpp",
"filesystem": "cpp",
"executor": "cpp",
"timer": "cpp"
} }
} }
} }

2
src/Bullet.cpp

@ -23,7 +23,7 @@ void Bullet::setup(){
workerThreads.emplace_back(&Bullet::workerThreadFunction, this, i); workerThreads.emplace_back(&Bullet::workerThreadFunction, this, i);
} }
shader.load("vertex_snap"); shader.load("shaders/vertex_snap");
std::cout << workerThreads.size() << std::endl; std::cout << workerThreads.size() << std::endl;
} }

2
src/Bullet.h

@ -61,7 +61,7 @@ class Bullet{
void updateRepulsionNode() { void updateRepulsionNode() {
static int frameCount = 0; static int frameCount = 0;
static const int repulsionInterval = 60; // Apply repulsion every 60 frames static const int repulsionInterval = 180; // Apply repulsion every 60 frames
if (frameCount % repulsionInterval == 0 && !nodes.empty()) { if (frameCount % repulsionInterval == 0 && !nodes.empty()) {
repulsionNodeIndex = ofRandom(nodes.size()); repulsionNodeIndex = ofRandom(nodes.size());

40
src/network/Request.cpp

@ -0,0 +1,40 @@
#include "Request.h"
/* setup http server */
void Request::setup(std::string ip, int port, std::string page){
std::cout << "Initialising HTTP Server" << std::endl;
req.method = ofHttpRequest::POST;
req.url = "http://" + ip + ":" + ofToString(port) + "/" + page;
req.headers["Content-Type"] = "application/json";
}
/* send a request to vp_server & return frame/video/folder */
VPResp Request::query(Embedding& in){
VPResp vp_resp;
try {
req.body = "{\"vector\": [" +
ofToString(in.emotions["angry"]) + "," +
ofToString(in.emotions["disgust"]) + "," +
ofToString(in.emotions["fear"]) + "," +
ofToString(in.emotions["happy"]) + "," +
ofToString(in.emotions["sad"]) + "," +
ofToString(in.emotions["surprise"]) + "," +
ofToString(in.emotions["neutral"]) + "]}";
auto resp = http.handleRequest(req);
json_resp = ofJson::parse(resp.data.getText());
vp_resp.folder = json_resp["folder"];
vp_resp.image = json_resp["image"];
vp_resp.video = json_resp["video"];
vp_resp.frame = json_resp["frame"];
vp_resp.lost = json_resp["lost"];
past_vp_resp = vp_resp;
return vp_resp;
} catch (exception e) {
// Some issue happening here when plugging in controllers, or when they initially connect
return past_vp_resp;
}
}

44
src/network/Request.h

@ -0,0 +1,44 @@
#pragma once
#include "ofMain.h"
#include <string>
#include <unordered_map>
struct Embedding {
std::unordered_map<std::string, float> emotions;
Embedding() {
// Initialize with default values
emotions["angry"] = 0.0f;
emotions["disgust"] = 0.0f;
emotions["fear"] = 0.0f;
emotions["happy"] = 0.0f;
emotions["sad"] = 0.0f;
emotions["surprise"] = 0.0f;
emotions["neutral"] = 0.0f;
}
bool operator!=(const Embedding &other) const {
return emotions != other.emotions;
}
};
/* Vantage point query structure */
struct VPResp{
std::string folder;
std::string video;
std::string image;
std::string frame;
int lost;
};
class Request{
public:
void setup(std::string ip, int port, std::string page);
VPResp query(Embedding& in);
ofHttpRequest req;
ofURLFileLoader http;
ofJson json_resp;
VPResp past_vp_resp;
};

152
src/network/Server.cpp

@ -0,0 +1,152 @@
#include "Server.h"
void Server::start(){
std::cout << "Initialising TCP sever" << std::endl;
server.setup(port);
osc_sender.setup(OSC_HOST, OSC_PORT);
http.setup(http_ip, http_port, http_page);
is_active = true;
previous_embedding = embedding;
last_change_time = std::chrono::steady_clock::now();
}
void Server::update(){
for ( int i = 0; i < server.getLastID(); i++){
if (server.isClientConnected(i)) {
const int buffer_size = 8;
char buffer[buffer_size];
int bytes_recieved = server.receiveRawBytes(i, buffer, buffer_size);
if (bytes_recieved == buffer_size){
float value;
int id;
memcpy(&value, buffer, sizeof(float));
memcpy(&id, buffer + sizeof(float), sizeof(int));
std::string ip_address = server.getClientIP(i);
addOrUpdateClient(id, value, ip_address);
}
}
}
updateEmbedding();
checkActivity();
sendOSCMessage();
if(debug){
printClients();
}
}
void Server::addOrUpdateClient(int client_id, float value, const std::string& ip_address){
ClientInfo client;
client.ip_address = ip_address;
client.value = value;
clients[client_id] = client;
}
void Server::updateEmbedding() {
std::vector<std::string> emotionNames = {
"angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"
};
for (const auto& c : clients) {
const ClientInfo& info = c.second;
float val = std::round(info.value * 100.0f) / 100.0f;
if (c.first >= 0 && c.first < emotionNames.size()) {
embedding.emotions[emotionNames[c.first]] = val;
}
// Special case for neutral (index 6)
if (c.first == 6) {
embedding.emotions["fear"] = ofRandom(0.1, 0.6);
embedding.emotions["angry"] = ofRandom(0.01, 0.99);
embedding.emotions["happy"] = ofRandom(0.01, 0.99);
}
}
}
void Server::printClients(){
for( const auto& c : clients){
int id = c.first;
const ClientInfo& info = c.second;
std::cout << "id: " << id
<< ", value: " << info.value
<< ", IP: " << info.ip_address << std::endl;
}
std::cout << is_active << std::endl;
}
/* check if the controllers are in use */
void Server::checkActivity(){
if (previous_embedding.emotions["neutral"] != embedding.emotions["neutral"]) { // Check if embedding has changed
last_change_time = std::chrono::steady_clock::now(); // Reset the timer if there is a change
previous_embedding = embedding; // Update the previous embedding to the current one
is_active = true;
sendHttpRequest();
} else {
// Calculate the time since the last change
auto now = std::chrono::steady_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - last_change_time).count();
if (duration >= 2) {
is_active = false;
}
}
}
/* send osc msg, check if audio file exists and it is different to the past audiofile */
void Server::sendOSCMessage(){
std::vector<ofxOscMessage> messages;
ofxOscMessage me_0;
ofxOscMessage me_1;
ofxOscMessage me_2;
ofxOscMessage me_3;
ofxOscMessage me_file;
std::string audio_file = vp_resp.folder;
// Check if file exists in a given dir
ofFile file("/home/cailean/Desktop/rave/all_wav_files/" + audio_file + ".wav");
if(!is_active && (audio_file != past_audio_file) && file.exists()){
me_file.setAddress("/emote/filename");
me_file.addStringArg(audio_file + ".wav");
messages.push_back(me_file);
past_audio_file = audio_file;
}
me_0.setAddress("/emote/0");
me_0.addFloatArg(embedding.emotions["neutral"]);
messages.push_back(me_0);
me_1.setAddress("/emote/1");
me_1.addFloatArg(embedding.emotions["neutral"]);
messages.push_back(me_1);
me_2.setAddress("/emote/2");
me_2.addFloatArg(embedding.emotions["neutral"]);
messages.push_back(me_2);
me_3.setAddress("/emote/3");
me_3.addFloatArg(embedding.emotions["neutral"]);
messages.push_back(me_3);
for (auto& msg : messages){
osc_sender.sendMessage(msg, false);
}
}
/* sends request to http server if is_active = true */
void Server::sendHttpRequest(){
vp_resp = http.query(embedding);
}
std::vector<std::vector<double>> Server::generateRandomVectors(int count, int dimension){
return tree.generateRandomVectors(count, dimension);
}

58
src/network/Server.h

@ -0,0 +1,58 @@
#pragma once
#include "ofMain.h"
#include "ofxNetwork.h"
#include "Request.h"
#include <unordered_map>
#include <chrono>
#include "ofxOsc.h"
#include "../vp/VP.h"
#define OSC_HOST "127.0.0.1"
#define OSC_PORT 9002
struct ClientInfo {
float value;
std::string ip_address;
};
class Server{
public:
Server(int _port, Embedding _embedding, VP _tree, bool debug, std::string _http_ip, int _http_port, std::string _http_page)
: port(_port), embedding(_embedding), tree(_tree), debug(debug), http_ip(_http_ip), http_port(_http_port), http_page(_http_page) {}
void start();
void update();
void addOrUpdateClient(int client_id, float value, const std::string& ip_address);
void printClients();
void updateEmbedding();
void checkActivity();
void sendHttpRequest();
void sendOSCMessage();
std::vector<std::vector<double>> generateRandomVectors(int count, int dimension);
int port;
ofxTCPServer server;
std::unordered_map<int, ClientInfo> clients;
bool debug;
bool is_active;
Embedding embedding;
Request http;
std::string http_ip;
int http_port;
std::string http_page;
VPResp vp_resp;
private:
Embedding previous_embedding;
std::chrono::time_point<std::chrono::steady_clock> last_change_time;
ofxOscSender osc_sender;
std::string past_audio_file;
/* vp tree */
VP tree;
};

81
src/ofApp.cpp

@ -3,6 +3,7 @@
//-------------------------------------------------------------- //--------------------------------------------------------------
void ofApp::setup(){ void ofApp::setup(){
const int vectorCount = 10000; const int vectorCount = 10000;
const int dimension = 7; const int dimension = 7;
const int queryCount = 10; const int queryCount = 10;
@ -14,8 +15,39 @@ void ofApp::setup(){
vpt::VpTree tree = vp_tree.buildTree(vectors); vpt::VpTree tree = vp_tree.buildTree(vectors);
vp_tree.saveTree("./data/vp-tree.bin", tree); vp_tree.saveTree("./data/vp-tree.bin", tree);
// Generate query vectors /* allocated fbo's */
auto queries = vp_tree.generateRandomVectors(queryCount, dimension); map_fbo.allocate((ofGetWindowWidth() / 3) * 2, ofGetWindowHeight(), GL_RGB);
portrait_fbo.allocate((ofGetWindowWidth() / 3) * 1, ofGetWindowHeight(), GL_RGBA);
comp_fbo.allocate(ofGetWindowWidth(), ofGetWindowHeight(), GL_RGBA);
model_outptut_fbo.allocate((ofGetWindowWidth() / 3) * 2, ofGetWindowHeight(), GL_RGB);
model_image.allocate((ofGetWindowWidth() / 3) * 2, ofGetWindowHeight(), OF_IMAGE_COLOR);
/* of settings (not sure why putting this at the start of setup breaks the map - but it works!) */
ofSetVerticalSync(true);
ofDisableArbTex();
ofEnableDepthTest();
ofDisableAntiAliasing();
ofEnableAlphaBlending();
/* load */
shaders.load("shaders/dither");
ORTCHAR_T* modelPath = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/image-to-mesh/bin/data/models/depth_anything_v2_vits.onnx";
/* setup */
bullet.setup();
depth_onnx.Setup(modelPath, true, true);
depth_thread.setup(&model_image, &model_outptut_fbo, &depth_onnx);
server = std::make_unique<Server>(12345, embed, vp_tree, false, "192.168.0.253", 2000, "search");
server->start();
/* vp-test */
auto queries = server->generateRandomVectors(queryCount, dimension);
for (const auto& query : queries) { for (const auto& query : queries) {
auto [distances, indices] = tree.getNearestNeighbors(query, k); auto [distances, indices] = tree.getNearestNeighbors(query, k);
@ -28,17 +60,8 @@ void ofApp::setup(){
} }
} }
record_fbo.allocate(ofGetWindowWidth(), ofGetWindowHeight(), GL_RGB);
shader_fbo.allocate(ofGetWindowWidth(), ofGetWindowHeight(), GL_RGBA);
out_fbo.allocate(ofGetWindowWidth(), ofGetWindowHeight(), GL_RGBA);
ofSetVerticalSync(true);
ofDisableArbTex();
//ofEnableDepthTest();
ofDisableDepthTest();
ofDisableAntiAliasing();
ofEnableAlphaBlending();
shaders.load("dither");
/* mesh generation test */
std::string path = "images"; std::string path = "images";
ofDirectory dir(path); ofDirectory dir(path);
dir.allowExt("png"); dir.allowExt("png");
@ -58,7 +81,7 @@ void ofApp::setup(){
} }
} }
bullet.setup();
vector<ofMesh> mesh_list; vector<ofMesh> mesh_list;
@ -71,6 +94,23 @@ void ofApp::setup(){
//-------------------------------------------------------------- //--------------------------------------------------------------
void ofApp::update(){ void ofApp::update(){
if(ofGetFrameNum() < 1){
depth_thread.start();
}
/* write pixels to model input image */
map_fbo.readToPixels(map_pixels);
model_image.setFromPixels(map_pixels);
try{
depth_thread.update();
depth_onnx.SetPixels(model_outptut_fbo);
} catch (exception e){
std::cout << "Model did not run" << std::endl;
}
bullet.update(); bullet.update();
} }
@ -78,16 +118,16 @@ void ofApp::update(){
void ofApp::draw(){ void ofApp::draw(){
ofPushStyle(); ofPushStyle();
shader_fbo.begin(); map_fbo.begin();
ofClear(ofColor::grey); ofClear(ofColor::grey);
bullet.draw(); bullet.draw();
shader_fbo.end(); map_fbo.end();
ofPopStyle(); ofPopStyle();
ofTexture t = shader_fbo.getTexture(); ofTexture t = map_fbo.getTexture();
out_fbo.begin(); comp_fbo.begin();
shaders.begin(); shaders.begin();
shaders.setUniformTexture("tex0", t, 0); shaders.setUniformTexture("tex0", t, 0);
shaders.setUniformTexture("tex1", bayer, 1); shaders.setUniformTexture("tex1", bayer, 1);
@ -95,13 +135,14 @@ void ofApp::draw(){
shaders.setUniform1f("time", ofGetElapsedTimef()); shaders.setUniform1f("time", ofGetElapsedTimef());
shaders.setUniform1i("frame", ofGetFrameNum()); shaders.setUniform1i("frame", ofGetFrameNum());
ofClear(0, 0, 0, 0); ofClear(0, 0, 0, 0);
shader_fbo.draw(0,0); map_fbo.draw(0,0);
shaders.end(); shaders.end();
out_fbo.end(); comp_fbo.end();
out_fbo.draw(0,0); comp_fbo.draw(0,0);
model_outptut_fbo.draw(ofGetWindowWidth() / 2, 0);
//shader_fbo.draw(0, 0); //shader_fbo.draw(0, 0);
// float planeScale = 0.75; // float planeScale = 0.75;
// int planeWidth = ofGetWidth() * planeScale; // int planeWidth = ofGetWidth() * planeScale;

30
src/ofApp.h

@ -1,9 +1,15 @@
#pragma once #pragma once
#include "ofMain.h" #include "ofMain.h"
#include "MeshGenerator.h" #include "utils/MeshGenerator.h"
#include "Bullet.h" #include "Bullet.h"
#include "VP.h" #include "vp/VP.h"
#include <onnxruntime_cxx_api.h>
#include "onnx/Onnx.h"
#include "onnx/Yolo.h"
#include "onnx/ModelThread.h"
#include "network/Request.h"
#include "network/Server.h"
class ofApp : public ofBaseApp{ class ofApp : public ofBaseApp{
@ -31,13 +37,27 @@ class ofApp : public ofBaseApp{
Bullet bullet; Bullet bullet;
vector<ofImage> images; vector<ofImage> images;
ofFbo record_fbo; ofFbo map_fbo;
ofFbo shader_fbo; ofFbo portrait_fbo;
ofFbo out_fbo; ofFbo comp_fbo;
ofFbo model_outptut_fbo;
ofShader shaders; ofShader shaders;
ofPlanePrimitive plane; ofPlanePrimitive plane;
ofTexture bayer; ofTexture bayer;
VP vp_tree; VP vp_tree;
/* onnx */
Onnx depth_onnx;
ModelThread depth_thread;
ofImage model_image;
ofPixels map_pixels;
/* the current embedding */
Embedding embed;
/* server */
std::unique_ptr<Server> server;
}; };

99
src/onnx/ModelThread.h

@ -0,0 +1,99 @@
#include "ofMain.h"
#include "Onnx.h"
#include "Yolo.h"
class ModelThread : public ofThread
{
public:
ofImage* img;
ofFbo* fbo;
Onnx* model;
Yolo* yolo;
std::vector<types::BoxfWithLandmarks>* detected_faces;
std::string model_type;
// emotional recognition model
std::vector<ofImage>* croppedFaces;
float* emotional_data;
~ModelThread(){
stop();
waitForThread(false);
}
void setup(ofImage* _img, ofFbo* _fbo, Onnx* _model){
std::lock_guard<std::mutex> lock(mutex);
this->img = _img;
this->fbo = _fbo;
this->model = _model;
this->model_type = "depth";
}
void setupYolo(ofImage* _img, std::vector<types::BoxfWithLandmarks>* _detected_faces, Onnx* _model, Yolo* _yolo){
std::lock_guard<std::mutex> lock(mutex);
this->img = _img;
this->detected_faces = _detected_faces;
this->model_type = "yolo";
this->model = _model;
this->yolo = _yolo;
}
void start(){
startThread();
}
void stop(){
stopThread();
condition.notify_all();
}
void threadedFunction(){
while(isThreadRunning()){
if(model_type == "depth"){
std::unique_lock<std::mutex> lock(mutex);
inferDepthImage(fbo, img, model);
condition.wait(lock);
} else if(model_type == "yolo") {
std::unique_lock<std::mutex> lock(mutex);
inferYolo();
condition.wait(lock);
}
}
}
void update(){
std::lock_guard<std::mutex> lock(mutex);
condition.notify_one();
}
void inferYolo(){
auto output_tensors_face = model->Run(*img);
auto output_faces = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();
unsigned int num_anchors = output_faces[1]; // Number of anchors
float* output_face_ptr = output_tensors_face.front().GetTensorMutableData<float>();
yolo->ParseOutput(output_face_ptr, *detected_faces, num_anchors);
}
void inferDepthImage(ofFbo* fbo, ofImage* img, Onnx* model){
auto output_tensors = model->Run(*img);
float* output_ptr = output_tensors.front().GetTensorMutableData<float>();
size_t num_elements = output_tensors.front().GetTensorTypeAndShapeInfo().GetElementCount();
float min_value = model->ReduceMin(output_ptr, num_elements);
float max_value = model->ReduceMax(output_ptr, num_elements);
model->Normalize(output_ptr, num_elements, min_value, max_value);
model->DataToFbo(output_ptr, 518, 518, *fbo);
}
protected:
std::condition_variable condition;
};

272
src/onnx/Onnx.cpp

@ -0,0 +1,272 @@
#include "Onnx.h"
#include <cmath>
// Setups the model. CUDA is enabled by default
void Onnx::Setup(ORTCHAR_T* modelPath, bool isLog, bool useCuda){
session_options.SetIntraOpNumThreads(1);
session_options.SetIntraOpNumThreads(1);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
log = isLog;
// cuda setup
if(useCuda){
OrtCUDAProviderOptions opts;
opts.device_id = 0;
opts.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
opts.do_copy_in_default_stream = 0;
opts.arena_extend_strategy = 0;
session_options.AppendExecutionProvider_CUDA(opts);
}
ort_session = std::make_shared<Ort::Session>(ort_env, modelPath, session_options);
Ort::AllocatorWithDefaultOptions allocator;
for (std::size_t i = 0; i < ort_session->GetInputCount(); i++) {
input_node_names.emplace_back(ort_session->GetInputNameAllocated(i, allocator).get());
input_node_dims = ort_session->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
if(log)
std::cout << "\t" << input_node_names.at(i) << " : " << PrintShape(input_node_dims) << std::endl;
}
// some models might have negative shape values to indicate dynamic shape, e.g., for variable batch size.
for (auto& s : input_node_dims) {
if (s < 0) {
s = 1;
if(log)
std::cout << "transfromed!" << std::endl;
}
}
if(log)
std::cout << "Output Node Name/Shape (" << output_node_names.size() << "):" << std::endl;
for (std::size_t i = 0; i < ort_session->GetOutputCount(); i++) {
output_node_names.emplace_back(ort_session->GetOutputNameAllocated(i, allocator).get());
auto output_shapes = ort_session->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
if(log)
std::cout << "\t" << output_node_names.at(i) << " : " << PrintShape(output_shapes) << std::endl;
}
}
// Runs the model, given an image
std::vector<Ort::Value> Onnx::Run(ofImage &img){
auto start = std::chrono::high_resolution_clock::now();
TransformImage(img);
size_t input_tensor_size = image_array.total();
std::vector<Ort::Value> input_tensors;
Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
input_tensors.emplace_back(Ort::Value::CreateTensor<float>(mem_info, (float*)image_array.data,
input_tensor_size, input_node_dims.data(), input_node_dims.size()));
// double-check the dimensions of the input tensor
assert(input_tensors[0].IsTensor() && input_tensors[0].GetTensorTypeAndShapeInfo().GetShape() == input_node_dims);
//std::cout << "\ninput_tensor shape: " << PrintShape(input_tensors[0].GetTensorTypeAndShapeInfo().GetShape()) << std::endl;
// pass data through model
std::vector<const char*> input_names_char(input_node_names.size(), nullptr);
std::transform(std::begin(input_node_names), std::end(input_node_names), std::begin(input_names_char),
[&](const std::string& str) { return str.c_str(); });
std::vector<const char*> output_names_char(output_node_names.size(), nullptr);
std::transform(std::begin(output_node_names), std::end(output_node_names), std::begin(output_names_char),
[&](const std::string& str) { return str.c_str(); });
try {
auto output_tensors = ort_session->Run(Ort::RunOptions{nullptr}, input_names_char.data(), input_tensors.data(),
input_names_char.size(), output_names_char.data(), output_names_char.size());
if (timeStamp) {
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double, std::milli> elapsed = end - start;
std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl;
}
return output_tensors;
} catch (const Ort::Exception& exception) {
std::cout << "ERROR running model inference: " << exception.what() << std::endl;
return input_tensors;
}
}
/*
Runs a model, with a batch of images as input (emotion.onnx)
(1) Creates a 1-dim float array based on the batch * channels * width * height)
(2) Transforms each image into a cv::Mat, and appends it to the array
(3) Sends that information to the model to be processed.
*/
std::vector<Ort::Value> Onnx::RunBatch(std::vector<ofImage>& images){
auto start = std::chrono::high_resolution_clock::now();
// Number of images in the batch
size_t batchSize = images.size();
std::vector<int64_t> batch_node_dims = {static_cast<int64_t>(batchSize), 3, 260, 260};
std::vector<float> batch_image_array(static_cast<int64_t>(batchSize) * 3 * 260 * 260);
for(size_t i = 0; i < batchSize; i++){
TransformImage(images[i]);
// Copy the image data into the batch_image_array
std::memcpy(
batch_image_array.data() + i * 3 * 260 * 260, // Destination: starting point for this image in the batch array
image_array.data, // Source: pixel data in the cv::Mat
3 * 260 * 260 * sizeof(float) // Size: number of bytes to copy (3 channels * 260 * 260 pixels)
);
}
size_t input_tensor_size = batch_image_array.size();
std::vector<Ort::Value> input_tensors;
Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
input_tensors.emplace_back(Ort::Value::CreateTensor<float>(mem_info, batch_image_array.data(),
input_tensor_size, batch_node_dims.data(), batch_node_dims.size()));
// double-check the dimensions of the input tensor
assert(input_tensors[0].IsTensor() && input_tensors[0].GetTensorTypeAndShapeInfo().GetShape() == batch_node_dims);
std::cout << "\ninput_tensor shape: " << PrintShape(input_tensors[0].GetTensorTypeAndShapeInfo().GetShape()) << std::endl;
// pass data through model
std::vector<const char*> input_names_char(input_node_names.size(), nullptr);
std::transform(std::begin(input_node_names), std::end(input_node_names), std::begin(input_names_char),
[&](const std::string& str) { return str.c_str(); });
std::vector<const char*> output_names_char(output_node_names.size(), nullptr);
std::transform(std::begin(output_node_names), std::end(output_node_names), std::begin(output_names_char),
[&](const std::string& str) { return str.c_str(); });
try {
auto output_tensors = ort_session->Run(Ort::RunOptions{nullptr}, input_names_char.data(), input_tensors.data(),
input_names_char.size(), output_names_char.data(), output_names_char.size());
if (timeStamp) {
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double, std::milli> elapsed = end - start;
std::cout << "Update loop took " << elapsed.count() << " ms" << std::endl;
}
return output_tensors;
} catch (const Ort::Exception& exception) {
std::cout << "ERROR running model inference: " << exception.what() << std::endl;
return input_tensors;
}
}
// Transforms an ofImage into a cv::ImageArray
void Onnx::TransformImage(ofImage &img){
// Convert ofImage to cv::Mat
ofPixels &pixels = img.getPixels(); // Get pixels from ofImage
int width = img.getWidth();
int height = img.getHeight();
int channels = img.getPixels().getNumChannels();
// Create a cv::Mat from the pixel data
cv::Mat cvImg = cv::Mat(height, width, (channels == 3) ? CV_8UC3 : CV_8UC1, pixels.getData());
// Convert cv::Mat to cv::InputArray
cv::InputArray inputArray(cvImg);
image_array = cv::dnn::blobFromImage(inputArray, 1 / 255.0, cv::Size(input_node_dims[2] , input_node_dims[3] ), (0, 0, 0), false, false);
}
/* Generates a random 1 dimensional float array, creating values between 0->255.
Returns a tensor. */
Ort::Value Onnx::GenerateTensor(){
std::vector<float> random_input_tensor_values(CalculateProduct(input_node_dims));
std::generate(random_input_tensor_values.begin(), random_input_tensor_values.end(), [&] { return rand() % 255; });
return VectorToTensor(random_input_tensor_values, input_node_dims);
}
// Calculates the product of the vector, how many values.
int Onnx::CalculateProduct(const std::vector<std::int64_t>& v){
int total = 1;
for (auto& i : v) total *= i;
return total;
}
// Creates a tensor from a given vector input.
Ort::Value Onnx::VectorToTensor(std::vector<float>& data, const std::vector<std::int64_t>& shape){
Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
auto tensor = Ort::Value::CreateTensor<float>(mem_info, data.data(), data.size(), shape.data(), shape.size());
return tensor;
}
// Prints the shape of the given tensor (ex. input: (1, 1, 512, 512))
std::string Onnx::PrintShape(const std::vector<std::int64_t>& v){
std::stringstream ss("");
for (std::size_t i = 0; i < v.size() - 1; i++) ss << v[i] << "x";
ss << v[v.size() - 1];
return ss.str();
}
// Function to calculate the minimum value in an array
float Onnx::ReduceMin(const float* data, size_t size) {
return *std::min_element(data, data + size);
}
// Function to calculate the maximum value in an array
float Onnx::ReduceMax(const float* data, size_t size) {
return *std::max_element(data, data + size);
}
// Function to normalize an array between 0 and 1
void Onnx::Normalize(float* data, size_t size, float min_value, float max_value) {
for (size_t i = 0; i < size; ++i) {
data[i] = (data[i] - min_value) / (max_value - min_value);
}
}
// Coverts the output tensor data to a texture of a given ofFbo.
void Onnx::DataToFbo(float* data, size_t width, size_t height, ofFbo& fbo){
// Convert data into opencv mat
cv::Mat inputMat(height, width, CV_32FC1);
memcpy(inputMat.data, data, width * height * sizeof(float));
// // Convert to 8-bit grayscale Mat
cv::Mat inputMat8U;
inputMat.convertTo(inputMat8U, CV_8UC1, 255.0); // Convert float to 8-bit grayscale
// // Resize the image using OpenCV
cv::Mat resizedMat;
cv::resize(inputMat8U, resizedMat, cv::Size(fbo.getWidth(), fbo.getHeight()), 0, 0, cv::INTER_LINEAR);
// // Convert OpenCV Mat to ofPixels
pixels.allocate(fbo.getWidth(), fbo.getHeight(), OF_PIXELS_GRAY);
// // Copy data from resizedMat to ofPixels
memcpy(pixels.getData(), resizedMat.data, fbo.getWidth() * fbo.getHeight());
}
void Onnx::SetPixels(ofFbo& fbo){
fbo.begin();
ofTexture& texture = fbo.getTexture();
texture.loadData(pixels);
fbo.end();
}
void Onnx::Softmax(float* data, size_t size) {
std::vector<float> logits(data, data + size);
std::vector<float> expValues(size);
float maxLogit = *std::max_element(logits.begin(), logits.end());
// Calculate exp(logit - maxLogit) for numerical stability
std::transform(logits.begin(), logits.end(), expValues.begin(),
[maxLogit](float logit) { return std::exp(logit - maxLogit); });
float sumExp = std::accumulate(expValues.begin(), expValues.end(), 0.0f);
// Normalize to get probabilities
std::transform(expValues.begin(), expValues.end(), data,
[sumExp](float expValue) { return expValue / sumExp; });
}

56
src/onnx/Onnx.h

@ -0,0 +1,56 @@
#pragma once
#include <onnxruntime_cxx_api.h>
#include <algorithm> // std::generate
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <cstdlib> // For std::rand and std::srand
#include "ofMain.h"
#include "ofxOpenCv.h"
#include <numeric>
#include <cmath>
class Onnx {
public:
Onnx() {}
void Setup(ORTCHAR_T* modelPath, bool isLog, bool useCuda);
std::vector<Ort::Value> Run(ofImage &img);
std::vector<Ort::Value> RunBatch(std::vector<ofImage> &images);
std::string PrintShape(const std::vector<std::int64_t>& v);
Ort::Value VectorToTensor(std::vector<float>& data, const std::vector<std::int64_t>& shape);
Ort::Value GenerateTensor();
int CalculateProduct(const std::vector<std::int64_t>& v);
void TransformImage(ofImage &img);
float ReduceMin(const float* data, size_t size);
float ReduceMax(const float* data, size_t size);
void Normalize(float* data, size_t size, float min_value, float max_value);
void DataToFbo(float* data, size_t width, size_t height, ofFbo& fbo);
void Softmax(float* data, size_t size);
void SetPixels(ofFbo& fbo);
bool timeStamp = false;
bool log = false;
ofPixels pixels;
protected:
Ort::Env ort_env;
Ort::SessionOptions session_options;
cv::Mat image_array;
std::shared_ptr<Ort::Session> ort_session;
std::vector<std::string> input_node_names;
std::vector<int64_t> input_node_dims; // 1 input only.
std::size_t input_tensor_size = 1;
std::vector<float> input_values_handler;
Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
std::vector<std::string> output_node_names;
std::vector<std::vector<int64_t>> output_node_dims; // >=1 outputs
std::vector<Ort::Value> output_values;
Ort::Value dummy_tensor{ nullptr };
std::vector<ofImage> imageBatch;
int num_outputs = 1;
};

157
src/onnx/Yolo.cpp

@ -0,0 +1,157 @@
#include "Yolo.h"
// Takes output tensor data, processes that data, and returns an array of BoxfWithLandmarks (detected_faces)
void Yolo::ParseOutput(float* &output_tensors, std::vector<types::BoxfWithLandmarks> &sorted_faces, unsigned int num_anchors) {
std::vector<types::BoxfWithLandmarks> detected_faces;
for (unsigned int i = 0; i < num_anchors; ++i) {
const float *row_ptr = output_tensors + i * 16; // Each row contains 16 values
float obj_conf = row_ptr[4]; // Objectness confidence
if (obj_conf < 0.5) continue; // Filter out low-confidence detections
// Extract bounding box, confidence, and landmarks
float cx = row_ptr[0];
float cy = row_ptr[1];
float w = row_ptr[2];
float h = row_ptr[3];
float cls_conf = row_ptr[15]; // Face confidence score
if (cls_conf < 0.5) continue; // Filter by class confidence
types::BoxfWithLandmarks face;
face.box.x1 = cx - w / 2;
face.box.y1 = cy - h / 2;
face.box.x2 = cx + w / 2;
face.box.y2 = cy + h / 2;
face.box.score = cls_conf;
face.box.label_text = "face";
// Extract landmarks
for (int j = 0; j < 10; j += 2) {
face.landmarks.points.push_back(cv::Point2f(row_ptr[5 + j], row_ptr[5 + j + 1]));
}
detected_faces.push_back(face); // Store the detected face with landmarks
}
// Apply NMS to detected faces list to remove any overlapping bounding boxes.
NonMaximumSuppression(detected_faces, sorted_faces, 0.5);
// Sort faces based on confidence value
SortDetectedFaces(sorted_faces);
}
// Simple helper for drawing boxes given x1, y1, x2, y2 coordinates.
void Yolo::DrawBox(std::vector<types::BoxfWithLandmarks> &detected_faces){
for (const auto &face : detected_faces) {
ofNoFill();
float w = ofGetWindowWidth() / 2;
ofDrawRectangle(face.box.x1 + w, face.box.y1, ((face.box.x2 + w) - (face.box.x1 + w)), face.box.y2 - face.box.y1);
}
}
// Simple helper to draw boxes at the center of the detected face.
void Yolo::DrawCenter(std::vector<types::BoxfWithLandmarks> &detected_faces){
ofNoFill();
glm::vec2 position = detected_faces[0].box.center;
ofDrawCircle(position.x + ofGetWindowWidth() / 2, position.y, 5);
}
// Applies NMS to an array of BoxfWithLandmarks.face.boxes, to remove any overlapping bounding boxes.
void Yolo::NonMaximumSuppression(std::vector<types::BoxfWithLandmarks> &input_faces, std::vector<types::BoxfWithLandmarks> &output_faces, float iou_threshold)
{
// Sort the boxes by their confidence scores (highest to lowest)
std::sort(input_faces.begin(), input_faces.end(),
[](const types::BoxfWithLandmarks &a, const types::BoxfWithLandmarks &b) {
return a.box.score > b.box.score;
});
std::vector<int> suppressed(input_faces.size(), 0); // Suppression mask
// Iterate through the boxes
for (size_t i = 0; i < input_faces.size(); ++i) {
if (suppressed[i]) continue; // Skip already suppressed boxes
// Add this box to the output
output_faces.push_back(input_faces[i]);
for (size_t j = i + 1; j < input_faces.size(); ++j) {
if (suppressed[j]) continue;
// Calculate IoU between box i and box j
float iou = input_faces[i].box.iou_of(input_faces[j].box);
// Suppress box j if IoU is greater than the threshold
if (iou > iou_threshold) {
suppressed[j] = 1;
}
}
}
}
// Scales the coordinates of detected faces from the model output dimensions -> the original input image dimensions.
void Yolo::ConvertBoxCoordsToOriginalSize(std::vector<types::BoxfWithLandmarks> &detected_faces, size_t original_width, size_t original_height){
float width_scale = static_cast<float>(original_width) / modelSize;
float height_scale = static_cast<float>(original_height) / modelSize;
for (auto &face : detected_faces) {
// Convert bounding box coordinates
face.box.x1 *= width_scale;
face.box.y1 *= height_scale;
face.box.x2 *= width_scale;
face.box.y2 *= height_scale;
face.box.UpdateCenter();
// Convert landmarks
for (size_t j = 0; j < face.landmarks.points.size(); ++j) {
face.landmarks.points[j].x *= width_scale;
face.landmarks.points[j].y *= height_scale;
}
}
}
void Yolo::CropFaceToImage(ofImage &inputImage, types::BoxfWithLandmarks &face, ofxCvColorImage &colorImage){
colorImage.resetROI();
// Calculate the coordinates and dimensions of the face box
float x1 = face.box.x1;
float y1 = face.box.y1;
float x2 = face.box.x2;
float y2 = face.box.y2;
// Ensure coordinates are within the input image bounds
x1 = ofClamp(x1, 0.0f, (float)inputImage.getWidth());
y1 = ofClamp(y1, 0.0f, (float)inputImage.getHeight());
x2 = ofClamp(x2, 0.0f, (float)inputImage.getWidth());
y2 = ofClamp(y2, 0.0f, (float)inputImage.getHeight());
// Calculate width and height of the cropped area
float cropWidth = x2 - x1;
float cropHeight = y2 - y1;
// Create cropped section, defined by the box coords
ofFbo tempFbo;
tempFbo.allocate(cropWidth, cropHeight, GL_RGB);
tempFbo.begin();
ofClear(0);
inputImage.getTexture().drawSubsection(0, 0, cropWidth, cropHeight, x1, y1);
tempFbo.end();
ofFloatPixels pix;
tempFbo.readToPixels(pix);
colorImage.setFromPixels(pix);
colorImage.resize(260, 260);
}
void Yolo::SortDetectedFaces(std::vector<types::BoxfWithLandmarks> &detectedFaces){
std::sort(detectedFaces.begin(), detectedFaces.end(),
[](const types::BoxfWithLandmarks &a, const types::BoxfWithLandmarks &b) {
return a.box.score > b.box.score; // Sort in descending order
});
}

81
src/onnx/Yolo.h

@ -0,0 +1,81 @@
#ifndef YOLO
#define YOLO
#include "ofMain.h"
#include "ofxOpenCv.h"
#include <onnxruntime_cxx_api.h>
#include <algorithm>
struct Emotef{
float emotions[7];
};
namespace types {
/*
Struct for storing information about detetced faces.
*/
struct Boxf {
float x1, y1, x2, y2; // Coordinates of the bounding box
float score; // Confidence score
glm::vec2 center;
int label; // Class label (e.g., "face")
std::string label_text;
Emotef emotional_state;
// Calculate Intersection over Union (IoU) with another box
float iou_of(const Boxf &other) const {
float intersection_x1 = std::max(x1, other.x1);
float intersection_y1 = std::max(y1, other.y1);
float intersection_x2 = std::min(x2, other.x2);
float intersection_y2 = std::min(y2, other.y2);
float intersection_area = std::max(0.0f, intersection_x2 - intersection_x1) *
std::max(0.0f, intersection_y2 - intersection_y1);
float this_area = (x2 - x1) * (y2 - y1);
float other_area = (other.x2 - other.x1) * (other.y2 - other.y1);
float union_area = this_area + other_area - intersection_area;
return intersection_area / union_area;
}
void UpdateCenter(){
center.x = (x1 + x2) / 2;
center.y = (y1 + y2) / 2;
}
void SetEmotionState(float* emotional_data){
std::copy(emotional_data, emotional_data + 7, emotional_state.emotions);
}
};
struct Landmarks {
std::vector<cv::Point2f> points; // Facial landmarks points (e.g., eyes, nose, mouth)
bool flag = false; // Indicator if landmarks are available
};
struct BoxfWithLandmarks {
Boxf box; // Bounding box for the face
Landmarks landmarks; // Landmark points for the face
bool flag = false; // Indicator if this detection is valid
};
}
class Yolo{
public:
Yolo(){};
void ParseOutput(float* &out_ptr, std::vector<types::BoxfWithLandmarks> &sorted_faces, unsigned int num_anchors);
void DrawBox(std::vector<types::BoxfWithLandmarks> &detected_faces);
void DrawCenter(std::vector<types::BoxfWithLandmarks> &detected_faces);
void NonMaximumSuppression(std::vector<types::BoxfWithLandmarks> &input_faces, std::vector<types::BoxfWithLandmarks> &output_faces, float iou_threshold);
void ConvertBoxCoordsToOriginalSize(std::vector<types::BoxfWithLandmarks> &detected_faces, size_t original_width, size_t original_height);
void CropFaceToImage(ofImage &inputImage, types::BoxfWithLandmarks &face, ofxCvColorImage &colorImage);
void SortDetectedFaces(std::vector<types::BoxfWithLandmarks> &detectedFaces);
private:
// Input dimenions of the model -- used for coordinate scaling.
size_t modelSize = 640;
};
#endif

0
src/MeshGenerator.cpp → src/utils/MeshGenerator.cpp

0
src/MeshGenerator.h → src/utils/MeshGenerator.h

0
src/VP.cpp → src/vp/VP.cpp

0
src/VP.h → src/vp/VP.h

0
src/VpTree.hpp → src/vp/VpTree.hpp

Loading…
Cancel
Save