@@ -2,33 +2,47 @@
 //--------------------------------------------------------------
 void ofApp::setup(){

     /* ofSettings */
     ofDisableArbTex();
-    ofSetFrameRate(60);
-    // ofSetVerticalSync(true);
+    ofSetFrameRate(24);
+    ofSetVerticalSync(true);
+    window_width = ofGetWindowWidth();
+    window_height = ofGetWindowHeight();

     /* load font */
     tf.load("data/fonts/jetbrainsmono-regular.ttf", 20);

-    map.Setup();
-
-    /* load shader */
-    depthToColourShader.load("data/shader/rampShader.vert", "data/shader/rampShader.frag");
-
-    //player.Setup();
-    //player.SetVideo("videos/demo.mp4", fbo);
+    /* setup map */
+    map.Setup();
+
+    emoteImage.allocate(260, 260);
+    tempImage.allocate(emoteImage.getWidth(), emoteImage.getHeight(), OF_IMAGE_COLOR);
+
+    /* setup video */
+    player.Setup();
+    player.SetVideo("videos/demo.mp4", model_output_fbo_1);

     /* model paths */
     ORTCHAR_T* modelPath = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/depth_anything_v2_vitb.onnx";
     ORTCHAR_T* modelPath2 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/yolov5s-face.onnx";
     ORTCHAR_T* modelPath3 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/rgb_emotion.onnx";
     ORTCHAR_T* modelPath4 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/depth_anything_v2_vits.onnx";

     /* Setup models (modelPath, log, useCuda) */
     yolo.Setup(modelPath2, false, true);
     depth.Setup(modelPath, false, true);
     depth_small.Setup(modelPath4, false, true);
     emotion.Setup(modelPath3, false, true);

     /* Load shader, allocate rampedFbo */
     depthToColourShader.load("data/shader/rampShader.vert", "data/shader/rampShader.frag");
-    fbo.allocate(1600, 800, GL_RGB);
-    rampedFbo.allocate(1600, 800);
+
+    /* Depth output fbo */
+    model_output_fbo.allocate(window_width / 2, window_height, GL_RGB);
+
+    /* Shader output */
+    rampedFbo.allocate(window_width, window_height);

-    emoteImage.allocate(260, 260);
-    tempImage.allocate(emoteImage.getWidth(), emoteImage.getHeight(), OF_IMAGE_COLOR);
+    screen_fbo.allocate(window_width, window_height, GL_RGB);
 }
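
Not part of the diff, but worth noting: the ORTCHAR_T paths above are absolute and machine-specific. A minimal portable sketch, assuming ORTCHAR_T is plain char on this Linux build and that Onnx::Setup() keeps the (modelPath, log, useCuda) signature used above:

    // Hypothetical: resolve the model file relative to bin/data with the stock
    // openFrameworks call ofToDataPath(). The std::string must outlive Setup()
    // if the wrapper holds on to the pointer.
    std::string depthPath = ofToDataPath("depth_anything_v2_vitb.onnx", true);
    depth.Setup(depthPath.data(), false, true);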

@@ -37,7 +51,6 @@ void ofApp::setup(){
 void ofApp::update(){

     /* Check whether the application has moved past the first frame,
        since the models need to load and the first inference is quite slow */
     auto start = std::chrono::high_resolution_clock::now();
     if(ofGetFrameNum() > 0)
         firstRun = false;

@@ -52,35 +65,29 @@ void ofApp::update(){
     }

     /* Setup model input using ofImage and the allocated fbo */
-    //player.Update(img);
-    //img.setFromPixels(player.GetVideoPixels());
+    player.Update(img);
+    img.setFromPixels(player.GetVideoPixels());

     /* Run models */
     try{
-        auto output_tensors = depth.Run(map.fboImage);
-        float* output_ptr = output_tensors.front().GetTensorMutableData<float>();
-        size_t num_elements = output_tensors.front().GetTensorTypeAndShapeInfo().GetElementCount();
-
-        float min_value = depth.ReduceMin(output_ptr, num_elements);
-        float max_value = depth.ReduceMax(output_ptr, num_elements);
-
-        depth.Normalize(output_ptr, num_elements, min_value, max_value);
-
-        depth.DataToFbo(output_ptr, 518, 518, fbo);
+        // map
+        inferDepthImage(model_output_fbo, map.fboImage, depth);

-        auto output_tensors_face = yolo.Run(map.fboImage);
+        // video player
+        inferDepthImage(model_output_fbo_1, img, depth_small);
+
+        // auto output_tensors_face = yolo.Run(model_input_img);

-        auto output_faces = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();
+        // auto output_faces = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();

-        unsigned int num_anchors = output_faces[1]; // Number of anchors
+        // unsigned int num_anchors = output_faces[1]; // Number of anchors

-        float* output_face_ptr = output_tensors_face.front().GetTensorMutableData<float>();
+        // float* output_face_ptr = output_tensors_face.front().GetTensorMutableData<float>();

-        faceDetector.ParseOutput(output_face_ptr, detected_faces, num_anchors);
+        // faceDetector.ParseOutput(output_face_ptr, detected_faces, num_anchors);

-        faceDetector.ConvertBoxCoordsToOriginalSize(detected_faces, fbo.getWidth(), fbo.getHeight());
+        // faceDetector.ConvertBoxCoordsToOriginalSize(detected_faces, outFbo.getWidth(), outFbo.getHeight());

         /* As no input is generated for the emotion recognition model, run a dummy vector
            through the model so it can load */
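
The code that comment introduces falls outside this hunk. A hedged sketch of what such a warm-up pass could look like, reusing the 260x260 emotion input size from setup() and the Onnx::Run(ofImage&) call used elsewhere in this file:

    // Hypothetical warm-up: push one blank 260x260 frame through the emotion
    // model so its first real inference doesn't stall the draw loop.
    ofImage dummy;
    dummy.allocate(260, 260, OF_IMAGE_COLOR);
    dummy.setColor(ofColor::black);
    dummy.update();
    auto warmup_tensors = emotion.Run(dummy);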

@@ -112,30 +119,59 @@ void ofApp::update(){
         std::cout << "Model did not run" << std::endl;
     }

     auto end = std::chrono::high_resolution_clock::now();
     std::chrono::duration<float> duration = end - start;
     std::cout << "Time taken for Update: " << duration.count() << " seconds" << std::endl;
 }
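
The start/end chrono pair in update() works, but the same measurement can be wrapped in a small RAII timer so one local handles both ends. A sketch under that assumption (ScopeTimer is a hypothetical name, not from this codebase):

    // Prints the elapsed time when the object goes out of scope.
    struct ScopeTimer {
        const char* label;
        std::chrono::high_resolution_clock::time_point start =
            std::chrono::high_resolution_clock::now();
        ~ScopeTimer(){
            std::chrono::duration<float> d =
                std::chrono::high_resolution_clock::now() - start;
            std::cout << label << ": " << d.count() << " seconds" << std::endl;
        }
    };

    // Usage, as the first line of ofApp::update():
    // ScopeTimer t{"Time taken for Update"};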

 //--------------------------------------------------------------
 void ofApp::draw(){
     map.Draw();

+    screen_fbo.begin();
+
+    // Target area for model_output_fbo_1: the right half of the screen
+    float fbo_1_target_width = window_width * 0.5; // half the screen width (990px)
+    float fbo_1_target_height = window_height;     // full screen height
+
+    // Aspect ratios of the video and of the target area
+    float video_aspect_ratio = model_output_fbo_1.getWidth() / model_output_fbo_1.getHeight();
+    float fbo_aspect_ratio = fbo_1_target_width / fbo_1_target_height;
+
+    // Scale to cover the target area while maintaining aspect ratio
+    float new_width, new_height;
+    if (fbo_aspect_ratio > video_aspect_ratio) {
+        // Target is wider: match the width, let the height overflow
+        new_width = fbo_1_target_width;
+        new_height = new_width / video_aspect_ratio;
+    } else {
+        // Target is taller: match the height, let the width overflow
+        new_height = fbo_1_target_height;
+        new_width = new_height * video_aspect_ratio;
+    }
+
+    // Centre the video in the right half; any overflow is cropped
+    float x_pos = (window_width * 0.75) - (new_width / 2);
+    float y_pos = (window_height - new_height) / 2;
+
+    // Draw the scaled video into the screen fbo
+    model_output_fbo_1.draw(x_pos, y_pos, new_width, new_height);
+
+    model_output_fbo.draw(0, 0);
+
+    screen_fbo.end();

     renderDepthMap();

-    if(!firstRun && detected_faces.size() != 0){
-        faceDetector.DrawBox(detected_faces);
-        faceDetector.DrawCenter(detected_faces);
-    }
+    // if(!firstRun && detected_faces.size() != 0){
+    //     faceDetector.DrawBox(detected_faces);
+    //     faceDetector.DrawCenter(detected_faces);
+    // }

     ofPushMatrix();
     ofSetColor(255);
     ofSetBackgroundColor(0);
     tf.drawString(std::to_string(ofGetFrameRate()), 10, 30);
     ofPopMatrix();

     // emoteImage.draw(640, 0);
     // for(auto& face : detected_faces){
     //     ofDrawBitmapString(std::to_string(face.box.emotional_state.emotions[0]), 700, 300);

@@ -143,6 +179,19 @@ void ofApp::draw(){
 }
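
The cover-fit arithmetic in draw() is a candidate for a helper. A sketch, not part of the diff; computeCoverRect is a hypothetical name, while ofRectangle and std::max are stock:

    // Scale (srcW, srcH) up to cover a target area while preserving aspect
    // ratio, centring the result so any overflow is cropped symmetrically.
    ofRectangle computeCoverRect(float srcW, float srcH,
                                 float targetX, float targetY,
                                 float targetW, float targetH){
        float scale = std::max(targetW / srcW, targetH / srcH);
        float w = srcW * scale;
        float h = srcH * scale;
        return ofRectangle(targetX + (targetW - w) / 2,
                           targetY + (targetH - h) / 2, w, h);
    }

    // Equivalent to the block above, for the right half of the screen:
    // ofRectangle r = computeCoverRect(model_output_fbo_1.getWidth(),
    //                                  model_output_fbo_1.getHeight(),
    //                                  window_width * 0.5, 0,
    //                                  window_width * 0.5, window_height);
    // model_output_fbo_1.draw(r.x, r.y, r.width, r.height);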

+void ofApp::inferDepthImage(ofFbo& fbo, ofImage& img, Onnx& model){
+    auto output_tensors = model.Run(img);
+    float* output_ptr = output_tensors.front().GetTensorMutableData<float>();
+    size_t num_elements = output_tensors.front().GetTensorTypeAndShapeInfo().GetElementCount();
+
+    float min_value = model.ReduceMin(output_ptr, num_elements);
+    float max_value = model.ReduceMax(output_ptr, num_elements);
+
+    model.Normalize(output_ptr, num_elements, min_value, max_value);
+
+    // The depth models output 518x518; DataToFbo rescales into the target fbo
+    model.DataToFbo(output_ptr, 518, 518, fbo);
+}
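
For reference, the ReduceMin/ReduceMax/Normalize sequence above can be expressed with the standard library alone; a sketch (normalizeDepth is a hypothetical helper; needs <algorithm>):

    // Rescale a raw float buffer into [0, 1] in place: one pass to find the
    // range, one to normalize. Copy the extrema before writing, since the
    // iterators point into the buffer being modified.
    void normalizeDepth(float* data, size_t n){
        auto [loIt, hiIt] = std::minmax_element(data, data + n);
        float lo = *loIt, hi = *hiIt;
        float range = std::max(hi - lo, 1e-6f); // guard against flat output
        for(size_t i = 0; i < n; ++i)
            data[i] = (data[i] - lo) / range;
    }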

 //--------------------------------------------------------------
 void ofApp::inferEmotionalState(){

@@ -184,15 +233,17 @@
     }
 }

 /*
     Depth Map Shader Pass
 */
 void ofApp::renderDepthMap(){
     rampedFbo.begin();

     depthToColourShader.begin();
-    depthToColourShader.setUniformTexture("tex0", fbo.getTexture(), 0);
-    depthToColourShader.setUniformTexture("tex1", map.fboImage.getTexture(), 1);
+    depthToColourShader.setUniformTexture("tex0", screen_fbo.getTexture(), 0);
     depthToColourShader.setUniform1f("texW", rampedFbo.getWidth());
     depthToColourShader.setUniform1f("texH", rampedFbo.getHeight());
-    fbo.draw(0, 0);
+    screen_fbo.draw(0, 0);
     depthToColourShader.end();

     rampedFbo.end();