From 589734cce9848d21511bbc8640f819d79e48247b Mon Sep 17 00:00:00 2001
From: cailean
Date: Fri, 4 Oct 2024 14:36:14 +0100
Subject: [PATCH] before image cropping pre-inference

---
 src/Map.cpp    |   7 ++-
 src/Player.cpp |  26 +++++++--
 src/Player.h   |   3 +-
 src/main.cpp   |   2 +-
 src/ofApp.cpp  | 141 +++++++++++++++++++++++++++++++++----------------
 src/ofApp.h    |  12 ++++-
 6 files changed, 136 insertions(+), 55 deletions(-)

diff --git a/src/Map.cpp b/src/Map.cpp
index d3a59eb..2538f4e 100644
--- a/src/Map.cpp
+++ b/src/Map.cpp
@@ -18,9 +18,9 @@ void Map::Setup(){
         SetupTSNE();
     }
 
-    mapFbo.allocate(ofGetWindowWidth(), ofGetWindowHeight(), GL_RGB);
+    mapFbo.allocate(ofGetWindowWidth() / 2, ofGetWindowHeight(), GL_RGB);
 
-    fboImage.allocate(ofGetWindowWidth(), ofGetWindowHeight(), OF_IMAGE_COLOR);
+    fboImage.allocate(ofGetWindowWidth() / 2, ofGetWindowHeight(), OF_IMAGE_COLOR);
 
     Setup3D();
 
@@ -66,9 +66,8 @@ void Map::Draw(){
     mapFbo.end();
 
     mapFbo.readToPixels(fboPixels);
-    fboImage.setFromPixels(fboPixels);
 
-    //mapFbo.draw(0, 0);
+    fboImage.setFromPixels(fboPixels);
 }
 
 /*
diff --git a/src/Player.cpp b/src/Player.cpp
index 8d1a491..15737ff 100644
--- a/src/Player.cpp
+++ b/src/Player.cpp
@@ -44,9 +44,7 @@ ofPixels Player::GetVideoPixels(){
 
 void Player::SetVideo(std::string path, ofFbo &fbo){
     videoPlayer.load(path);
     videoPlayer.setFrame(800);
-    //fbo.allocate(videoPlayer.getWidth(), videoPlayer.getHeight(), GL_RGB);
-    // Just setting the video dims here for the tsne map!
-    fbo.allocate(1600, 800, GL_RGB);
+    fbo.allocate(videoPlayer.getWidth(), videoPlayer.getHeight(), GL_RGB);
 }
 
@@ -54,4 +52,26 @@ void Player::SetRandomFrame(){
     int randomFrame = ofRandom(0, videoPlayer.getTotalNumFrames());
     std::cout << "setting frame: " << randomFrame << std::endl;
     videoPlayer.setFrame(randomFrame);
+}
+
+void Player::SetVideoPosition(ofFbo& output_fbo){
+    int playerW = videoPlayer.getWidth();
+    int playerH = videoPlayer.getHeight();
+
+    // Calculate the scaling needed to cover the output FBO area
+    float targetWidth = output_fbo.getWidth();
+    float targetHeight = output_fbo.getHeight();
+
+    float scaleX = targetWidth / playerW;
+    float scaleY = targetHeight / playerH;
+
+    // Use the larger scaling factor to ensure coverage
+    float scale = std::max(scaleX, scaleY);
+
+    // Calculate scaled dimensions
+    int scaledWidth = playerW * scale;
+    int scaledHeight = playerH * scale;
+
+    // Center the video within the FBO
+    centerPosition = glm::vec2((targetWidth - scaledWidth) / 2, (targetHeight - scaledHeight) / 2);
 }
\ No newline at end of file
diff --git a/src/Player.h b/src/Player.h
index 98baf26..aa36817 100644
--- a/src/Player.h
+++ b/src/Player.h
@@ -12,7 +12,7 @@ class Player {
        void Draw();
        void SetVideo(std::string path, ofFbo &fbo);
        ofPixels GetVideoPixels();
-       void SetVideoPosition();
+       void SetVideoPosition(ofFbo& output_fbo);
        void SetRandomFrame();
        void SetupGUI();
        void UpdateGUI();
@@ -32,6 +32,7 @@ class Player {
 
        ofFbo fbo;
 
 
+       Player();
 };
 
diff --git a/src/main.cpp b/src/main.cpp
index 7592dfd..3f8924d 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -6,7 +6,7 @@ int main( ){
 
    //Use ofGLFWWindowSettings for more options like multi-monitor fullscreen
    ofGLWindowSettings settings;
-   settings.setSize(1600, 800);
+   settings.setSize(1920, 1080);
    settings.setGLVersion(3, 2);
    settings.windowMode = OF_WINDOW; //can also be OF_FULLSCREEN
 
diff --git a/src/ofApp.cpp b/src/ofApp.cpp
index 26084b0..0ad4c1b 100644
--- a/src/ofApp.cpp
+++ b/src/ofApp.cpp
@@ -2,33 +2,47 @@
 //--------------------------------------------------------------
 void ofApp::setup(){
+    /* ofSettings */
     ofDisableArbTex();
-    ofSetFrameRate(60);
-    // ofSetVerticalSync(true);
+    ofSetFrameRate(24);
+    ofSetVerticalSync(true);
+    window_width = ofGetWindowWidth();
+    window_height = ofGetWindowHeight();
 
+    /* load font */
     tf.load("data/fonts/jetbrainsmono-regular.ttf", 20);
 
-    map.Setup();
+    /* load shader */
+    depthToColourShader.load("data/shader/rampShader.vert", "data/shader/rampShader.frag");
 
-    //player.Setup();
-    //player.SetVideo("videos/demo.mp4", fbo);
+    /* setup map */
+    map.Setup();
 
-    emoteImage.allocate(260, 260);
-    tempImage.allocate(emoteImage.getWidth(), emoteImage.getHeight(), OF_IMAGE_COLOR);
+    /* setup video */
+    player.Setup();
+    player.SetVideo("videos/demo.mp4", model_output_fbo_1);
 
+    /* setup models (modelPath, log, useCuda) */
     ORTCHAR_T* modelPath = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/depth_anything_v2_vitb.onnx";
     ORTCHAR_T* modelPath2 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/yolov5s-face.onnx";
     ORTCHAR_T* modelPath3 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/rgb_emotion.onnx";
+    ORTCHAR_T* modelPath4 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/depth_anything_v2_vits.onnx";
 
-    /* Setup Models (modelPath, log, useCuda) */
     yolo.Setup(modelPath2, false, true);
     depth.Setup(modelPath, false, true);
+    depth_small.Setup(modelPath4, false, true);
     emotion.Setup(modelPath3, false, true);
 
-    /* Load shader, allocated rampedFbo */
-    depthToColourShader.load("data/shader/rampShader.vert", "data/shader/rampShader.frag");
-    fbo.allocate(1600, 800, GL_RGB);
-    rampedFbo.allocate(1600, 800);
+    /* Depth output fbo */
+    model_output_fbo.allocate(window_width / 2, window_height, GL_RGB);
+
+    /* Shader output */
+    rampedFbo.allocate(window_width, window_height);
+
+    emoteImage.allocate(260, 260);
+    tempImage.allocate(emoteImage.getWidth(), emoteImage.getHeight(), OF_IMAGE_COLOR);
+
+    screen_fbo.allocate(window_width, window_height, GL_RGB);
 }
 
@@ -37,7 +51,6 @@ void ofApp::setup(){
 
 void ofApp::update(){
     /* Check to see if the application has moved to the first frame
    As the models need to load first, as the first inference is quite slow */
-    auto start = std::chrono::high_resolution_clock::now();
     if(ofGetFrameNum() > 0)
         firstRun = false;
 
@@ -52,35 +65,29 @@ void ofApp::update(){
     }
 
     /* Setup model input using ofImage, allocated fbo */
-    //player.Update(img);
-    //img.setFromPixels(player.GetVideoPixels());
-
+    player.Update(img);
+    img.setFromPixels(player.GetVideoPixels());
+
     /* Run Models */
     try{
-
-        auto output_tensors = depth.Run(map.fboImage);
-        float* output_ptr = output_tensors.front().GetTensorMutableData<float>();
-        size_t num_elements = output_tensors.front().GetTensorTypeAndShapeInfo().GetElementCount();
-
-        float min_value = depth.ReduceMin(output_ptr, num_elements);
-        float max_value = depth.ReduceMax(output_ptr, num_elements);
-
-        depth.Normalize(output_ptr, num_elements, min_value, max_value);
-
-        depth.DataToFbo(output_ptr, 518, 518, fbo);
+        // map
+        inferDepthImage(model_output_fbo, map.fboImage, depth);
 
-        auto output_tensors_face = yolo.Run(map.fboImage);
+        // video player
+        inferDepthImage(model_output_fbo_1, img, depth_small);
+
+        // auto output_tensors_face = yolo.Run(model_input_img);
 
-        auto output_faces = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();
+        // auto output_faces = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();
 
-        unsigned int num_anchors = output_faces[1]; // Number of anchors
+        // unsigned int num_anchors = output_faces[1]; // Number of anchors
 
-        float* output_face_ptr = output_tensors_face.front().GetTensorMutableData<float>();
+        // float* output_face_ptr = output_tensors_face.front().GetTensorMutableData<float>();
 
-        faceDetector.ParseOutput(output_face_ptr, detected_faces, num_anchors);
+        // faceDetector.ParseOutput(output_face_ptr, detected_faces, num_anchors);
 
-        faceDetector.ConvertBoxCoordsToOriginalSize(detected_faces, fbo.getWidth(), fbo.getHeight());
+        // faceDetector.ConvertBoxCoordsToOriginalSize(detected_faces, outFbo.getWidth(), outFbo.getHeight());
 
         /* As no input is generated for the emotion recognition model, run a dummy vector through the model
        So it can load */
@@ -112,30 +119,59 @@ void ofApp::update(){
             std::cout << "Model did not run" << std::endl;
         }
-
-        auto end = std::chrono::high_resolution_clock::now();
-        std::chrono::duration<double> duration = end - start;
-        std::cout << "Time taken for Update: " << duration.count() << " seconds" << std::endl;
-
 }
 
 //--------------------------------------------------------------
 void ofApp::draw(){
     map.Draw();
+
+    screen_fbo.begin();
+
+    // Calculate the target width and height for model_output_fbo_1
+    float fbo_1_target_width = window_width * 0.5; // 1/2 of the screen width (960px)
+    float fbo_1_target_height = window_height; // Full height of the screen
+
+    // Calculate the aspect ratio of the video and the FBO
+    float video_aspect_ratio = model_output_fbo_1.getWidth() / model_output_fbo_1.getHeight();
+    float fbo_aspect_ratio = fbo_1_target_width / fbo_1_target_height;
+
+    // Adjust the scaling to cover the FBO area while maintaining aspect ratio
+    float new_width, new_height;
+    if (fbo_aspect_ratio > video_aspect_ratio) {
+        // FBO is wider; scale by width to fill the FBO
+        new_width = fbo_1_target_width;
+        new_height = new_width / video_aspect_ratio; // Scale height to maintain aspect ratio
+    } else {
+        // FBO is taller; scale by height to fill the FBO
+        new_height = fbo_1_target_height;
+        new_width = new_height * video_aspect_ratio; // Scale width to maintain aspect ratio
+    }
+
+    // Center the video to ensure it fills the FBO and is cropped if necessary
+    float x_pos = (window_width * 0.75) - (new_width / 2);
+    float y_pos = (window_height - new_height) / 2; // Center vertically
+
+    // Draw the scaled video inside the FBO
+    model_output_fbo_1.draw(x_pos, y_pos, new_width, new_height);
+
+    model_output_fbo.draw(0, 0);
+
+    screen_fbo.end();
 
     renderDepthMap();
 
-    if(!firstRun && detected_faces.size() != 0){
-        faceDetector.DrawBox(detected_faces);
-        faceDetector.DrawCenter(detected_faces);
-    }
+    // if(!firstRun && detected_faces.size() != 0){
+    //     faceDetector.DrawBox(detected_faces);
+    //     faceDetector.DrawCenter(detected_faces);
+    // }
 
     ofPushMatrix();
     ofSetColor(255);
     ofSetBackgroundColor(0);
     tf.drawString(std::to_string(ofGetFrameRate()), 10, 30);
     ofPopMatrix();
+
     // emoteImage.draw(640, 0);
     // for(auto& face : detected_faces){
     //     ofDrawBitmapString(std::to_string(face.box.emotional_state.emotions[0]), 700, 300);
     // }
 }
 
+void ofApp::inferDepthImage(ofFbo& fbo, ofImage& img, Onnx& model){
+    auto output_tensors = model.Run(img);
+    float* output_ptr = output_tensors.front().GetTensorMutableData<float>();
+    size_t num_elements = output_tensors.front().GetTensorTypeAndShapeInfo().GetElementCount();
+
+    float min_value = model.ReduceMin(output_ptr, num_elements);
+    float max_value = model.ReduceMax(output_ptr, num_elements);
+
+    model.Normalize(output_ptr, num_elements, min_value, max_value);
+
+    model.DataToFbo(output_ptr, 518, 518, fbo);
+}
+
 //--------------------------------------------------------------
 void ofApp::inferEmotionalState(){
 
@@ -184,15 +233,17 @@ void ofApp::inferEmotionalState(){
     }
 }
 
+/*
+    Depth Map Shader Pass
+*/
 void ofApp::renderDepthMap(){
     rampedFbo.begin();
     depthToColourShader.begin();
-    depthToColourShader.setUniformTexture("tex0", fbo.getTexture(), 0);
-    depthToColourShader.setUniformTexture("tex1", map.fboImage.getTexture(), 1);
+    depthToColourShader.setUniformTexture("tex0", screen_fbo.getTexture(), 0);
     depthToColourShader.setUniform1f("texW", rampedFbo.getWidth());
     depthToColourShader.setUniform1f("texH", rampedFbo.getHeight());
 
-    fbo.draw(0, 0);
+    screen_fbo.draw(0, 0);
     depthToColourShader.end();
     rampedFbo.end();
 
diff --git a/src/ofApp.h b/src/ofApp.h
index df45979..037e908 100644
--- a/src/ofApp.h
+++ b/src/ofApp.h
@@ -31,15 +31,20 @@ class ofApp : public ofBaseApp{
        void gotMessage(ofMessage msg);
        void inferEmotionalState();
        void renderDepthMap();
+       void inferDepthImage(ofFbo& fbo, ofImage& img, Onnx& model);
+
+       float window_height;
+       float window_width;
 
        ofImage img;
-       ofFbo fbo;
+       cv::Mat cvImg;
        ofVideoGrabber webcam;
 
        Player player;
        bool firstRun = true;
 
        Onnx depth;
+       Onnx depth_small;
        Onnx yolo;
        Onnx emotion;
        ofxCvColorImage emoteImage;
@@ -57,4 +62,9 @@ class ofApp : public ofBaseApp{
 
        ofFbo rampedFbo;
        ofTrueTypeFont tf;
+
+       ofFbo video_player_fbo;
+       ofFbo model_output_fbo;
+       ofFbo model_output_fbo_1;
+       ofFbo screen_fbo;
 };
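
The block added to ofApp::draw() above (and the related Player::SetVideoPosition) implements an aspect-fill, or "cover", placement: the video FBO is scaled until it fills its target region, its aspect ratio is preserved, and it is centred so any overflow is cropped. For reference only, and not part of the patch, the same calculation can be written as a standalone helper; the function name coverFitRect is an illustrative assumption.

    #include "ofMain.h"

    // Illustrative sketch (not in the repo): scale a srcW x srcH image so it
    // covers the target area while keeping its aspect ratio, centred so any
    // overflow extends equally past the target edges and is cropped.
    ofRectangle coverFitRect(float srcW, float srcH,
                             float targetX, float targetY,
                             float targetW, float targetH){
        float srcAspect    = srcW / srcH;
        float targetAspect = targetW / targetH;

        float newW, newH;
        if (targetAspect > srcAspect) {
            // Target is relatively wider: match widths, let the height overflow.
            newW = targetW;
            newH = newW / srcAspect;
        } else {
            // Target is relatively taller: match heights, let the width overflow.
            newH = targetH;
            newW = newH * srcAspect;
        }

        // Centre within the target area.
        float x = targetX + (targetW - newW) / 2.0f;
        float y = targetY + (targetH - newH) / 2.0f;
        return ofRectangle(x, y, newW, newH);
    }

With the 1920x1080 window set in main.cpp, calling this with the right half of the screen as the target (targetX = window_width / 2, targetW = window_width / 2) gives the same values as the inline maths in draw(); the x_pos there reduces to window_width * 0.75 - new_width / 2.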
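
inferDepthImage() runs a depth model on an image, min-max normalises the raw tensor output through the Onnx wrapper's ReduceMin, ReduceMax and Normalize calls, and finally uploads the result to an FBO with DataToFbo. A minimal sketch of that normalisation step, written against the standard library rather than the wrapper (the function name normalizeDepth is an assumption for illustration):

    #include <algorithm>
    #include <cstddef>

    // Map raw depth values to [0, 1] in place, mirroring what the
    // ReduceMin / ReduceMax / Normalize sequence does before DataToFbo
    // converts the buffer into grayscale pixels.
    void normalizeDepth(float* data, std::size_t n){
        if (n == 0) return;
        float minV = *std::min_element(data, data + n);
        float maxV = *std::max_element(data, data + n);
        float range = (maxV - minV) > 0.0f ? (maxV - minV) : 1.0f; // guard against flat output
        for (std::size_t i = 0; i < n; ++i)
            data[i] = (data[i] - minV) / range;
    }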