@@ -2,33 +2,47 @@
 //--------------------------------------------------------------
 void ofApp::setup(){
     /* ofSettings */
     ofDisableArbTex();
-    ofSetFrameRate(60);
-    // ofSetVerticalSync(true);
+    ofSetFrameRate(24);
+    ofSetVerticalSync(true);
+    window_width = ofGetWindowWidth();
+    window_height = ofGetWindowHeight();
 
     /* load font */
     tf.load("data/fonts/jetbrainsmono-regular.ttf", 20);
 
-    map.Setup();
-
-    /* load shader */
-    depthToColourShader.load("data/shader/rampShader.vert", "data/shader/rampShader.frag");
-
-    //player.Setup();
-    //player.SetVideo("videos/demo.mp4", fbo);
-
-    emoteImage.allocate(260, 260);
-    tempImage.allocate(emoteImage.getWidth(), emoteImage.getHeight(), OF_IMAGE_COLOR);
+    /* setup map */
+    map.Setup();
+
+    /* setup video */
+    player.Setup();
+    player.SetVideo("videos/demo.mp4", model_output_fbo_1);
 
-    /* setup models (modelPath, log, useCuda) */
     ORTCHAR_T* modelPath = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/depth_anything_v2_vitb.onnx";
     ORTCHAR_T* modelPath2 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/yolov5s-face.onnx";
     ORTCHAR_T* modelPath3 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/rgb_emotion.onnx";
+    ORTCHAR_T* modelPath4 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/depth_anything_v2_vits.onnx";
 
+    /* Setup Models (modelPath, log, useCuda) */
     yolo.Setup(modelPath2, false, true);
     depth.Setup(modelPath, false, true);
+    depth_small.Setup(modelPath4, false, true);
     emotion.Setup(modelPath3, false, true);
 
-    /* Load shader, allocated rampedFbo */
-    fbo.allocate(1600, 800, GL_RGB);
-    rampedFbo.allocate(1600, 800);
+    /* Depth output fbo */
+    depthToColourShader.load("data/shader/rampShader.vert", "data/shader/rampShader.frag");
+    model_output_fbo.allocate(window_width / 2, window_height, GL_RGB);
+
+    /* Shader output */
+    rampedFbo.allocate(window_width, window_height);
+
+    emoteImage.allocate(260, 260);
+    tempImage.allocate(emoteImage.getWidth(), emoteImage.getHeight(), OF_IMAGE_COLOR);
+
+    screen_fbo.allocate(window_width, window_height, GL_RGB);
 }
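Review note on the hunk above: all four ONNX paths are absolute and machine-specific, so `Setup` will fail on any other machine. A fail-fast existence check is a cheap safeguard. The loop and log message below are a sketch only, not part of the codebase, assuming the `ofApp` members named in this diff and a Linux build where `ORTCHAR_T` is `char`:

    for (const char* p : {modelPath, modelPath2, modelPath3, modelPath4}) {
        // second argument false: treat p as an absolute path, not data-relative
        if (!ofFile::doesFileExist(p, false)) {
            ofLogError("ofApp::setup") << "missing ONNX model: " << p;
        }
    }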
@@ -37,7 +51,6 @@ void ofApp::setup(){
 void ofApp::update(){
     /* Check to see if the application has moved to the first frame
     As the models need to load first, as the first inference is quite slow */
-    auto start = std::chrono::high_resolution_clock::now();
 
     if(ofGetFrameNum() > 0)
         firstRun = false;
@@ -52,35 +65,29 @@ void ofApp::update(){
     }
 
     /* Setup model input using ofImage, allocated fbo */
-    //player.Update(img);
-    //img.setFromPixels(player.GetVideoPixels());
+    player.Update(img);
+    img.setFromPixels(player.GetVideoPixels());
 
     /* Run Models */
     try{
-        auto output_tensors = depth.Run(map.fboImage);
-        float* output_ptr = output_tensors.front().GetTensorMutableData<float>();
-        size_t num_elements = output_tensors.front().GetTensorTypeAndShapeInfo().GetElementCount();
-        float min_value = depth.ReduceMin(output_ptr, num_elements);
-        float max_value = depth.ReduceMax(output_ptr, num_elements);
-        depth.Normalize(output_ptr, num_elements, min_value, max_value);
-        depth.DataToFbo(output_ptr, 518, 518, fbo);
-
-        // auto output_tensors_face = yolo.Run(model_input_img);
-        // auto output_faces = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();
-        // unsigned int num_anchors = output_faces[1]; // Number of anchors
-        // float* output_face_ptr = output_tensors_face.front().GetTensorMutableData<float>();
-        // faceDetector.ParseOutput(output_face_ptr, detected_faces, num_anchors);
-        // faceDetector.ConvertBoxCoordsToOriginalSize(detected_faces, outFbo.getWidth(), outFbo.getHeight());
+        // map
+        inferDepthImage(model_output_fbo, map.fboImage, depth);
+
+        // video player
+        inferDepthImage(model_output_fbo_1, img, depth_small);
+
+        auto output_tensors_face = yolo.Run(map.fboImage);
+        auto output_faces = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();
+        unsigned int num_anchors = output_faces[1]; // Number of anchors
+        float* output_face_ptr = output_tensors_face.front().GetTensorMutableData<float>();
+        faceDetector.ParseOutput(output_face_ptr, detected_faces, num_anchors);
+        faceDetector.ConvertBoxCoordsToOriginalSize(detected_faces, fbo.getWidth(), fbo.getHeight());
 
         /* As no input is generated for the emotion recognition model, run a dummy vector through the model
         So it can load */
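Two review notes on the hunk above. First, `num_anchors` comes from `output_faces[1]`, which matches the usual yolov5s-face export layout `[1, num_anchors, 16]` (box x/y/w/h, objectness, ten landmark coordinates, one class score); if that layout is an assumption rather than guaranteed by this export, a shape guard avoids misreading a different model file. Second, `ConvertBoxCoordsToOriginalSize` still reads `fbo.getWidth()/getHeight()`, but this diff removes `fbo.allocate(...)` from setup(), so unless `fbo` is allocated elsewhere those dimensions will be 0. A sketch of the guard, using only the ONNX Runtime C++ calls already present here:

    auto shape = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();
    if (shape.size() != 3 || shape[2] != 16) {
        ofLogWarning("ofApp::update") << "unexpected face-model output shape";
        return;  // skip parsing rather than index a layout we don't understand
    }
    unsigned int num_anchors = shape[1];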
@@ -112,11 +119,6 @@ void ofApp::update(){
         std::cout << "Model did not run" << std::endl;
     }
 
-    auto end = std::chrono::high_resolution_clock::now();
-
-    std::chrono::duration<float> duration = end - start;
-
-    std::cout << "Time taken for Update: " << duration.count() << " seconds" << std::endl;
 
 }
@@ -124,18 +126,52 @@ void ofApp::update(){
 void ofApp::draw(){
     map.Draw();
 
-    renderDepthMap();
-
-    if(!firstRun && detected_faces.size() != 0){
-        faceDetector.DrawBox(detected_faces);
-        faceDetector.DrawCenter(detected_faces);
-    }
+    screen_fbo.begin();
+
+    // Calculate the target width and height for model_output_fbo_1
+    float fbo_1_target_width = window_width * 0.5; // 1/2 of the screen width (990px)
+    float fbo_1_target_height = window_height; // Full height of the screen
+
+    // Calculate the aspect ratio of the video and the FBO
+    float video_aspect_ratio = model_output_fbo_1.getWidth() / model_output_fbo_1.getHeight();
+    float fbo_aspect_ratio = fbo_1_target_width / fbo_1_target_height;
+
+    // Adjust the scaling to cover the FBO area while maintaining aspect ratio
+    float new_width, new_height;
+    if (fbo_aspect_ratio > video_aspect_ratio) {
+        // FBO is wider; scale by width to fill the FBO
+        new_width = fbo_1_target_width;
+        new_height = new_width / video_aspect_ratio; // Scale height to maintain aspect ratio
+    } else {
+        // FBO is taller; scale by height to fill the FBO
+        new_height = fbo_1_target_height;
+        new_width = new_height * video_aspect_ratio; // Scale width to maintain aspect ratio
+    }
+
+    // Center the video to ensure it fills the FBO and is cropped if necessary
+    float x_pos = (window_width * 0.75) - (new_width / 2);
+    float y_pos = (window_height - new_height) / 2; // Center vertically
+
+    // Draw the scaled video inside the FBO
+    model_output_fbo_1.draw(x_pos, y_pos, new_width, new_height);
+    model_output_fbo.draw(0, 0);
+
+    screen_fbo.end();
+
+    renderDepthMap();
+
+    // if(!firstRun && detected_faces.size() != 0){
+    //     faceDetector.DrawBox(detected_faces);
+    //     faceDetector.DrawCenter(detected_faces);
+    // }
 
     ofPushMatrix();
     ofSetColor(255);
     ofSetBackgroundColor(0);
     tf.drawString(std::to_string(ofGetFrameRate()), 10, 30);
     ofPopMatrix();
 
     // emoteImage.draw(640, 0);
     // for(auto& face : detected_faces){
     //     ofDrawBitmapString(std::to_string(face.box.emotional_state.emotions[0]), 700, 300);
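The hunk above is a standard "cover" fit: scale the video until it fills the right half of the window, crop the overflow at the FBO bounds, and centre it at three quarters of the window width (the "990px" comment implies a 1980px-wide window). The same arithmetic as a free function, for reference; `CoverFit` and `coverFit` are illustrative names, not part of the codebase:

    struct CoverFit { float w, h, x, y; };

    // Scale (srcW, srcH) up or down until it fully covers (dstW, dstH),
    // then centre the result on (cx, cy). Overflow is cropped by the caller.
    CoverFit coverFit(float srcW, float srcH,
                      float dstW, float dstH,
                      float cx, float cy) {
        float srcAspect = srcW / srcH;
        float dstAspect = dstW / dstH;
        CoverFit r;
        if (dstAspect > srcAspect) {   // target is wider: match widths
            r.w = dstW;
            r.h = dstW / srcAspect;
        } else {                       // target is taller: match heights
            r.h = dstH;
            r.w = dstH * srcAspect;
        }
        r.x = cx - r.w * 0.5f;
        r.y = cy - r.h * 0.5f;
        return r;
    }

Calling `coverFit(videoW, videoH, window_width * 0.5f, window_height, window_width * 0.75f, window_height * 0.5f)` reproduces `new_width`, `new_height`, `x_pos` and `y_pos` from the hunk.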
@@ -143,6 +179,19 @@ void ofApp::draw(){
 }
 
+void ofApp::inferDepthImage(ofFbo& fbo, ofImage& img, Onnx& model){
+    auto output_tensors = model.Run(img);
+    float* output_ptr = output_tensors.front().GetTensorMutableData<float>();
+    size_t num_elements = output_tensors.front().GetTensorTypeAndShapeInfo().GetElementCount();
+
+    float min_value = model.ReduceMin(output_ptr, num_elements);
+    float max_value = model.ReduceMax(output_ptr, num_elements);
+
+    model.Normalize(output_ptr, num_elements, min_value, max_value);
+
+    model.DataToFbo(output_ptr, 518, 518, fbo);
+}
+
 //--------------------------------------------------------------
 void ofApp::inferEmotionalState(){
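`inferDepthImage` consolidates the depth post-processing (run, min/max reduction, normalise, upload) that the update() hunk deletes inline, and both depth models now share it. The hard-coded 518 x 518 tells `DataToFbo` the dimensions of the model's output buffer (the inference resolution of these Depth Anything v2 exports), independent of the target FBO size. If `Onnx::Run` can return an empty vector on failure, a guard before `.front()` would avoid undefined behaviour; sketch only, since the failure behaviour of `Run` is not shown in this diff:

    auto output_tensors = model.Run(img);
    if (output_tensors.empty()) {
        ofLogWarning("inferDepthImage") << "model produced no output";
        return;
    }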
@@ -184,15 +233,17 @@ void ofApp::inferEmotionalState(){
     }
 }
 
+/*
+    Depth Map Shader Pass
+*/
 void ofApp::renderDepthMap(){
     rampedFbo.begin();
 
     depthToColourShader.begin();
-    depthToColourShader.setUniformTexture("tex0", fbo.getTexture(), 0);
-    depthToColourShader.setUniformTexture("tex1", map.fboImage.getTexture(), 1);
+    depthToColourShader.setUniformTexture("tex0", screen_fbo.getTexture(), 0);
     depthToColourShader.setUniform1f("texW", rampedFbo.getWidth());
     depthToColourShader.setUniform1f("texH", rampedFbo.getHeight());
-    fbo.draw(0, 0);
+    screen_fbo.draw(0, 0);
     depthToColourShader.end();
 
     rampedFbo.end();
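Note on the final hunk: the pass now samples the composited `screen_fbo` instead of the raw depth `fbo`, and the `tex1` (map texture) binding is removed, but the shader sources are unchanged in this diff. If `rampShader.frag` still declares and samples `tex1`, it now reads an unbound texture unit; either drop the sampler from the shader or keep the old binding:

    depthToColourShader.setUniformTexture("tex1", map.fboImage.getTexture(), 1);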