@@ -2,33 +2,47 @@
 //--------------------------------------------------------------
 void ofApp::setup(){

     /* ofSettings */
     ofDisableArbTex();
-    ofSetFrameRate(60);
-    // ofSetVerticalSync(true);
+    ofSetFrameRate(24);
+    ofSetVerticalSync(true);
+    window_width = ofGetWindowWidth();
+    window_height = ofGetWindowHeight();

     /* load font */
     tf.load("data/fonts/jetbrainsmono-regular.ttf", 20);

-    map.Setup();
-
-    /* load shader */
-    depthToColourShader.load("data/shader/rampShader.vert", "data/shader/rampShader.frag");
-
-    //player.Setup();
-    //player.SetVideo("videos/demo.mp4", fbo);
+    /* setup map */
+    map.Setup();
+
+    emoteImage.allocate(260, 260);
+    tempImage.allocate(emoteImage.getWidth(), emoteImage.getHeight(), OF_IMAGE_COLOR);
+
+    /* setup video */
+    player.Setup();
+    player.SetVideo("videos/demo.mp4", model_output_fbo_1);

     /* model paths */
     ORTCHAR_T* modelPath = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/depth_anything_v2_vitb.onnx";
     ORTCHAR_T* modelPath2 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/yolov5s-face.onnx";
     ORTCHAR_T* modelPath3 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/rgb_emotion.onnx";
     ORTCHAR_T* modelPath4 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/depth_anything_v2_vits.onnx";

     /* Setup models (modelPath, log, useCuda) */
     yolo.Setup(modelPath2, false, true);
     depth.Setup(modelPath, false, true);
     depth_small.Setup(modelPath4, false, true);
     emotion.Setup(modelPath3, false, true);

     /* Load shader, allocate rampedFbo */
     depthToColourShader.load("data/shader/rampShader.vert", "data/shader/rampShader.frag");
-    fbo.allocate(1600, 800, GL_RGB);
-    rampedFbo.allocate(1600, 800);
+
+    /* Depth output fbo */
+    model_output_fbo.allocate(window_width / 2, window_height, GL_RGB);
+
+    /* Shader output */
+    rampedFbo.allocate(window_width, window_height);

-    emoteImage.allocate(260, 260);
-    tempImage.allocate(emoteImage.getWidth(), emoteImage.getHeight(), OF_IMAGE_COLOR);
+    screen_fbo.allocate(window_width, window_height, GL_RGB);
 }
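
Not part of the diff, but worth noting: the ORTCHAR_T paths above are absolute and machine-specific. A minimal portable sketch, assuming ORTCHAR_T is plain char on this Linux build and that Onnx::Setup() keeps the (modelPath, log, useCuda) signature used above:

    // Hypothetical: resolve the model file relative to bin/data with the stock
    // openFrameworks call ofToDataPath(). The std::string must outlive Setup()
    // if the wrapper holds on to the pointer.
    std::string depthPath = ofToDataPath("depth_anything_v2_vitb.onnx", true);
    depth.Setup(depthPath.data(), false, true);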

@@ -37,7 +51,6 @@ void ofApp::setup(){
 void ofApp::update(){

     /* Check whether the application has moved past the first frame,
        since the models need to load and the first inference is quite slow */
     auto start = std::chrono::high_resolution_clock::now();
     if(ofGetFrameNum() > 0)
         firstRun = false;

@@ -52,35 +65,29 @@ void ofApp::update(){
     }

     /* Setup model input using ofImage and the allocated fbo */
-    //player.Update(img);
-    //img.setFromPixels(player.GetVideoPixels());
+    player.Update(img);
+    img.setFromPixels(player.GetVideoPixels());

     /* Run models */
     try{
-        auto output_tensors = depth.Run(map.fboImage);
-        float* output_ptr = output_tensors.front().GetTensorMutableData<float>();
-        size_t num_elements = output_tensors.front().GetTensorTypeAndShapeInfo().GetElementCount();
-
-        float min_value = depth.ReduceMin(output_ptr, num_elements);
-        float max_value = depth.ReduceMax(output_ptr, num_elements);
-
-        depth.Normalize(output_ptr, num_elements, min_value, max_value);
-
-        depth.DataToFbo(output_ptr, 518, 518, fbo);
+        // map
+        inferDepthImage(model_output_fbo, map.fboImage, depth);

-        auto output_tensors_face = yolo.Run(map.fboImage);
+        // video player
+        inferDepthImage(model_output_fbo_1, img, depth_small);
+
+        // auto output_tensors_face = yolo.Run(model_input_img);

-        auto output_faces = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();
+        // auto output_faces = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();

-        unsigned int num_anchors = output_faces[1]; // Number of anchors
+        // unsigned int num_anchors = output_faces[1]; // Number of anchors

-        float* output_face_ptr = output_tensors_face.front().GetTensorMutableData<float>();
+        // float* output_face_ptr = output_tensors_face.front().GetTensorMutableData<float>();

-        faceDetector.ParseOutput(output_face_ptr, detected_faces, num_anchors);
+        // faceDetector.ParseOutput(output_face_ptr, detected_faces, num_anchors);

-        faceDetector.ConvertBoxCoordsToOriginalSize(detected_faces, fbo.getWidth(), fbo.getHeight());
+        // faceDetector.ConvertBoxCoordsToOriginalSize(detected_faces, outFbo.getWidth(), outFbo.getHeight());

         /* As no input is generated for the emotion recognition model, run a dummy vector
            through the model so it can load */
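
The code that comment introduces falls outside this hunk. A hedged sketch of what such a warm-up pass could look like, reusing the 260x260 emotion input size from setup() and the Onnx::Run(ofImage&) call used elsewhere in this file:

    // Hypothetical warm-up: push one blank 260x260 frame through the emotion
    // model so its first real inference doesn't stall the draw loop.
    ofImage dummy;
    dummy.allocate(260, 260, OF_IMAGE_COLOR);
    dummy.setColor(ofColor::black);
    dummy.update();
    auto warmup_tensors = emotion.Run(dummy);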

@@ -112,30 +119,59 @@ void ofApp::update(){
         std::cout << "Model did not run" << std::endl;
     }

     auto end = std::chrono::high_resolution_clock::now();
     std::chrono::duration<float> duration = end - start;
     std::cout << "Time taken for Update: " << duration.count() << " seconds" << std::endl;
 }
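
The start/end chrono pair in update() works, but the same measurement can be wrapped in a small RAII timer so one local handles both ends. A sketch under that assumption (ScopeTimer is a hypothetical name, not from this codebase):

    // Prints the elapsed time when the object goes out of scope.
    struct ScopeTimer {
        const char* label;
        std::chrono::high_resolution_clock::time_point start =
            std::chrono::high_resolution_clock::now();
        ~ScopeTimer(){
            std::chrono::duration<float> d =
                std::chrono::high_resolution_clock::now() - start;
            std::cout << label << ": " << d.count() << " seconds" << std::endl;
        }
    };

    // Usage, as the first line of ofApp::update():
    // ScopeTimer t{"Time taken for Update"};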

 //--------------------------------------------------------------
 void ofApp::draw(){
     map.Draw();

+    screen_fbo.begin();
+
+    // Target area for model_output_fbo_1: the right half of the screen
+    float fbo_1_target_width = window_width * 0.5; // half the screen width (990px)
+    float fbo_1_target_height = window_height;     // full screen height
+
+    // Aspect ratios of the video and of the target area
+    float video_aspect_ratio = model_output_fbo_1.getWidth() / model_output_fbo_1.getHeight();
+    float fbo_aspect_ratio = fbo_1_target_width / fbo_1_target_height;
+
+    // Scale to cover the target area while maintaining aspect ratio
+    float new_width, new_height;
+    if (fbo_aspect_ratio > video_aspect_ratio) {
+        // Target is wider: match the width, let the height overflow
+        new_width = fbo_1_target_width;
+        new_height = new_width / video_aspect_ratio;
+    } else {
+        // Target is taller: match the height, let the width overflow
+        new_height = fbo_1_target_height;
+        new_width = new_height * video_aspect_ratio;
+    }
+
+    // Centre the video in the right half; any overflow is cropped
+    float x_pos = (window_width * 0.75) - (new_width / 2);
+    float y_pos = (window_height - new_height) / 2;
+
+    // Draw the scaled video into the screen fbo
+    model_output_fbo_1.draw(x_pos, y_pos, new_width, new_height);
+
+    model_output_fbo.draw(0, 0);
+
+    screen_fbo.end();

     renderDepthMap();

-    if(!firstRun && detected_faces.size() != 0){
-        faceDetector.DrawBox(detected_faces);
-        faceDetector.DrawCenter(detected_faces);
-    }
+    // if(!firstRun && detected_faces.size() != 0){
+    //     faceDetector.DrawBox(detected_faces);
+    //     faceDetector.DrawCenter(detected_faces);
+    // }

     ofPushMatrix();
     ofSetColor(255);
     ofSetBackgroundColor(0);
     tf.drawString(std::to_string(ofGetFrameRate()), 10, 30);
     ofPopMatrix();

     // emoteImage.draw(640, 0);
     // for(auto& face : detected_faces){
     //     ofDrawBitmapString(std::to_string(face.box.emotional_state.emotions[0]), 700, 300);

@@ -143,6 +179,19 @@ void ofApp::draw(){
 }
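
The cover-fit arithmetic in draw() is a candidate for a helper. A sketch, not part of the diff; computeCoverRect is a hypothetical name, while ofRectangle and std::max are stock:

    // Scale (srcW, srcH) up to cover a target area while preserving aspect
    // ratio, centring the result so any overflow is cropped symmetrically.
    ofRectangle computeCoverRect(float srcW, float srcH,
                                 float targetX, float targetY,
                                 float targetW, float targetH){
        float scale = std::max(targetW / srcW, targetH / srcH);
        float w = srcW * scale;
        float h = srcH * scale;
        return ofRectangle(targetX + (targetW - w) / 2,
                           targetY + (targetH - h) / 2, w, h);
    }

    // Equivalent to the block above, for the right half of the screen:
    // ofRectangle r = computeCoverRect(model_output_fbo_1.getWidth(),
    //                                  model_output_fbo_1.getHeight(),
    //                                  window_width * 0.5, 0,
    //                                  window_width * 0.5, window_height);
    // model_output_fbo_1.draw(r.x, r.y, r.width, r.height);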

+void ofApp::inferDepthImage(ofFbo& fbo, ofImage& img, Onnx& model){
+    auto output_tensors = model.Run(img);
+    float* output_ptr = output_tensors.front().GetTensorMutableData<float>();
+    size_t num_elements = output_tensors.front().GetTensorTypeAndShapeInfo().GetElementCount();
+
+    float min_value = model.ReduceMin(output_ptr, num_elements);
+    float max_value = model.ReduceMax(output_ptr, num_elements);
+
+    model.Normalize(output_ptr, num_elements, min_value, max_value);
+
+    // The depth models output 518x518; DataToFbo rescales into the target fbo
+    model.DataToFbo(output_ptr, 518, 518, fbo);
+}
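
For reference, the ReduceMin/ReduceMax/Normalize sequence above can be expressed with the standard library alone; a sketch (normalizeDepth is a hypothetical helper; needs <algorithm>):

    // Rescale a raw float buffer into [0, 1] in place: one pass to find the
    // range, one to normalize. Copy the extrema before writing, since the
    // iterators point into the buffer being modified.
    void normalizeDepth(float* data, size_t n){
        auto [loIt, hiIt] = std::minmax_element(data, data + n);
        float lo = *loIt, hi = *hiIt;
        float range = std::max(hi - lo, 1e-6f); // guard against flat output
        for(size_t i = 0; i < n; ++i)
            data[i] = (data[i] - lo) / range;
    }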

 //--------------------------------------------------------------
 void ofApp::inferEmotionalState(){

@@ -184,15 +233,17 @@
     }
 }

 /*
     Depth Map Shader Pass
 */
 void ofApp::renderDepthMap(){
     rampedFbo.begin();

     depthToColourShader.begin();
-    depthToColourShader.setUniformTexture("tex0", fbo.getTexture(), 0);
-    depthToColourShader.setUniformTexture("tex1", map.fboImage.getTexture(), 1);
+    depthToColourShader.setUniformTexture("tex0", screen_fbo.getTexture(), 0);
     depthToColourShader.setUniform1f("texW", rampedFbo.getWidth());
     depthToColourShader.setUniform1f("texH", rampedFbo.getHeight());
-    fbo.draw(0, 0);
+    screen_fbo.draw(0, 0);
     depthToColourShader.end();

     rampedFbo.end();