diff --git a/bin/data/new_tree.bin b/bin/data/new_tree.bin index 6003a14..ab54210 100644 Binary files a/bin/data/new_tree.bin and b/bin/data/new_tree.bin differ diff --git a/bin/data/shaders/p_depth.vert b/bin/data/shaders/p_depth.vert index cf16d18..a5c2828 100644 --- a/bin/data/shaders/p_depth.vert +++ b/bin/data/shaders/p_depth.vert @@ -47,6 +47,4 @@ void main() clipPos = vec4(ndcPos * clipPos.w, clipPos.w); gl_Position = clipPos; - - //gl_Position = modelViewProjectionMatrix * position; } diff --git a/bin/image-to-mesh b/bin/image-to-mesh index c2a4d86..97e95aa 100755 Binary files a/bin/image-to-mesh and b/bin/image-to-mesh differ diff --git a/src/Bullet.cpp b/src/Bullet.cpp index 403e223..b483a26 100644 --- a/src/Bullet.cpp +++ b/src/Bullet.cpp @@ -26,9 +26,25 @@ void Bullet::setup(vector& _nodes){ } shader.load("shaders/vertex_snap"); + + contr_active = false; } void Bullet::update(bool& is_controller_active, Node& chosen_node) { + static float blend_factor = 0.0f; + + contr_active = is_controller_active; + + if(chosen_node.tsne_position != last_chosen_node.tsne_position){ + new_chosen_node = true; + is_random_walking = false; + time_at_target = 0; + random_walk_time = 0; + scale_out_factor = 0; + camera_velocity = glm::vec3(0.0f); + } else { + new_chosen_node = false; + } cameraBounds = getCameraBounds(camera); world.update(); @@ -39,71 +55,117 @@ void Bullet::update(bool& is_controller_active, Node& chosen_node) { glm::vec3 dir = current_pos - target_pos; float dist = glm::length(dir); + // Check if we're close to target + if (dist < TARGET_REACHED_DISTANCE) { + time_at_target += d_time; + + // Start random walk after delay + if (time_at_target >= RANDOM_WALK_DELAY && !is_random_walking) { + is_random_walking = true; + random_target = generateRandomTarget(); + } + } else { + time_at_target = 0.0f; + } - // Update camera only if conditions are met - if(!std::isnan(dist) && dist >= 0.0001f) { - glm::vec3 norm_dir = glm::normalize(dir); + + // Handle random 
walk mode + if (is_random_walking) { + random_walk_time += d_time; + + glm::vec3 to_random_target = random_target - current_pos; + float random_dist = glm::length(to_random_target); + // Generate new random target when reached current one + if (random_dist < 10.0f) { + random_target = generateRandomTarget(); + } + + // Smooth movement towards random target + if (random_dist > 0.0001f) { + glm::vec3 random_dir = glm::normalize(to_random_target); + camera_velocity = random_dir * RANDOM_WALK_SPEED * d_time; + } + } + // Regular movement towards chosen node + else if (!std::isnan(dist) && dist >= 0.0001f) { + + glm::vec3 norm_dir = glm::normalize(dir); + if(!std::isnan(norm_dir.x) && !std::isnan(norm_dir.y) && !std::isnan(norm_dir.z)) { - if (is_controller_active) { - if (glm::length(camera_velocity) > 0.0f) { - glm::vec3 decel_dir = -glm::normalize(camera_velocity); - camera_velocity += decel_dir * DECELERATION * d_time; - // if (glm::length(camera_velocity) < 0.01f) { - // camera_velocity = glm::vec3(0.0f); - // } - } - } else { + float distance_factor = glm::min(dist / 50.0f, 1.0f); float current_acceleration = ACCELERATION * distance_factor; - - glm::vec3 acceleration = -norm_dir * current_acceleration * d_time; - // Check for NaN before applying + if(!std::isnan(acceleration.x) && !std::isnan(acceleration.y) && !std::isnan(acceleration.z)) { camera_velocity += acceleration; } - + /* decelerate if close to target */ if (dist < 5.0f) { float decel_factor = dist / 5.0f; camera_velocity *= (1.0f - (1.0f - decel_factor) * d_time * 10.0f); } float mag = glm::length(camera_velocity); + + /* clamp vel */ if (mag > MAX_VELOCITY) { camera_velocity = glm::normalize(camera_velocity) * MAX_VELOCITY; } - - // Smoothed zoom based on velocity - float vel_magnitude = glm::length(camera_velocity); - static float current_zoom = 0.02f; // Store current zoom - float target_zoom = ofMap(vel_magnitude, 0, MAX_VELOCITY, 0.02, 0.3, true); - - // Smooth lerp to target zoom - current_zoom = 
current_zoom + (target_zoom - current_zoom) * 0.1f; - camera.setScale(current_zoom); } + } - // Final position update with safety check - glm::vec3 new_pos = current_pos + camera_velocity * d_time; - camera.setPosition(new_pos); - // if(!std::isnan(new_pos.x) && !std::isnan(new_pos.y) && !std::isnan(new_pos.z)) { - // camera.setPosition(new_pos); - // } + // Smoothed zoom based on velocity + float vel_magnitude = glm::length(camera_velocity); + static float current_zoom = 0.05f; + float target_zoom = ofMap(vel_magnitude, 0, MAX_VELOCITY, 0.05, 0.3, true); + current_zoom = current_zoom + (target_zoom - current_zoom) * 0.1f; + + // Calculate scale-out factor if we're in random walk mode + if (random_walk_time > SCALE_OUT_START_TIME) { + float scale_progress = (random_walk_time - SCALE_OUT_START_TIME) / SCALE_OUT_DURATION; + scale_progress = ofClamp(scale_progress, 0.0f, 1.0f); + + // Smoother easing function (quintic ease in/out) + float t = scale_progress; + if (t < 0.5f) { + scale_out_factor = 16 * t * t * t * t * t; + } else { + t = t - 1; + scale_out_factor = 1 + 16 * t * t * t * t * t; } } - - // if (glm::length(current_pos - target_pos) < 1.0f) { - // camera.setPosition(target_pos); - // //camera_velocity = glm::vec3(0.0f); - // } + // Blend between velocity-based zoom and scale-out with smooth transitions both ways + float final_scale = current_zoom; + + // Calculate target blend based on whether we're random walking + float target_blend = is_random_walking ? 
scale_out_factor : 0.0f; + blend_factor += (target_blend - blend_factor) * 0.05f; // Smooth transition both ways + + if (blend_factor > 0.001f) { // Apply blended scale if we have any blend factor + final_scale = ofLerp(current_zoom, 0.3, blend_factor); + } + camera.setScale(final_scale); + + // Update camera position + glm::vec3 new_pos = current_pos + camera_velocity * d_time; + + // Clamp position within bounds + new_pos.x = ofClamp(new_pos.x, -RANDOM_WALK_RADIUS, RANDOM_WALK_RADIUS); + new_pos.y = ofClamp(new_pos.y, -RANDOM_WALK_RADIUS, RANDOM_WALK_RADIUS); + + camera.setPosition(new_pos); + // Always update these regardless of camera movement for(auto& node : nodes) { updateNodeActivation(node); } + + last_chosen_node = chosen_node; } void Bullet::draw(){ @@ -171,6 +233,8 @@ void Bullet::draw(){ ss << "Camera Position: " << camera.getPosition() << std::endl; ss << "Camera Scale: " << camera.getScale() << std::endl; ss << "Camera Bounds: " << cameraBounds.z << std::endl; + ss << "Controller Active: " << contr_active << std::endl; + ss << "Random Walk: " << is_random_walking << std::endl; ofSetColor(ofColor::blue); ofDrawBitmapString(ss.str().c_str(), 20, 20); now = false; diff --git a/src/Bullet.h b/src/Bullet.h index 2fb2ac3..148a728 100644 --- a/src/Bullet.h +++ b/src/Bullet.h @@ -76,6 +76,8 @@ class Bullet{ ofLight light; ofTexture tex; + bool contr_active; + ofShader shader; float gridSize = 20.0f; @@ -155,5 +157,33 @@ class Bullet{ void updateShapeBatch(size_t start, size_t end); bool now = true; bool new_chosen_node; + Node last_chosen_node; Node* current_target_node = nullptr; + + /* random walk vars */ + const float RANDOM_WALK_RADIUS = 1000.0f; + const float RANDOM_WALK_SPEED = 25.0f; // Adjust for slower/faster movement + const float TARGET_REACHED_DISTANCE = 5.0f; + const float RANDOM_WALK_DELAY = 5.0f; // 5 seconds delay + + const float SCALE_OUT_START_TIME = 15.0f; // When to start scaling out + const float SCALE_OUT_DURATION = 30.0f; // How long 
to scale out + const float INITIAL_SCALE = 0.05f; // Starting scale + const float TARGET_SCALE = 0.2f; // Final scale + + bool is_random_walking = false; + float time_at_target = 0.0f; + glm::vec3 random_target; + float current_scale = INITIAL_SCALE; + float random_walk_time = 0.0f; + float scale_out_factor = 0.0f; + + // Generate new random target within bounds + glm::vec3 generateRandomTarget() { + return glm::vec3( + ofRandom(-RANDOM_WALK_RADIUS, RANDOM_WALK_RADIUS), + ofRandom(-RANDOM_WALK_RADIUS, RANDOM_WALK_RADIUS), + camera.getPosition().z // Keep same Z level + ); + } }; diff --git a/src/ofApp.cpp b/src/ofApp.cpp index c3fa6eb..1551f31 100644 --- a/src/ofApp.cpp +++ b/src/ofApp.cpp @@ -10,6 +10,8 @@ void ofApp::setup(){ portrait_fbo.allocate((ofGetWindowWidth() / 2), map_h, GL_RGBA); // this shoould be full with portrait_pre_fbo.allocate((ofGetWindowWidth() / 3) * 1, map_h, GL_RGB); // this should be full, and same below portrait_pre_fbo_alpha.allocate((ofGetWindowWidth() / 3) * 1, map_h, GL_RGBA); + portrait_cropped.allocate((ofGetWindowWidth() / 3) * 1, map_h, GL_RGB); + portrait_cropped_alpha.allocate((ofGetWindowWidth() / 3) * 1, map_h, GL_RGBA); comp_fbo.allocate(ofGetWindowWidth(), map_h, GL_RGBA); model_outptut_fbo.allocate(map_fbo.getWidth(), map_h, GL_RGB); @@ -24,8 +26,10 @@ void ofApp::setup(){ model_image_esp.allocate(128 * 2, 168 * 2, OF_IMAGE_COLOR); model_image.allocate(map_fbo.getWidth(), map_h, OF_IMAGE_COLOR); model_image_portrait.allocate(portrait_pre_fbo_alpha.getWidth(), portrait_pre_fbo_alpha.getHeight(), OF_IMAGE_COLOR); + model_image_portrait_cropped.allocate(portrait_pre_fbo_alpha.getWidth(), portrait_pre_fbo_alpha.getHeight(), OF_IMAGE_COLOR); map_pixels.allocate(map_fbo.getWidth(), map_h, OF_PIXELS_RGB); + alpha_demo.allocate(portrait_pre_fbo_alpha.getWidth(), portrait_pre_fbo_alpha.getHeight(), OF_IMAGE_COLOR_ALPHA); /* allocated ofImages in esp_images */ for(int i = 0; i < 4; i++){ @@ -52,6 +56,8 @@ void ofApp::setup(){ 
ORTCHAR_T* modelPath = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/image-to-mesh/bin/data/models/depth_anything_v2_vits.onnx"; ORTCHAR_T* modelPath_Small = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/image-to-mesh/bin/data/models/depth_anything_v2_vitb.onnx"; + ORTCHAR_T* modelPath_Yolo = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/image-to-mesh/bin/data/models/yolov5s-face.onnx"; + ORTCHAR_T* modelPath_Indoor_Dynamic = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/image-to-mesh/bin/data/models/depth_anything_v2_vitl.onnx"; /* bullet setup */ bullet.setup(nodes); @@ -64,13 +70,15 @@ void ofApp::setup(){ /* onnx setup */ depth_onnx.Setup(modelPath, false, true); depth_onnx_esp.Setup(modelPath, false, true); - depth_onnx_portrait.Setup(modelPath_Small, false, true); + depth_onnx_portrait.Setup(modelPath, false, true); + yolo_onnx.Setup(modelPath_Yolo, false, true); /* multi-thread setup */ depth_thread.setup(&model_image, &model_outptut_fbo, &depth_onnx); depth_esp.setup(&model_image_esp, &model_esp_out_fbo, &depth_onnx_esp); - depth_portrait.setup(&model_image_portrait, &model_portrait_out_fbo, &depth_onnx_portrait); - + depth_portrait.setup(&model_image_portrait_cropped, &model_portrait_out_fbo, &depth_onnx_portrait); + yolo.setupYolo(&model_image_portrait, &detected_faces, &yolo_onnx, &face_detector); + /* camera settings for portrait */ CameraPosition cp; cp.position = glm::vec3(116.388, -441.054, 967.724); @@ -86,9 +94,9 @@ void ofApp::setup(){ cam_positions.push_back(cp); /* settings */ - // portrait_camera.enableOrtho(); - // portrait_camera.setNearClip(-10000); - // portrait_camera.setFarClip(10000); + portrait_camera.enableOrtho(); + portrait_camera.setNearClip(-10000); + portrait_camera.setFarClip(10000); portrait_camera.setPosition(cam_positions[0].position); 
portrait_camera.setOrientation(cam_positions[0].rotation); portrait_camera.setScale(0.5); @@ -111,7 +119,6 @@ void ofApp::setup(){ //-------------------------------------------------------------- void ofApp::update(){ - server->update(esp_comp_fbo); float current_time = ofGetElapsedTimef(); @@ -141,6 +148,7 @@ void ofApp::update(){ depth_thread.start(); depth_esp.start(); depth_portrait.start(); + yolo.start(); } /* write pixels to model input image */ @@ -156,10 +164,8 @@ void ofApp::update(){ try{ depth_thread.update(); - /* set output to fbo's */ depth_onnx.SetPixels(model_outptut_fbo); - } catch (exception e){ std::cout << "Model did not run" << std::endl; @@ -167,13 +173,13 @@ void ofApp::update(){ //mapper.update(); bullet.update(server->is_active, server->getChosenNode()); + //std::cout << portrait_camera.getPosition() << " : " <print(); } void ofApp::drawPortrait(){ - float p_scale = 1 + ( ((1 + ofNoise(ofGetElapsedTimef() / 5)) / 2) * 0.5); - portrait_pre_fbo.begin(); ofClear(0, 0, 0, 0); @@ -261,53 +265,112 @@ void ofApp::drawPortrait(){ ofPushMatrix(); // Move to center of FBO ofTranslate(portrait_pre_fbo.getWidth()/2, portrait_pre_fbo.getHeight()); // 0.8 - // Apply scale - ofScale(1); // Move back by half the scaled image dimensions ofTranslate(-scaledWidth/2, -scaledHeight); // Draw at 0,0 since we've already translated last_chosen_node.img.draw(0, 0, scaledWidth, scaledHeight); ofPopMatrix(); - portrait_pre_fbo.end(); portrait_pre_fbo_alpha.begin(); ofPushMatrix(); ofClear(0, 0, 0, 0); ofTranslate(portrait_pre_fbo.getWidth()/2, portrait_pre_fbo.getHeight()); - // Apply scale - ofScale(1); // Move back by half the scaled image dimensions ofTranslate(-scaledWidth/2, -scaledHeight); last_chosen_node.img.draw(0, 0, scaledWidth, scaledHeight); ofPopMatrix(); portrait_pre_fbo_alpha.end(); + + portrait_pre_fbo_alpha.readToPixels(alpha_pixels); + alpha_demo.setFromPixels(alpha_pixels); float current_time = ofGetElapsedTimef(); - if(portrait_needs_update || 
past_plane_size != plane_size) { - past_plane_size = plane_size; - depth_ready = false; - mesh_ready = false; - last_process_time = current_time; + // we have full portrait & alpha portrait. + + if(portrait_needs_update) { + yolo.resetInferenceFlag(); ofPixels pix; portrait_pre_fbo.readToPixels(pix); model_image_portrait.setFromPixels(pix); - // Queue the depth processing - depth_portrait.update(); + detected_faces.clear(); + yolo.update(); // This triggers the thread to run + + portrait_needs_update = false; } - // Give the depth processing some time to complete (adjust timeout as needed) - const float PROCESS_TIMEOUT = 0.1; // half second timeout - if(!depth_ready && (current_time - last_process_time) > PROCESS_TIMEOUT) { - depth_onnx_portrait.SetPixels(model_portrait_out_fbo); - depth_ready = true; + if(yolo.checkInferenceComplete()) { + /* check if face is detected, and crop potrait */ + if(detected_faces.size() > 0){ + for(auto& f : detected_faces){ + float crop_width = f.box.x2 - f.box.x1; + float crop_height = f.box.y2 - f.box.y1; + + // Calculate scaling to fit in FBO while maintaining aspect ratio + float scale = min( + (float)portrait_cropped.getWidth() / crop_width, + (float)portrait_cropped.getHeight() / crop_height + ); + + float scaledWidth = crop_width * scale; + float scaledHeight = crop_height * scale; + + // Calculate position to center in FBO + float x = (portrait_cropped.getWidth() - scaledWidth) / 2; + float y = (portrait_cropped.getHeight() - scaledHeight) / 2; + + portrait_cropped.begin(); + ofClear(ofColor::black); + ofPushMatrix(); + // Move to center position + ofTranslate(x, y); + // Draw the cropped section at the calculated size + model_image_portrait.drawSubsection( + 0, 0, // Draw at translated position + scaledWidth, scaledHeight, // Draw at scaled size + f.box.x1, f.box.y1, // Start crop from face detection + crop_width, crop_height // Amount to crop + ); + ofPopMatrix(); + portrait_cropped.end(); + + portrait_cropped_alpha.begin(); 
+ ofClear(0); + ofPushMatrix(); + // Move to center position + ofTranslate(x, y); + // Draw the cropped section at the calculated size + alpha_demo.drawSubsection( + 0, 0, // Draw at translated position + scaledWidth, scaledHeight, // Draw at scaled size + f.box.x1, f.box.y1, // Start crop from face detection + crop_width, crop_height // Amount to crop + ); + ofPopMatrix(); + portrait_cropped_alpha.end(); + + ofPixels c_pix; + portrait_cropped.readToPixels(c_pix); + model_image_portrait_cropped.setFromPixels(c_pix); + } + } else { + /* don't use cropped image for depth model */ + ofPixels c_pix; + portrait_pre_fbo.readToPixels(c_pix); + model_image_portrait_cropped.setFromPixels(c_pix); + } + + /* run depth model on cropped portrait */ + depth_portrait.update(); + yolo.resetInferenceFlag(); } - // Only generate mesh once depth is ready - if(depth_ready && !mesh_ready) { + if(depth_portrait.checkInferenceComplete()){ + + depth_onnx_portrait.SetPixels(model_portrait_out_fbo); float planeScale = 1.0; int planeWidth = portrait_pre_fbo_alpha.getWidth() * planeScale; int planeHeight = portrait_pre_fbo_alpha.getHeight() * planeScale; @@ -316,21 +379,28 @@ void ofApp::drawPortrait(){ int planeRows = planeHeight / planeGridSize; plane.set(planeWidth, planeHeight, planeColumns, planeRows, OF_PRIMITIVE_TRIANGLES); - plane.mapTexCoords(0, 0, planeWidth, planeHeight); custom_mesh = createCustomPlane(planeWidth, planeHeight, planeColumns, planeRows); - //ofLog() << "Generated new mesh at time: " << current_time; - mesh_ready = true; - /* send osc message when new node selected*/ server->sendOSCMessage(); + depth_portrait.resetInferenceFlag(); } } + + void ofApp::drawPortraitZ(){ ofBackgroundGradient(ofColor::white, ofColor::orange, OF_GRADIENT_LINEAR); - ofTexture tex_color = portrait_pre_fbo_alpha.getTexture(); + /* if no faces found use full portrait texture */ + ofTexture tex_color; + + if(detected_faces.size() > 0){ + tex_color = portrait_cropped_alpha.getTexture(); + } else 
{ + tex_color = portrait_pre_fbo_alpha.getTexture(); + } + ofTexture tex_depth = model_portrait_out_fbo.getTexture(); float minDepth = std::numeric_limits::max(); @@ -366,7 +436,7 @@ void ofApp::drawPortraitZ(){ p_depth.setUniformTexture("tex1", tex_depth, 1); ofFill(); ofTranslate(p_noise_x, p_noise_y, 0); - custom_mesh.draw(); + plane.draw(); p_depth.end(); ofPopMatrix(); ofDisableDepthTest(); diff --git a/src/ofApp.h b/src/ofApp.h index 49e2ecf..26e8b82 100644 --- a/src/ofApp.h +++ b/src/ofApp.h @@ -50,6 +50,8 @@ class ofApp : public ofBaseApp{ ofFbo portrait_fbo; ofFbo portrait_pre_fbo; ofFbo portrait_pre_fbo_alpha; + ofFbo portrait_cropped; + ofFbo portrait_cropped_alpha; ofFbo comp_fbo; ofFbo model_outptut_fbo; ofFbo model_esp_out_fbo; @@ -59,6 +61,7 @@ class ofApp : public ofBaseApp{ ofFbo kd_out; ofImage model_image_esp; ofImage model_image_portrait; + ofImage model_image_portrait_cropped; ofShader shaders; ofShader esp_shader; @@ -75,11 +78,19 @@ class ofApp : public ofBaseApp{ Onnx depth_onnx; Onnx depth_onnx_esp; Onnx depth_onnx_portrait; + Onnx yolo_onnx; ModelThread depth_thread; ModelThread depth_esp; ModelThread depth_portrait; + ModelThread yolo; + Yolo face_detector; ofImage model_image; + ofImage yolo_t_image; + ofxCvColorImage emote_image; ofPixels map_pixels; + std::vector detected_faces; + std::vector croppedFaces; + int emotionImageMaxBatchSize = 5; /* the current embedding */ Embedding embed; @@ -129,4 +140,8 @@ class ofApp : public ofBaseApp{ bool depth_ready = false; bool mesh_ready = false; float last_process_time = 0; + bool waiting_for_yolo; + + ofImage alpha_demo; + ofPixels alpha_pixels; }; diff --git a/src/onnx/ModelThread.h b/src/onnx/ModelThread.h index 6267438..3894cc1 100644 --- a/src/onnx/ModelThread.h +++ b/src/onnx/ModelThread.h @@ -15,6 +15,7 @@ class ModelThread : public ofThread // emotional recognition model std::vector* croppedFaces; float* emotional_data; + bool isInferenceComplete = false; ~ModelThread(){ @@ -78,6 +79,20 
@@ class ModelThread : public ofThread float* output_face_ptr = output_tensors_face.front().GetTensorMutableData(); yolo->ParseOutput(output_face_ptr, *detected_faces, num_anchors); + + yolo->ConvertBoxCoordsToOriginalSize(*detected_faces, img->getWidth(), img->getHeight()); + + isInferenceComplete = true; + } + + bool checkInferenceComplete() { + std::lock_guard lock(mutex); + return isInferenceComplete; + } + + void resetInferenceFlag() { + std::lock_guard lock(mutex); + isInferenceComplete = false; } void inferDepthImage(ofFbo* fbo, ofImage* img, Onnx* model){ @@ -91,6 +106,8 @@ class ModelThread : public ofThread model->Normalize(output_ptr, num_elements, min_value, max_value); model->DataToFbo(output_ptr, 518, 518, *fbo); + + isInferenceComplete = true; } diff --git a/src/onnx/Yolo.cpp b/src/onnx/Yolo.cpp index fe48489..72ee9e3 100644 --- a/src/onnx/Yolo.cpp +++ b/src/onnx/Yolo.cpp @@ -46,9 +46,11 @@ void Yolo::ParseOutput(float* &output_tensors, std::vector &detected_faces){ for (const auto &face : detected_faces) { + ofPushStyle(); ofNoFill(); - float w = ofGetWindowWidth() / 2; - ofDrawRectangle(face.box.x1 + w, face.box.y1, ((face.box.x2 + w) - (face.box.x1 + w)), face.box.y2 - face.box.y1); + ofSetColor(ofColor::cyan); + ofDrawRectangle(face.box.x1, face.box.y1, ((face.box.x2) - (face.box.x1)), face.box.y2 - face.box.y1); + ofPopStyle(); } } @@ -96,7 +98,7 @@ void Yolo::NonMaximumSuppression(std::vector &input_fa void Yolo::ConvertBoxCoordsToOriginalSize(std::vector &detected_faces, size_t original_width, size_t original_height){ float width_scale = static_cast(original_width) / modelSize; float height_scale = static_cast(original_height) / modelSize; - + for (auto &face : detected_faces) { // Convert bounding box coordinates face.box.x1 *= width_scale; @@ -104,12 +106,28 @@ void Yolo::ConvertBoxCoordsToOriginalSize(std::vector face.box.x2 *= width_scale; face.box.y2 *= height_scale; face.box.UpdateCenter(); - - // Convert landmarks - for (size_t j = 0; j 
< face.landmarks.points.size(); ++j) { - face.landmarks.points[j].x *= width_scale; - face.landmarks.points[j].y *= height_scale; - } + + // Expansion factor + float expansion_factor = 1.0f; + + // Find current dimensions + float current_width = face.box.x2 - face.box.x1; + float current_height = face.box.y2 - face.box.y1; + + // Use width as the base for the 1:1.5 ratio + float base_width = max(current_width, current_height); + float new_width = base_width; + float new_height = base_width * 1.5f; // Make height 1.5 times the width + + // Apply expansion + new_width *= expansion_factor; + new_height *= expansion_factor; + + // Recalculate coordinates maintaining the center + face.box.x1 = face.box.center.x - (new_width / 2.0f); + face.box.x2 = face.box.center.x + (new_width / 2.0f); + face.box.y1 = face.box.center.y - (new_height / 2.0f); + face.box.y2 = face.box.center.y + (new_height / 2.0f); } } @@ -146,7 +164,7 @@ void Yolo::CropFaceToImage(ofImage &inputImage, types::BoxfWithLandmarks &face, tempFbo.readToPixels(pix); colorImage.setFromPixels(pix); - colorImage.resize(260, 260); + colorImage.resize(512, 512); } void Yolo::SortDetectedFaces(std::vector &detectedFaces){