
before image cropping pre-inference

tsns-map
cailean committed 3 months ago
parent commit 589734cce9
1. src/Map.cpp (7 changed lines)
2. src/Player.cpp (26 changed lines)
3. src/Player.h (3 changed lines)
4. src/main.cpp (2 changed lines)
5. src/ofApp.cpp (137 changed lines)
6. src/ofApp.h (12 changed lines)

src/Map.cpp (7 changed lines)

@@ -18,9 +18,9 @@ void Map::Setup(){
SetupTSNE();
}
mapFbo.allocate(ofGetWindowWidth(), ofGetWindowHeight(), GL_RGB);
mapFbo.allocate(ofGetWindowWidth() / 2, ofGetWindowHeight(), GL_RGB);
fboImage.allocate(ofGetWindowWidth(), ofGetWindowHeight(), OF_IMAGE_COLOR);
fboImage.allocate(ofGetWindowWidth() / 2, ofGetWindowHeight(), OF_IMAGE_COLOR);
Setup3D();
@@ -66,9 +66,8 @@ void Map::Draw(){
mapFbo.end();
mapFbo.readToPixels(fboPixels);
fboImage.setFromPixels(fboPixels);
//mapFbo.draw(0, 0);
fboImage.setFromPixels(fboPixels);
}
/*

src/Player.cpp (26 changed lines)

@@ -44,9 +44,7 @@ ofPixels Player::GetVideoPixels(){
void Player::SetVideo(std::string path, ofFbo &fbo){
videoPlayer.load(path);
videoPlayer.setFrame(800);
//fbo.allocate(videoPlayer.getWidth(), videoPlayer.getHeight(), GL_RGB);
// Just setting the video dims here for the tsne map!
fbo.allocate(1600, 800, GL_RGB);
fbo.allocate(videoPlayer.getWidth(), videoPlayer.getHeight(), GL_RGB);
}
// Sets a random frame in the active video
@@ -55,3 +53,25 @@ void Player::SetRandomFrame(){
std::cout << "setting frame: " << randomFrame << std::endl;
videoPlayer.setFrame(randomFrame);
}
void Player::SetVideoPosition(ofFbo& output_fbo){
int playerW = videoPlayer.getWidth();
int playerH = videoPlayer.getHeight();
// Calculate the scaling needed to cover the output FBO's full width and height
float targetWidth = output_fbo.getWidth();
float targetHeight = output_fbo.getHeight();
float scaleX = targetWidth / playerW;
float scaleY = targetHeight / playerH;
// Use the larger scaling factor to ensure coverage
float scale = std::max(scaleX, scaleY);
// Calculate scaled dimensions
int scaledWidth = playerW * scale;
int scaledHeight = playerH * scale;
// Center the video within the FBO
centerPosition = glm::vec2((targetWidth - scaledWidth) / 2, (targetHeight - scaledHeight) / 2);
}
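
A minimal standalone sketch of the cover-and-centre maths used by SetVideoPosition above, with assumed sizes (a 1280x720 video filling a 960x1080 FBO; the real values come from videoPlayer and output_fbo at runtime):

#include <algorithm>
#include <iostream>
int main(){
    float player_w = 1280, player_h = 720;    // assumed video size
    float target_w = 960,  target_h = 1080;   // assumed FBO size (half of a 1920x1080 window)
    // "Cover" scale: take the larger factor so the video fills the whole FBO
    float scale = std::max(target_w / player_w, target_h / player_h);   // max(0.75, 1.5) = 1.5
    float scaled_w = player_w * scale;                                  // 1920
    float scaled_h = player_h * scale;                                  // 1080
    // Centre offset; it goes negative on the axis that overflows, which is what crops the video
    float offset_x = (target_w - scaled_w) / 2;                         // -480
    float offset_y = (target_h - scaled_h) / 2;                         //  0
    std::cout << offset_x << ", " << offset_y << std::endl;
}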

src/Player.h (3 changed lines)

@@ -12,7 +12,7 @@ class Player {
void Draw();
void SetVideo(std::string path, ofFbo &fbo);
ofPixels GetVideoPixels();
void SetVideoPosition();
void SetVideoPosition(ofFbo& output_fbo);
void SetRandomFrame();
void SetupGUI();
void UpdateGUI();
@@ -32,6 +32,7 @@ class Player {
ofFbo fbo;
Player();
};

src/main.cpp (2 changed lines)

@@ -6,7 +6,7 @@ int main( ){
//Use ofGLFWWindowSettings for more options like multi-monitor fullscreen
ofGLWindowSettings settings;
settings.setSize(1600, 800);
settings.setSize(1920, 1080);
settings.setGLVersion(3, 2);
settings.windowMode = OF_WINDOW; //can also be OF_FULLSCREEN

src/ofApp.cpp (137 changed lines)

@@ -2,33 +2,47 @@
//--------------------------------------------------------------
void ofApp::setup(){
/* ofSettings */
ofDisableArbTex();
ofSetFrameRate(60);
// ofSetVerticalSync(true);
ofSetFrameRate(24);
ofSetVerticalSync(true);
window_width = ofGetWindowWidth();
window_height = ofGetWindowHeight();
/* load font */
tf.load("data/fonts/jetbrainsmono-regular.ttf", 20);
map.Setup();
/* load shader */
depthToColourShader.load("data/shader/rampShader.vert", "data/shader/rampShader.frag");
//player.Setup();
//player.SetVideo("videos/demo.mp4", fbo);
/* setup map */
map.Setup();
emoteImage.allocate(260, 260);
tempImage.allocate(emoteImage.getWidth(), emoteImage.getHeight(), OF_IMAGE_COLOR);
/* setup video */
player.Setup();
player.SetVideo("videos/demo.mp4", model_output_fbo_1);
/* setup models (modelPath, log, useCuda) */
ORTCHAR_T* modelPath = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/depth_anything_v2_vitb.onnx";
ORTCHAR_T* modelPath2 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/yolov5s-face.onnx";
ORTCHAR_T* modelPath3 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/rgb_emotion.onnx";
ORTCHAR_T* modelPath4 = "/home/cailean/Desktop/openframeworks/of_v0.12.0_linux64gcc6_release/apps/myApps/onnx-test/bin/data/depth_anything_v2_vits.onnx";
/* Setup Models (modelPath, log, useCuda) */
yolo.Setup(modelPath2, false, true);
depth.Setup(modelPath, false, true);
depth_small.Setup(modelPath4, false, true);
emotion.Setup(modelPath3, false, true);
/* Load shader, allocated rampedFbo */
depthToColourShader.load("data/shader/rampShader.vert", "data/shader/rampShader.frag");
fbo.allocate(1600, 800, GL_RGB);
rampedFbo.allocate(1600, 800);
/* Depth output fbo */
model_output_fbo.allocate(window_width / 2, window_height, GL_RGB);
/* Shader output */
rampedFbo.allocate(window_width, window_height);
emoteImage.allocate(260, 260);
tempImage.allocate(emoteImage.getWidth(), emoteImage.getHeight(), OF_IMAGE_COLOR);
screen_fbo.allocate(window_width, window_height, GL_RGB);
}
@@ -37,7 +51,6 @@ void ofApp::setup(){
void ofApp::update(){
/* Check whether the application has moved past the first frame,
as the models need to load first and the first inference is quite slow */
auto start = std::chrono::high_resolution_clock::now();
if(ofGetFrameNum() > 0)
firstRun = false;
@@ -52,35 +65,29 @@ void ofApp::update(){
}
/* Setup model input using ofImage, allocated fbo */
//player.Update(img);
//img.setFromPixels(player.GetVideoPixels());
player.Update(img);
img.setFromPixels(player.GetVideoPixels());
/* Run Models */
try{
// map
inferDepthImage(model_output_fbo, map.fboImage, depth);
auto output_tensors = depth.Run(map.fboImage);
float* output_ptr = output_tensors.front().GetTensorMutableData<float>();
size_t num_elements = output_tensors.front().GetTensorTypeAndShapeInfo().GetElementCount();
float min_value = depth.ReduceMin(output_ptr, num_elements);
float max_value = depth.ReduceMax(output_ptr, num_elements);
depth.Normalize(output_ptr, num_elements, min_value, max_value);
// video player
inferDepthImage(model_output_fbo_1, img, depth_small);
depth.DataToFbo(output_ptr, 518, 518, fbo);
// auto output_tensors_face = yolo.Run(model_input_img);
auto output_tensors_face = yolo.Run(map.fboImage);
// auto output_faces = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();
auto output_faces = output_tensors_face.front().GetTensorTypeAndShapeInfo().GetShape();
// unsigned int num_anchors = output_faces[1]; // Number of anchors
unsigned int num_anchors = output_faces[1]; // Number of anchors
// float* output_face_ptr = output_tensors_face.front().GetTensorMutableData<float>();
float* output_face_ptr = output_tensors_face.front().GetTensorMutableData<float>();
// faceDetector.ParseOutput(output_face_ptr, detected_faces, num_anchors);
faceDetector.ParseOutput(output_face_ptr, detected_faces, num_anchors);
faceDetector.ConvertBoxCoordsToOriginalSize(detected_faces, fbo.getWidth(), fbo.getHeight());
// faceDetector.ConvertBoxCoordsToOriginalSize(detected_faces, outFbo.getWidth(), outFbo.getHeight());
/* As no input is generated for the emotion recognition model, run a dummy vector through the model
so it can load */
@@ -112,11 +119,6 @@ void ofApp::update(){
std::cout << "Model did not run" << std::endl;
}
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<float> duration = end - start;
std::cout << "Time taken for Update: " << duration.count() << " seconds" << std::endl;
}
@@ -124,18 +126,52 @@ void ofApp::update(){
void ofApp::draw(){
map.Draw();
renderDepthMap();
screen_fbo.begin();
if(!firstRun && detected_faces.size() != 0){
faceDetector.DrawBox(detected_faces);
faceDetector.DrawCenter(detected_faces);
// Calculate the target width and height for model_output_fbo_1
float fbo_1_target_width = window_width * 0.5; // 1/2 of the screen width (960px at 1920x1080)
float fbo_1_target_height = window_height; // Full height of the screen
// Calculate the aspect ratio of the video and the FBO
float video_aspect_ratio = model_output_fbo_1.getWidth() / model_output_fbo_1.getHeight();
float fbo_aspect_ratio = fbo_1_target_width / fbo_1_target_height;
// Adjust the scaling to cover the FBO area while maintaining aspect ratio
float new_width, new_height;
if (fbo_aspect_ratio > video_aspect_ratio) {
// FBO is wider; scale by width to fill the FBO
new_width = fbo_1_target_width;
new_height = new_width / video_aspect_ratio; // Scale height to maintain aspect ratio
} else {
// FBO is taller; scale by height to fill the FBO
new_height = fbo_1_target_height;
new_width = new_height * video_aspect_ratio; // Scale width to maintain aspect ratio
}
// Center the video to ensure it fills the FBO and is cropped if necessary
float x_pos = (window_width * 0.75) - (new_width / 2);
float y_pos = (window_height - new_height) / 2; // Center vertically
// Draw the scaled video inside the FBO
model_output_fbo_1.draw(x_pos, y_pos, new_width, new_height);
model_output_fbo.draw(0, 0);
screen_fbo.end();
renderDepthMap();
// if(!firstRun && detected_faces.size() != 0){
// faceDetector.DrawBox(detected_faces);
// faceDetector.DrawCenter(detected_faces);
// }
ofPushMatrix();
ofSetColor(255);
ofSetBackgroundColor(0);
tf.drawString(std::to_string(ofGetFrameRate()), 10, 30);
ofPopMatrix();
// emoteImage.draw(640, 0);
// for(auto& face : detected_faces){
// ofDrawBitmapString(std::to_string(face.box.emotional_state.emotions[0]), 700, 300);
@@ -143,6 +179,19 @@ void ofApp::draw(){
}
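
A rough numeric walk-through of the placement maths added to draw() above, assuming the 1920x1080 window now set in main.cpp and a 1600x800 source video (the size previously hard-coded in Player::SetVideo; the actual dimensions come from model_output_fbo_1 at runtime):

#include <iostream>
int main(){
    float window_width = 1920, window_height = 1080;   // from main.cpp
    float video_w = 1600, video_h = 800;                // assumed demo.mp4 size
    float target_w = window_width * 0.5f;               // 960, the right half of the screen
    float target_h = window_height;                     // 1080
    float video_aspect = video_w / video_h;             // 2.0
    float fbo_aspect = target_w / target_h;             // ~0.89
    // Cover the target area: scale whichever axis needs more growth
    float new_w, new_h;
    if (fbo_aspect > video_aspect) { new_w = target_w; new_h = new_w / video_aspect; }
    else                           { new_h = target_h; new_w = new_h * video_aspect; }   // branch taken here
    // Centre on the midpoint of the right half (x = 0.75 * window width)
    float x_pos = window_width * 0.75f - new_w / 2;     // 1440 - 1080 = 360
    float y_pos = (window_height - new_h) / 2;          // 0
    std::cout << new_w << "x" << new_h << " at (" << x_pos << ", " << y_pos << ")" << std::endl;
    // Prints 2160x1080 at (360, 0): the left overflow is later covered by model_output_fbo
    // drawn at (0, 0), and the right overflow falls outside screen_fbo
}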
void ofApp::inferDepthImage(ofFbo& fbo, ofImage& img, Onnx& model){
auto output_tensors = model.Run(img);
float* output_ptr = output_tensors.front().GetTensorMutableData<float>();
size_t num_elements = output_tensors.front().GetTensorTypeAndShapeInfo().GetElementCount();
float min_value = model.ReduceMin(output_ptr, num_elements);
float max_value = model.ReduceMax(output_ptr, num_elements);
model.Normalize(output_ptr, num_elements, min_value, max_value);
model.DataToFbo(output_ptr, 518, 518, fbo);
}
//--------------------------------------------------------------
void ofApp::inferEmotionalState(){
@@ -184,15 +233,17 @@ void ofApp::inferEmotionalState(){
}
}
/*
Depth Map Shader Pass
*/
void ofApp::renderDepthMap(){
rampedFbo.begin();
depthToColourShader.begin();
depthToColourShader.setUniformTexture("tex0", fbo.getTexture(), 0);
depthToColourShader.setUniformTexture("tex1", map.fboImage.getTexture(), 1);
depthToColourShader.setUniformTexture("tex0", screen_fbo.getTexture(), 0);
depthToColourShader.setUniform1f("texW", rampedFbo.getWidth());
depthToColourShader.setUniform1f("texH", rampedFbo.getHeight());
fbo.draw(0, 0);
screen_fbo.draw(0, 0);
depthToColourShader.end();
rampedFbo.end();

src/ofApp.h (12 changed lines)

@@ -31,15 +31,20 @@ class ofApp : public ofBaseApp{
void gotMessage(ofMessage msg);
void inferEmotionalState();
void renderDepthMap();
void inferDepthImage(ofFbo& fbo, ofImage& img, Onnx& model);
float window_height;
float window_width;
ofImage img;
ofFbo fbo;
cv::Mat cvImg;
ofVideoGrabber webcam;
Player player;
bool firstRun = true;
Onnx depth;
Onnx depth_small;
Onnx yolo;
Onnx emotion;
ofxCvColorImage emoteImage;
@@ -57,4 +62,9 @@ class ofApp : public ofBaseApp{
ofFbo rampedFbo;
ofTrueTypeFont tf;
ofFbo video_player_fbo;
ofFbo model_output_fbo;
ofFbo model_output_fbo_1;
ofFbo screen_fbo;
};
