Added camera support, updated fonts, renamed FER classes, option for multiple scripts, keypresses

4 months ago · 4c45524d66
10 changed files with 1174 additions and 42 deletions
--- a/bin/data/Roboto-SemiBold.ttf
+++ b/bin/data/Roboto-SemiBold.ttf
--- a/bin/data/cambria.ttc
+++ b/bin/data/cambria.ttc
--- a/bin/data/text/contemp-macbeth.json
+++ b/bin/data/text/contemp-macbeth.json
--- a/src/main.cpp
+++ b/src/main.cpp
@ -8,7 +8,7 @@ int main( ){
 	//Use ofGLFWWindowSettings for more options like multi-monitor fullscreen
 	ofGLWindowSettings settings;
 	settings.setGLVersion(3, 2);
-	settings.setSize(1920, 1080);
+	settings.setSize(1280, 720);
 	settings.windowMode = OF_WINDOW; //can also be OF_FULLSCREEN

 	auto mainWindow = ofCreateWindow(settings);
--- a/src/ofApp.cpp
+++ b/src/ofApp.cpp
@ -9,20 +9,35 @@ void ofApp::setup(){

    videoPlayer.load(videoPath);
    videoPlayer.setLoopState(OF_LOOP_NORMAL);
-    videoPlayer.play();
+    //videoPlayer.play();

    inputImage.allocate(640, 640, OF_IMAGE_COLOR); // Allocate image, so we don't get any issues when processing on the thread
-    videoFrame.allocate(1920, 1080, OF_IMAGE_COLOR);
+    videoFrame.allocate(1280, 720, OF_IMAGE_COLOR);
+
+    webcam.setDeviceID(0);
+    webcam.setDesiredFrameRate(60);
+    webcam.setup(webcamWidth, webcamHeight);
+
+    ofLog() << "Webcam size: " << webcam.getWidth() << "x" << webcam.getHeight();

    onnx.setup(&inputImage); // setup onnx -> will need to pass in a pointer to the two fbos?
 }

 //--------------------------------------------------------------
 void ofApp::update(){
-    videoPlayer.update();
-    if(videoPlayer.isFrameNew()) {
-        ofPixels & p = videoPlayer.getPixels();
-        videoFrame.setFromPixels(p);
+    //videoPlayer.update();
+    webcam.update();
+    // if(videoPlayer.isFrameNew()) {
+    //     ofPixels & p = videoPlayer.getPixels();
+    //     videoFrame.setFromPixels(p);
+    // }
+
+    if (webcam.isFrameNew() && webcam.isInitialized()) {
+        // ofPixels & webcamPixels = webcam.getPixels();
+        // // Resize webcamPixels to match videoFrame's size
+        // webcamPixels.resizeTo(videoFrame.getPixelsRef());
+        // videoFrame.setFromPixels(videoFrame.getPixels());
+        videoFrame.setFromPixels(webcam.getPixels());
    }
    onnx.update(videoFrame);
    
@ -45,7 +60,9 @@ void ofApp::exit(){

 //--------------------------------------------------------------
 void ofApp::keyPressed(int key){
-
+    if(key == 'f' || key == 'F'){
+        ofToggleFullscreen();
+    }
 }

 //--------------------------------------------------------------
--- a/src/ofApp.h
+++ b/src/ofApp.h
@ -23,4 +23,9 @@ class ofApp : public ofBaseApp{
 		ofImage videoFrame;

 		shared_ptr<ofTeleprompter> teleprompter;
+
+		/* webcam */
+		ofVideoGrabber webcam;
+		int webcamWidth = 1280;
+		int webcamHeight = 720;
 };
--- a/src/ofTeleprompter.cpp
+++ b/src/ofTeleprompter.cpp
@ -2,16 +2,21 @@

 void ofTeleprompter::setup() {
   
-    ofBackground(0);
+    ofBackground(255);
    ofSetVerticalSync(false);
    setupGUI();
-    loadText();

-    textFont.load("Avara.otf", 32);
-    detailsFont.load("Avara-Bold.otf", 24);
+    /* load both texts */
+    loadText(script, filePath);
+    loadText(scriptContemporary, filePathContemp);
+
+    activeScript = &script;
+
+    textFont.load("Roboto-SemiBold.ttf", 24);
+    detailsFont.load("Roboto-SemiBold.ttf", 22);

    // Prepare first line for teleprompter
-    currentSentence = script[currentLine].sentence;
+    currentSentence = (*activeScript)[currentLine].sentence;
    displayedSentence.clear();
    currentLetterIndex = 0;
    lastWordTime = ofGetElapsedTimeMillis();
@ -23,9 +28,9 @@ void ofTeleprompter::setup() {
 void ofTeleprompter::update() {

    if(ofGetFrameNum() < 2) {
-        currentSpeaker = script[currentLine].speaker;
-        currentEmotion = script[currentLine].emotion;
-        currentSentence = script[currentLine].sentence;
+        currentSpeaker = (*activeScript)[currentLine].speaker;
+        currentEmotion = (*activeScript)[currentLine].emotion;
+        currentSentence = (*activeScript)[currentLine].sentence;
    }

    currentLineIndex = ofToString(currentLine + 1) + " / " + ofToString(script.size() + 1);
@ -85,6 +90,7 @@ void ofTeleprompter::setupGUI() {

    useLLMOnly.addListener(this, &ofTeleprompter::toggleOffText);
    useTextOnly.addListener(this, &ofTeleprompter::toggleOffLLM);
+    useContempTextOnly.addListener(this, &ofTeleprompter::toggleContempScript);

    gui.setDefaultWidth(400);
    gui.setup();
@ -97,14 +103,16 @@ void ofTeleprompter::setupGUI() {
    gui.add(temperature.setup("Temperature", 0.7, 0, 1.5));
    gui.add(useLLMOnly.setup("Use LLM Only", false));
    gui.add(useTextOnly.setup("Use Text Only", false));
+    gui.add(useContempTextOnly.setup("Use Contept Text Only", false));
+    gui.add(useGeneratedFeedback.setup("Use LLM Feedback", false));
    gui.add(nextLine.setup("Next Line"));
    gui.add(reset.setup("Reset Script"));
 }

-void ofTeleprompter::loadText() {
+void ofTeleprompter::loadText(std::vector<Line> & _script, std::string & _file) {

-    script.clear();
-    ofFile jsonFile(filePath);
+    _script.clear();
+    ofFile jsonFile(_file);
    if(jsonFile.exists()) {
        ofJson json = ofLoadJson(jsonFile);
        int idx = 0;
@ -115,18 +123,18 @@ void ofTeleprompter::loadText() {
            l.speaker = entry.value("first_speaker", "");
            l.sentence = entry.value("first_text", "");
            l.emotion = entry.value("first_emotion", "");
-            script.push_back(l);
+            _script.push_back(l);
        }
    } else {
        ofLogError() << "JSON file not found: " << filePath;
    }

    // Random Check
-    if (!script.empty()) {
-        int randomIdx = ofRandom(script.size()); // returns float
+    if (!_script.empty()) {
+        int randomIdx = ofRandom(_script.size()); // returns float
        int idx = static_cast<int>(randomIdx);   // convert to int
-        ofLog() << "Random line: " << script[idx].speaker << ": " << script[idx].sentence;
-        ofLog() << "Number of lines: " << script.size();
+        ofLog() << "Random line: " << _script[idx].speaker << ": " << _script[idx].sentence;
+        ofLog() << "Number of lines: " << _script.size();
    }


@ -135,7 +143,7 @@ void ofTeleprompter::loadText() {
 }

 void ofTeleprompter::drawText() {
-    ofSetColor(ofColor::white);
+    ofSetColor(ofColor::red);

    // --- Display speaker and emotion centered at the top ---
    std::string speakerText = "Speaker: " + currentSpeaker.getParameter().toString();
@ -154,7 +162,7 @@ void ofTeleprompter::drawText() {

    // -------

-    ofSetColor(ofColor::yellow);
+    ofSetColor(ofColor::black);
    float margin = 128; // pixels
    float maxWidth = ofGetWidth() - margin * 2;
    std::string wrapped = wrapStringToWidth(displayedSentence, maxWidth);
@ -174,7 +182,7 @@ void ofTeleprompter::drawText() {
    // Draw each line centered horizontally
    for (size_t i = 0; i < lines.size(); ++i) {
        ofRectangle bbox = textFont.getStringBoundingBox(lines[i], 0, 0);
-        float x = (ofGetWidth() - bbox.width) / 2.0f;
+        float x = 128;//(ofGetWidth() - bbox.width) / 2.0f;
        float y = startY + i * textFont.getLineHeight();
        textFont.drawString(lines[i], x, y);
    }
@ -226,18 +234,25 @@ void ofTeleprompter::nextLinePressed() {
    if (((currentEmotionIntensity > 0.8 && currentEmotionDetetced != "neutral") || useLLMOnly) && !useTextOnly) {
        ofLog() << "Generate Line!";

-        std::string speaker = script[currentLine - 1].speaker;
-        std::string sentence = script[currentLine - 1].sentence;
-        std::string emotion = script[currentLine].emotion;
+        std::string speaker = (*activeScript)[currentLine - 1].speaker;
+        std::string sentence = (*activeScript)[currentLine - 1].sentence;
+        std::string emotion = (*activeScript)[currentLine].emotion;

+        if (useGeneratedFeedback) {
+            speaker = currentSpeaker;
+            sentence = currentSentence;
+            emotion = currentEmotion;
+
+            ofLog() << "Using Generated Feedback";
+        }
       
        llmThread.requestPrompt(speaker, sentence, currentEmotionDetetced, temperature);
        waitingForLLM = true;
        // Don't set currentSentence yet!
    } else {
-        currentSpeaker = script[currentLine].speaker;
-        currentEmotion = script[currentLine].emotion;
-        currentSentence = script[currentLine].sentence;
+        currentSpeaker = (*activeScript)[currentLine].speaker;
+        currentEmotion = (*activeScript)[currentLine].emotion;
+        currentSentence = (*activeScript)[currentLine].sentence;
        displayedSentence.clear();
        currentLetterIndex = 0;
        lastWordTime = ofGetElapsedTimeMillis();
@ -250,9 +265,9 @@ void ofTeleprompter::resetScript() {
    currentLine = 0;

    // Prepare teleprompter effect for letter-by-letter
-    currentSpeaker = script[currentLine].speaker;
-    currentEmotion = script[currentLine].emotion;
-    currentSentence = script[currentLine].sentence;
+    currentSpeaker = (*activeScript)[currentLine].speaker;
+    currentEmotion = (*activeScript)[currentLine].emotion;
+    currentSentence = (*activeScript)[currentLine].sentence;

    displayedSentence.clear();
    currentLetterIndex = 0;
@ -270,3 +285,43 @@ void ofTeleprompter::toggleOffText(bool & val) {
        useTextOnly = false;
    }
 }
+
+void ofTeleprompter::toggleContempScript(bool & val) {
+    if (val) {
+        activeScript = &scriptContemporary;
+    } else {
+        activeScript = &script;
+    }
+
+    ofLog() << "Script Size:" + (*activeScript).size();
+}
+
+void ofTeleprompter::keyPressed(int key){
+    if(key == 'f' || key == 'F'){
+        ofToggleFullscreen();
+    }
+
+    if(key == OF_KEY_RIGHT) {
+        nextLinePressed();
+    }
+
+    if(key == OF_KEY_LEFT) {
+        pastLine();
+    }
+
+    if(key == 'r' || key == 'R'){
+        resetScript();
+    }
+}
+
+void ofTeleprompter::pastLine() {
+    if (currentLine < script.size()) {
+        currentLine--;
+        currentSpeaker = (*activeScript)[currentLine].speaker;
+        currentEmotion = (*activeScript)[currentLine].emotion;
+        currentSentence = (*activeScript)[currentLine].sentence;
+        displayedSentence.clear();
+        currentLetterIndex = 0;
+        lastWordTime = ofGetElapsedTimeMillis();
+    }
+}
--- a/src/ofTeleprompter.h
+++ b/src/ofTeleprompter.h
@ -17,14 +17,18 @@ class ofTeleprompter: public ofBaseApp{
        void draw();
        void setupGUI();
        void nextLinePressed();
+        void pastLine();
        void resetScript();
        void sendLLMRequest();
-        void loadText();
+        void loadText(std::vector<Line> & script, std::string & file);
        void drawText();
        void updateCVData(int numOfFacesDetected, std::string emotion, float intensity);

        void toggleOffLLM(bool & val);
        void toggleOffText(bool & val);
+        void toggleContempScript(bool & val);
+        void toggleGeneratedFeeback(bool & val);
+        void keyPressed(int key);

        std::string wrapStringToWidth(const std::string& text, float maxWidth);

@ -35,6 +39,8 @@ class ofTeleprompter: public ofBaseApp{
        ofxButton nextLine;
        ofxToggle useLLMOnly;
        ofxToggle useTextOnly;
+        ofxToggle useContempTextOnly;
+        ofxToggle useGeneratedFeedback;
        ofxButton reset;

        ofxLabel currentLineIndex;
@ -47,16 +53,19 @@ class ofTeleprompter: public ofBaseApp{

        /* script */
        std::vector<Line> script;
+        std::vector<Line> scriptContemporary;
        std::string filePath = "text/preprocess_original.json";
+        std::string filePathContemp = "text/contemp-macbeth.json";
        int currentLine = 0;
        std::string currentLineString = "N/A";
-        const char* emotions[7] = {"anger", "disgust", "fear", "happiness", "neutral", "sadness", "surprise"};
+        const char* emotions[7] = {"anger", "disgust", "fear", "excited", "bored", "upset", "interested"};
+        std::vector<Line>* activeScript = nullptr;

        /* scrolling text */
        int currentLetterIndex = 0;
        std::string currentSentence;
        uint64_t lastWordTime = 0;
-        uint64_t wordDelay = 10;
+        uint64_t wordDelay = 40;
        std::string displayedSentence;
        ofTrueTypeFont textFont;
        ofTrueTypeFont detailsFont;
--- a/src/ofYolo.h
+++ b/src/ofYolo.h
@ -48,7 +48,7 @@ struct Emotion {

    std::string getDominantEmotion() const {
        float values[7] = {anger, disgust, fear, happiness, neutral, sadness, surprise};
-        const char* names[7] = {"anger", "disgust", "fear", "happiness", "neutral", "sadness", "surprise"};
+        const char* names[7] = {"anger", "disgust", "fear", "excited", "bored", "upset", "interested"};
        
        int maxIndex = 0;
        float maxValue = values[0];
--- a/src/onxProcess.cpp
+++ b/src/onxProcess.cpp
@ -48,7 +48,7 @@ void onxProcess::update(ofImage& frame) {

        detectedFaces.clear();
        yolo.ParseOutput(tensor, detectedFaces, numAnchors);
-        yolo.ConvertBoxCoordsToOriginalSize(detectedFaces, 1920, 1080);
+        yolo.ConvertBoxCoordsToOriginalSize(detectedFaces, 1280, 720);

        fdThread.resetInferenceFlag();
        fdThread.shouldRunInference = true;
@ -162,7 +162,7 @@ void onxProcess::setTeleprompterValues() {
        }
    }

-    const char* emotions[7] = {"anger", "disgust", "fear", "happiness", "neutral", "sadness", "surprise"};
+    const char* emotions[7] = {"anger", "disgust", "fear", "excited", "bored", "upset", "interested"};
    std::string maxEmotionName = emotions[maxEmotionIdx];

    dominantEmotion = maxEmotionName;