Browse Source

Implement LLM integration with request handling and GUI controls

main
cailean 2 days ago
parent
commit
321618fcd2
  1. 152
      src/LLMRequestThread.h
  2. 2
      src/ofApp.cpp
  3. 67
      src/ofTeleprompter.cpp
  4. 12
      src/ofTeleprompter.h

152
src/LLMRequestThread.h

@ -0,0 +1,152 @@
#pragma once
#include "ofMain.h"
#include "ofThread.h"
#include "ofxNetwork.h"
#include <algorithm>
#include <atomic>
#include <cctype>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <string>
/// Background worker that POSTs prompt requests to a local LLM HTTP server
/// (FastAPI) and exposes the latest response to the main/GUI thread.
/// Thread-safety: all mutable request/response state is guarded by stateMutex;
/// completion is signalled via the atomic resultReady flag.
class LLMRequestThread : public ofThread {
public:
    LLMRequestThread() {}

    ~LLMRequestThread() {
        // Wake the worker (if it is waiting) and join before members die.
        stop();
        waitForThread(false);
    }

    /// Starts the worker thread.
    /// @param url_ stored for reference only — NOTE(review): makeRequest()
    ///             currently uses a hard-coded host/port, not this URL.
    void setup(std::string url_) {
        url = url_;
        resultReady = false;
        startThread();
    }

    /// Legacy test entry point: stores a raw prompt string and wakes the
    /// worker. NOTE(review): the worker consumes speaker/text/emotion, not
    /// `prompt`, so this effectively re-sends whatever those fields last
    /// held — confirm whether this method is still needed.
    void requestPromptTest(std::string newPrompt) {
        std::unique_lock<std::mutex> lock(stateMutex);
        prompt = newPrompt;
        resultReady = false;
        hasNewPrompt = true;
        condition.notify_all(); // wake thread to start processing
    }

    /// Queues a generation request for the worker thread. If a previous
    /// request has not started yet it is overwritten.
    void requestPrompt(const std::string& speaker_, const std::string& text_,
                       const std::string& emotion_, const float temp_) {
        std::unique_lock<std::mutex> lock(stateMutex);
        speaker = speaker_;
        text = text_;
        emotion = emotion_;
        llmTemperature = temp_;
        resultReady = false;
        hasNewPrompt = true;
        condition.notify_all(); // wake thread to start processing
    }

    /// Returns a thread-safe copy of the last response body.
    std::string getResult() {
        std::unique_lock<std::mutex> lock(stateMutex);
        return result;
    }

    /// True once the most recent request has completed.
    bool isResultReady() {
        return resultReady.load();
    }

    /// Asks the worker loop to exit and wakes it if it is waiting.
    void stop() {
        std::unique_lock<std::mutex> lock(stateMutex);
        stopThread();
        condition.notify_all();
    }

protected:
    void threadedFunction() override {
        while (isThreadRunning()) {
            std::unique_lock<std::mutex> lock(stateMutex);
            condition.wait(lock, [this] { return hasNewPrompt || !isThreadRunning(); });
            if (!isThreadRunning()) break;
            // Snapshot the request under the lock, then release it so the
            // GUI thread is never blocked for the HTTP round trip.
            std::string localSpeaker = speaker;
            std::string localBody = text;
            std::string localEmotion = emotion;
            float localTemp = llmTemperature;
            hasNewPrompt = false;
            lock.unlock(); // unlock during HTTP request
            std::string responseText = makeRequest(localSpeaker, localBody, localEmotion, localTemp);
            lock.lock();
            result = responseText;
            resultReady = true;
        }
    }

    /// Escapes a string for safe embedding inside a JSON string literal.
    /// Without this, quotes/newlines in the script text would produce a
    /// malformed request body.
    static std::string escapeJson(const std::string& s) {
        std::string out;
        out.reserve(s.size() + 8);
        for (char c : s) {
            switch (c) {
                case '"':  out += "\\\""; break;
                case '\\': out += "\\\\"; break;
                case '\n': out += "\\n";  break;
                case '\r': out += "\\r";  break;
                case '\t': out += "\\t";  break;
                default:
                    if (static_cast<unsigned char>(c) < 0x20) {
                        // Remaining control characters must be \u-escaped.
                        char buf[8];
                        std::snprintf(buf, sizeof(buf), "\\u%04x", static_cast<unsigned>(c) & 0xff);
                        out += buf;
                    } else {
                        out += c;
                    }
            }
        }
        return out;
    }

    /// Extracts the Content-Length value from an HTTP header block.
    /// Case-insensitive per RFC 7230. Returns -1 when the header is absent.
    static long parseContentLength(std::string headers) {
        std::transform(headers.begin(), headers.end(), headers.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        size_t pos = headers.find("content-length:");
        if (pos == std::string::npos) return -1;
        pos += 15; // skip past "content-length:"
        while (pos < headers.size() && headers[pos] == ' ') ++pos;
        long value = -1;
        while (pos < headers.size() && std::isdigit(static_cast<unsigned char>(headers[pos]))) {
            if (value < 0) value = 0;
            value = value * 10 + (headers[pos] - '0');
            ++pos;
        }
        return value;
    }

    /// Blocking HTTP POST to the local FastAPI /generate endpoint.
    /// @return the response body, or an "Error: ..." string on failure.
    std::string makeRequest(const std::string& speaker_, const std::string& text_,
                            const std::string& emotion_, const float temp_) {
        ofxTCPClient client;
        std::string host = "127.0.0.1"; // TODO: extract host/port from `url`
        int port = 8000;
        // Connect to server
        if (!client.setup(host, port, false)) {
            return "Error: Could not connect";
        }
        // Build JSON body with all fields; user-controlled strings escaped.
        std::string body = "{\"speaker\":\"" + escapeJson(speaker_) + "\","
            "\"sentence\":\"" + escapeJson(text_) + "\","
            "\"emotion\":\"" + escapeJson(emotion_) + "\","
            "\"temp\":" + ofToString(temp_) + "}";
        ofLog() << body;
        std::string request =
            "POST /generate HTTP/1.1\r\n"
            "Host: " + host + "\r\n"
            "Content-Type: application/json\r\n"
            "Content-Length: " + std::to_string(body.size()) + "\r\n"
            "Connection: close\r\n"
            "\r\n" +
            body;
        client.sendRaw(request);
        // Read until the full body has arrived. The previous version broke
        // out as soon as the *header* terminator appeared, which truncated or
        // dropped the body whenever it arrived in a later packet.
        std::string response;
        size_t headerEnd = std::string::npos;
        long contentLength = -1;
        uint64_t startTime = ofGetElapsedTimeMillis();
        while (ofGetElapsedTimeMillis() - startTime < 3000) { // 3s timeout — may need raising for slow generations
            std::string received = client.receiveRaw();
            if (!received.empty()) {
                response += received;
            }
            if (headerEnd == std::string::npos) {
                headerEnd = response.find("\r\n\r\n");
                if (headerEnd != std::string::npos) {
                    contentLength = parseContentLength(response.substr(0, headerEnd));
                }
            }
            if (headerEnd != std::string::npos) {
                size_t bodyBytes = response.size() - (headerEnd + 4);
                if (contentLength >= 0 && bodyBytes >= static_cast<size_t>(contentLength)) {
                    break; // full body received
                }
                if (contentLength < 0 && !client.isConnected()) {
                    break; // no Content-Length: server ends body by closing ("Connection: close")
                }
            }
            ofSleepMillis(10);
        }
        client.close();
        // Extract body (after \r\n\r\n)
        if (headerEnd == std::string::npos) {
            headerEnd = response.find("\r\n\r\n");
        }
        if (headerEnd != std::string::npos) {
            return response.substr(headerEnd + 4);
        }
        return "Error: No response body";
    }

private:
    std::string url;             // endpoint URL passed to setup() (informational only)
    std::string prompt;          // used only by requestPromptTest()
    std::string speaker;         // pending request fields (guarded by stateMutex)
    std::string text;
    std::string emotion;
    float llmTemperature = 0.0f; // was uninitialized; defaults to 0 until a request sets it
    std::string result;          // last response body (guarded by stateMutex)
    std::condition_variable condition;
    std::mutex stateMutex;       // renamed from `mutex` to avoid shadowing ofThread's member
    std::atomic<bool> resultReady{false};
    bool hasNewPrompt = false;   // guarded by stateMutex
};

2
src/ofApp.cpp

@ -29,7 +29,7 @@ void ofApp::update(){
// Get the intensity, emotion of highest intensity, and number of people detected
teleprompter->updateCVData(onnx.detectedFaces.size(), onnx.dominantEmotion, onnx.highestEmotionIntensity);
ofLog() << ofGetFrameRate();
//ofLog() << ofGetFrameRate();
}
//--------------------------------------------------------------

67
src/ofTeleprompter.cpp

@ -15,6 +15,9 @@ void ofTeleprompter::setup() {
displayedSentence.clear();
currentLetterIndex = 0;
lastWordTime = ofGetElapsedTimeMillis();
// Setup the LLMThread
llmThread.setup("http://localhost:8000/generate");
}
void ofTeleprompter::update() {
@ -32,6 +35,22 @@ void ofTeleprompter::update() {
lastWordTime = now;
}
}
// Waits for llm thread to send a response before displaying!
if (waitingForLLM && llmThread.isResultReady()) {
llmResponse = llmThread.getResult();
if (llmResponse.empty()) {
ofLogError() << "LLM response is empty!";
} else {
ofJson json = ofJson::parse(llmResponse);
std::string responseText = json.value("response", "");
currentSentence = responseText;
displayedSentence.clear();
currentLetterIndex = 0;
lastWordTime = ofGetElapsedTimeMillis();
waitingForLLM = false;
}
}
}
void ofTeleprompter::draw() {
@ -44,6 +63,9 @@ void ofTeleprompter::setupGUI() {
nextLine.addListener(this, &ofTeleprompter::nextLinePressed);
reset.addListener(this, &ofTeleprompter::resetScript);
useLLMOnly.addListener(this, &ofTeleprompter::toggleOffText);
useTextOnly.addListener(this, &ofTeleprompter::toggleOffLLM);
gui.setDefaultWidth(400);
gui.setup();
gui.add(currentLineIndex.setup("Current Line Index", "NULL"));
@ -52,7 +74,7 @@ void ofTeleprompter::setupGUI() {
gui.add(facesDetected.setup("Faces Detected", "NULL"));
gui.add(emotionIntensity.setup("Intensity", "NULL"));
gui.add(emotionDetected.setup("Emotion Detected", "NULL"));
gui.add(temperature.setup("Temperature", 0, 0, 1.5));
gui.add(temperature.setup("Temperature", 0.7, 0, 1.5));
gui.add(useLLMOnly.setup("Use LLM Only", false));
gui.add(useTextOnly.setup("Use Text Only", false));
gui.add(nextLine.setup("Next Line"));
@ -157,24 +179,47 @@ std::string ofTeleprompter::wrapStringToWidth(const std::string& text, float max
}
void ofTeleprompter::updateCVData(int numOfFacesDetected, std::string emotion, float intensity) {
facesDetected = ofToString(numOfFacesDetected);
emotionDetected = emotion;
currentEmotionDetetced = emotion;
currentEmotionIntensity = intensity;
// Debug Values
facesDetected = ofToString(numOfFacesDetected);
emotionIntensity = ofToString(intensity);
}
void ofTeleprompter::nextLinePressed() {
// Check if llm thread is already running
if (waitingForLLM) {
ofLogWarning() << "LLM is still generating. Please wait.";
return;
}
ofLog() << "Next Line!";
// Check if it exceeds the length of the script
if (currentLine < script.size()){
if (currentLine < script.size()) {
currentLine++;
}
// Prepare teleprompter effect for letter-by-letter
// If values reach a certain threshold or LLM only is on, and useTextOnly is false -> request a response from the llm
if (((currentEmotionIntensity > 0.8 && currentEmotionDetetced != "neutral") || useLLMOnly) && !useTextOnly) {
ofLog() << "Generate Line!";
std::string speaker = script[currentLine - 1].speaker;
std::string sentence = script[currentLine - 1].sentence;
std::string emotion = script[currentLine].emotion;
llmThread.requestPrompt(speaker, sentence, emotion, temperature);
waitingForLLM = true;
// Don't set currentSentence yet!
} else {
currentSentence = script[currentLine].sentence;
displayedSentence.clear();
currentLetterIndex = 0;
lastWordTime = ofGetElapsedTimeMillis();
}
}
void ofTeleprompter::resetScript() {
@ -188,3 +233,15 @@ void ofTeleprompter::resetScript() {
currentLetterIndex = 0;
lastWordTime = ofGetElapsedTimeMillis();
}
void ofTeleprompter::toggleOffLLM(bool & val) {
    // Listener wired to the "Use Text Only" toggle in setupGUI(): switching
    // it on forces the mutually exclusive "Use LLM Only" toggle off.
    if (!val) {
        return;
    }
    useLLMOnly = false;
}
void ofTeleprompter::toggleOffText(bool & val) {
    // Listener wired to the "Use LLM Only" toggle in setupGUI(): switching
    // it on forces the mutually exclusive "Use Text Only" toggle off.
    if (!val) {
        return;
    }
    useTextOnly = false;
}

12
src/ofTeleprompter.h

@ -1,6 +1,7 @@
#pragma once
#include "ofMain.h"
#include "ofxGui.h"
#include "LLMRequestThread.h"
struct Line {
int idx;
@ -17,10 +18,14 @@ class ofTeleprompter: public ofBaseApp{
void setupGUI();
void nextLinePressed();
void resetScript();
void sendLLMRequest();
void loadText();
void drawText();
void updateCVData(int numOfFacesDetected, std::string emotion, float intensity);
void toggleOffLLM(bool & val);
void toggleOffText(bool & val);
std::string wrapStringToWidth(const std::string& text, float maxWidth);
ofxPanel gui;
@ -56,6 +61,13 @@ class ofTeleprompter: public ofBaseApp{
ofTrueTypeFont textFont;
ofTrueTypeFont detailsFont;
/* llm stuff */
LLMRequestThread llmThread;
std::string llmResponse;
float currentEmotionIntensity = 0;
std::string currentEmotionDetetced = "neutral";
bool waitingForLLM = false;
private:
};

Loading…
Cancel
Save