File size: 3,523 Bytes
0fb0dd8
 
 
 
 
 
 
a2b7dcd
0fb0dd8
 
 
 
 
a90d301
 
 
0fb0dd8
a90d301
 
 
 
 
 
 
 
 
 
 
 
 
33aa94a
ce1ade9
 
a90d301
 
 
33aa94a
 
 
 
ce1ade9
a90d301
33aa94a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a90d301
 
 
 
 
 
 
 
 
 
 
 
 
ce1ade9
33aa94a
a90d301
 
 
 
 
 
 
 
 
 
 
 
0fb0dd8
 
 
 
33aa94a
 
 
 
0fb0dd8
a90d301
0fb0dd8
a90d301
a2b7dcd
 
33aa94a
a90d301
1706a47
33aa94a
a90d301
a2b7dcd
33aa94a
a90d301
 
 
a2b7dcd
a90d301
 
a2b7dcd
0fb0dd8
33aa94a
0fb0dd8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import base64

import uvicorn
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse

from voice_agent import CrewVoiceAgent

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()
# Single module-level agent instance, created at import time and used by the
# WebSocket handler for every connection.
agent = CrewVoiceAgent()

@app.get("/")
async def home():
    """Serve the single-page browser client for the voice agent.

    The page opens a WebSocket to ``/ws`` on the same host that served it,
    records the microphone with ``MediaRecorder`` and sends one binary chunk
    roughly every 300 ms. Every text message received on the socket is
    treated as base64-encoded WAV audio and played back.

    Returns:
        HTMLResponse: the static HTML/JavaScript page.
    """
    return HTMLResponse("""
    <html>
    <head>
        <title>Gemini CrewAI Voice Agent</title>
    </head>
    <body>
        <h1>🎤 Gemini CrewAI Voice Agent</h1>

        <button onclick="start()">Start</button>
        <button onclick="stop()">Stop</button>

        <p>Status: <span id='status'>Idle</span></p>

        <script>
        let ws;
        let rec;
        let micStream;
        // Status shown once the current socket closes; stop() and the
        // microphone error path override it so their message is not lost.
        let closeStatus = "Disconnected";

        function setStatus(text) {
            document.getElementById("status").innerText = text;
        }

        // Stop the recorder and release the microphone tracks so the
        // browser actually stops capturing audio.
        function releaseMic() {
            // MediaRecorder.stop() throws if the recorder is already inactive
            if (rec && rec.state !== "inactive") rec.stop();
            if (micStream) micStream.getTracks().forEach((track) => track.stop());
            rec = null;
            micStream = null;
        }

        async function start() {
            // Ignore repeated clicks while a session is connecting or open;
            // otherwise the previous socket and recorder would be orphaned.
            if (ws && (ws.readyState === WebSocket.CONNECTING ||
                       ws.readyState === WebSocket.OPEN)) {
                return;
            }

            // Absolute WebSocket URL built from the page's own origin, so the
            // same page works on the hosted deployment and on a local server.
            const scheme = window.location.protocol === "https:" ? "wss://" : "ws://";
            const wsUrl = scheme + window.location.host + "/ws";
            console.log("Connecting WebSocket:", wsUrl);

            const socket = new WebSocket(wsUrl);
            ws = socket;
            closeStatus = "Disconnected";

            // ---------------------------------------
            // WAIT FOR WEBSOCKET TO OPEN
            // ---------------------------------------
            socket.onopen = async () => {
                console.log("WebSocket connected.");
                setStatus("Connected. Listening…");

                // NOW start microphone recording
                try {
                    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

                    // The socket may have closed while the permission
                    // prompt was showing; do not keep the microphone then.
                    if (socket.readyState !== WebSocket.OPEN) {
                        stream.getTracks().forEach((track) => track.stop());
                        return;
                    }

                    micStream = stream;
                    rec = new MediaRecorder(stream, { mimeType: "audio/webm" });
                } catch (err) {
                    // Permission denied, no input device, or unsupported mimeType
                    console.error("Microphone error:", err);
                    closeStatus = "Microphone unavailable";
                    releaseMic();
                    socket.close();
                    setStatus(closeStatus);
                    return;
                }

                rec.ondataavailable = async (e) => {
                    // Nothing to send for an empty chunk
                    if (e.data.size === 0) return;

                    // Only send audio if WS is open
                    if (socket.readyState === WebSocket.OPEN) {
                        socket.send(await e.data.arrayBuffer());
                    } else {
                        console.warn("Skipping audio chunk (WebSocket not open)");
                    }
                };

                rec.start(300); // send chunks every 300 ms
            };

            socket.onerror = (e) => {
                console.error("WebSocket error:", e);
                setStatus("WebSocket Error!");
            };

            socket.onclose = () => {
                console.log("WebSocket closed.");
                // A stale socket closing late must not tear down a newer session
                if (ws !== socket) return;
                releaseMic();
                setStatus(closeStatus);
            };

            socket.onmessage = (event) => {
                console.log("Audio reply received.");

                const audio = new Audio("data:audio/wav;base64," + event.data);
                // play() returns a promise that rejects when playback is blocked
                audio.play().catch((err) => console.warn("Playback failed:", err));
            };
        }

        function stop() {
            closeStatus = "Stopped";
            releaseMic();
            if (ws) ws.close();
            setStatus("Stopped");
        }
        </script>

    </body>
    </html>
    """)


# -------------------------------------------------------
# BACKEND: WEBSOCKET AUDIO ENDPOINT
# -------------------------------------------------------
@app.websocket("/ws")
async def websocket_endpoint(ws: WebSocket):
    """Relay audio between one browser client and the voice agent.

    Accepts the connection, then repeatedly reads one binary message, passes
    it to ``agent.handle_audio`` and, when that returns a truthy value, sends
    it back as a base64-encoded text message. The loop ends when the client
    disconnects or when any step raises.

    Args:
        ws: The WebSocket connection for this client.
    """
    await ws.accept()

    try:
        while True:
            # receive binary audio
            audio_bytes = await ws.receive_bytes()

            # send audio to Gemini voice agent
            reply_audio = await agent.handle_audio(audio_bytes)

            # if Gemini returned audio, send it to browser
            if reply_audio:
                await ws.send_text(base64.b64encode(reply_audio).decode("ascii"))
    except WebSocketDisconnect:
        # The client closing the socket is the normal end of a session,
        # not an error worth reporting as one.
        print("WebSocket client disconnected")
    except Exception as e:
        # Boundary handler: report the failure and end this session without
        # taking down the server.
        print("WebSocket error:", e)


# When executed as a script, serve the app with uvicorn on all network
# interfaces at port 7860.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)