Spaces:

sanjaystarc
/

voice-agent

Sleeping

App Files Files Community

sanjaystarc committed on Dec 25, 2025

Commit

33aa94a

verified ·

1 Parent(s): ce1ade9

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -14

app.py CHANGED Viewed

@@ -28,15 +28,34 @@ async def home():
         async function start() {
-            // IMPORTANT: HuggingFace requires absolute WebSocket domain
             const wsUrl = "wss://sanjaystarc-voice-agent.hf.space/ws";
             console.log("Connecting WebSocket:", wsUrl);
             ws = new WebSocket(wsUrl);
-            ws.onopen = () => {
                 console.log("WebSocket connected.");
                 document.getElementById("status").innerText = "Connected. Listening…";
             };
             ws.onerror = (e) => {
@@ -51,21 +70,10 @@ async def home():
             ws.onmessage = (event) => {
                 console.log("Audio reply received.");
                 const audio = new Audio("data:audio/wav;base64," + event.data);
                 audio.play();
             };
-            // Microphone access
-            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-            rec = new MediaRecorder(stream, { mimeType: "audio/webm" });
-            rec.ondataavailable = async (e) => {
-                ws.send(await e.data.arrayBuffer());  // send raw audio to backend
-            };
-            rec.start(300); // send chunks every 300 ms
-            document.getElementById("status").innerText = "Listening…";
         }
         function stop() {
@@ -79,16 +87,23 @@ async def home():
     </html>
     """)
 @app.websocket("/ws")
 async def websocket_endpoint(ws: WebSocket):
     await ws.accept()
     while True:
         try:
             audio_bytes = await ws.receive_bytes()
             reply_audio = await agent.handle_audio(audio_bytes)
             if reply_audio:
                 b64 = base64.b64encode(reply_audio).decode()
                 await ws.send_text(b64)
@@ -97,5 +112,6 @@ async def websocket_endpoint(ws: WebSocket):
             print("WebSocket error:", e)
             break
 # Script entry point: when this file is executed directly, serve `app` with
 # uvicorn, listening on 0.0.0.0 (all network interfaces) at port 7860.
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)

         async function start() {
+            // HuggingFace Spaces require absolute WebSocket domain
             const wsUrl = "wss://sanjaystarc-voice-agent.hf.space/ws";
             console.log("Connecting WebSocket:", wsUrl);
             ws = new WebSocket(wsUrl);
+            // ---------------------------------------
+            // WAIT FOR WEBSOCKET TO OPEN
+            // ---------------------------------------
+            ws.onopen = async () => {
                 console.log("WebSocket connected.");
                 document.getElementById("status").innerText = "Connected. Listening…";
+                // NOW start microphone recording
+                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+                rec = new MediaRecorder(stream, { mimeType: "audio/webm" });
+                rec.ondataavailable = async (e) => {
+                    // Only send audio if WS is open
+                    if (ws.readyState === WebSocket.OPEN) {
+                        ws.send(await e.data.arrayBuffer());
+                    } else {
+                        console.warn("Skipping audio chunk (WebSocket not open)");
+                    }
+                };
+                rec.start(300); // send chunks every 300 ms
             };
             ws.onerror = (e) => {
             ws.onmessage = (event) => {
                 console.log("Audio reply received.");
                 const audio = new Audio("data:audio/wav;base64," + event.data);
                 audio.play();
             };
         }
         function stop() {
     </html>
     """)
+# -------------------------------------------------------
+# BACKEND: WEBSOCKET AUDIO ENDPOINT
+# -------------------------------------------------------
 @app.websocket("/ws")
 async def websocket_endpoint(ws: WebSocket):
     await ws.accept()
     while True:
         try:
+            # receive binary audio
             audio_bytes = await ws.receive_bytes()
+            # send audio to Gemini voice agent
             reply_audio = await agent.handle_audio(audio_bytes)
+            # if Gemini returned audio, send it to browser
             if reply_audio:
                 b64 = base64.b64encode(reply_audio).decode()
                 await ws.send_text(b64)
             print("WebSocket error:", e)
             break
 # Script entry point: when this file is executed directly, serve `app` with
 # uvicorn, listening on 0.0.0.0 (all network interfaces) at port 7860.
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)