# voice-agent / app.py
# sanjaystarc's picture
# Update app.py
# 33aa94a verified
import base64
import logging

import uvicorn
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse

from voice_agent import CrewVoiceAgent
# ASGI application object; the route decorators below register on it.
app = FastAPI()
# Voice agent created once at import time and used by the /ws handler.
# NOTE(review): this single instance serves every WebSocket connection --
# confirm CrewVoiceAgent keeps no per-conversation state that could leak
# between clients.
agent = CrewVoiceAgent()
# Single-page browser client returned verbatim by ``home``.  It records
# microphone audio, streams it to the ``/ws`` endpoint in 300 ms chunks and
# plays every text message it receives back as base64-encoded WAV audio.
_INDEX_HTML = """
<html>
<head>
<title>Gemini CrewAI Voice Agent</title>
</head>
<body>
<h1>🎤 Gemini CrewAI Voice Agent</h1>
<button onclick="start()">Start</button>
<button onclick="stop()">Stop</button>
<p>Status: <span id='status'>Idle</span></p>
<script>
let ws;
let rec;
let micStream;

function setStatus(text) {
    document.getElementById("status").innerText = text;
}

// Stop the recorder and release the microphone so the browser stops
// capturing audio once the session is over.
function releaseMic() {
    // MediaRecorder.stop() throws if the recorder is already inactive.
    if (rec && rec.state !== "inactive") rec.stop();
    rec = null;
    if (micStream) {
        micStream.getTracks().forEach((track) => track.stop());
        micStream = null;
    }
}

async function start() {
    // Ignore repeated clicks while a session is already connecting or live.
    if (ws && (ws.readyState === WebSocket.CONNECTING ||
               ws.readyState === WebSocket.OPEN)) {
        return;
    }

    // HuggingFace Spaces require an absolute WebSocket URL. Build it from
    // the host that served this page so the same code also works when the
    // app runs locally or under a different Space name.
    const scheme = location.protocol === "https:" ? "wss://" : "ws://";
    const wsUrl = scheme + location.host + "/ws";
    console.log("Connecting WebSocket:", wsUrl);
    const socket = new WebSocket(wsUrl);
    ws = socket;

    // ---------------------------------------
    // WAIT FOR WEBSOCKET TO OPEN
    // ---------------------------------------
    socket.onopen = async () => {
        console.log("WebSocket connected.");
        setStatus("Connected. Listening…");
        try {
            // NOW start microphone recording
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            // The session may have ended while the permission prompt was
            // open; do not keep a microphone nobody will stop.
            if (ws !== socket || socket.readyState !== WebSocket.OPEN) {
                stream.getTracks().forEach((track) => track.stop());
                return;
            }
            micStream = stream;
            rec = new MediaRecorder(stream, { mimeType: "audio/webm" });
            rec.ondataavailable = async (e) => {
                // Only send audio if WS is open
                if (socket.readyState === WebSocket.OPEN) {
                    socket.send(await e.data.arrayBuffer());
                } else {
                    console.warn("Skipping audio chunk (WebSocket not open)");
                }
            };
            rec.start(300); // send chunks every 300 ms
        } catch (err) {
            // Permission denied, no input device, or unsupported mimeType.
            console.error("Could not start recording:", err);
            releaseMic();
            socket.close();
        }
    };
    socket.onerror = (e) => {
        console.error("WebSocket error:", e);
        setStatus("WebSocket Error!");
    };
    socket.onclose = () => {
        console.log("WebSocket closed.");
        // A newer session may already have replaced this socket.
        if (ws !== socket) return;
        releaseMic();
        setStatus("Disconnected");
    };
    socket.onmessage = (event) => {
        console.log("Audio reply received.");
        const audio = new Audio("data:audio/wav;base64," + event.data);
        audio.play().catch((err) => console.warn("Playback failed:", err));
    };
}

function stop() {
    releaseMic();
    if (ws) ws.close();
    setStatus("Stopped");
}
</script>
</body>
</html>
"""


@app.get("/")
async def home() -> HTMLResponse:
    """Serve the single-page voice client.

    Returns:
        An ``HTMLResponse`` whose body is ``_INDEX_HTML``: a page with
        Start/Stop buttons that streams microphone audio to the ``/ws``
        WebSocket endpoint and plays back the audio replies it receives.
    """
    return HTMLResponse(_INDEX_HTML)
# -------------------------------------------------------
# BACKEND: WEBSOCKET AUDIO ENDPOINT
# -------------------------------------------------------
_logger = logging.getLogger(__name__)


@app.websocket("/ws")
async def websocket_endpoint(ws: WebSocket) -> None:
    """Relay audio between one browser client and the voice agent.

    Accepts the connection, then loops: receive one binary audio message,
    pass it to ``agent.handle_audio`` and, when that returns something
    truthy, send it back to the client as a base64-encoded text message.

    The loop ends when the client disconnects (treated as a normal
    shutdown) or when any other exception is raised (logged with its
    traceback).  No exception propagates to the caller.

    Args:
        ws: The incoming WebSocket connection.
    """
    await ws.accept()
    try:
        while True:
            # receive binary audio
            audio_bytes = await ws.receive_bytes()
            # send audio to Gemini voice agent
            reply_audio = await agent.handle_audio(audio_bytes)
            # if Gemini returned audio, send it to browser
            if reply_audio:
                encoded = base64.b64encode(reply_audio).decode("ascii")
                await ws.send_text(encoded)
    except WebSocketDisconnect:
        # The client closed the socket (e.g. pressed Stop); not an error.
        _logger.info("WebSocket client disconnected")
    except Exception:
        # Boundary handler: keep the failure visible, traceback included,
        # and stop serving this connection.
        _logger.exception("WebSocket error")
# Script entry point: when this file is executed directly, serve ``app``
# with uvicorn, listening on every network interface at port 7860.
if __name__ == "__main__":
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )