Spaces:

sanjaystarc
/

voice-agent

Sleeping

sanjaystarc committed on Dec 25, 2025

Commit

dffe856

verified ·

1 Parent(s): 0fb0dd8

Create voice_agent.py

Files changed (1) hide show

voice_agent.py ADDED Viewed

+import os
+import base64
+from crewai import Agent, Task, Crew
+from dotenv import load_dotenv
+import google.generativeai as genai
+load_dotenv()
+genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
+class CrewVoiceAgent:
+    def __init__(self):
+        self.agent = Agent(
+            role="Voice Assistant",
+            goal="Respond to user queries through voice with clarity and accuracy.",
+            backstory="You are a friendly, responsive voice agent."
+        )
+        self.task = Task(
+            description="Interpret the user's speech and generate a clear, helpful spoken response.",
+            agent=self.agent
+        )
+        self.crew = Crew(
+            agents=[self.agent],
+            tasks=[self.task]
+        )
+        self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")
+    async def handle_audio(self, audio_bytes: bytes):
+        """Convert microphone input → CrewAI → Gemini Audio → audio output"""
+        # Convert raw audio bytes to base64
+        audio_b64 = base64.b64encode(audio_bytes).decode()
+        response = self.model.generate_content(
+            {
+                "audio": {
+                    "data": audio_b64,
+                    "mime_type": "audio/webm"
+                }
+            }
+        )
+        # Gemini returns audio chunks
+        audio_output = b""
+        for chunk in response.audio:
+            audio_output += chunk.data
+        return audio_output