Spaces:

sanjaystarc
/

voice-agent

Sleeping

sanjaystarc committed on Dec 25, 2025

Commit

7b4a910

verified ·

1 Parent(s): 3cce8c8

Update voice_agent.py

Files changed (1) hide show

voice_agent.py CHANGED Viewed

@@ -9,28 +9,31 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 class CrewVoiceAgent:
     def __init__(self):
         self.agent = Agent(
             role="Voice Assistant",
             goal="Respond to user queries through voice with clarity and accuracy.",
-            backstory="You are a friendly, responsive voice agent."
         )
         self.task = Task(
             description="Interpret the user's speech and generate a clear, helpful spoken response.",
-            agent=self.agent
         )
         self.crew = Crew(
             agents=[self.agent],
-            tasks=[self.task]
         )
         self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")
     async def handle_audio(self, audio_bytes: bytes):
-        """Convert microphone input → CrewAI → Gemini Audio → audio output"""
-        # Convert raw audio bytes to base64
         audio_b64 = base64.b64encode(audio_bytes).decode()
         response = self.model.generate_content(
@@ -42,7 +45,6 @@ class CrewVoiceAgent:
             }
         )
-        # Gemini returns audio chunks
         audio_output = b""
         for chunk in response.audio:
             audio_output += chunk.data

 class CrewVoiceAgent:
     def __init__(self):
+        # IMPORTANT FIX: Disable CrewAI's default LLM selection
         self.agent = Agent(
             role="Voice Assistant",
             goal="Respond to user queries through voice with clarity and accuracy.",
+            backstory="You are a friendly AI voice agent.",
+            llm=None   # 👈 THIS LINE IS CRITICAL
         )
         self.task = Task(
             description="Interpret the user's speech and generate a clear, helpful spoken response.",
+            agent=self.agent,
+            llm=None   # 👈 Disable LLM here too
         )
         self.crew = Crew(
             agents=[self.agent],
+            tasks=[self.task],
+            llm=None   # 👈 Prevent fallback to OpenAI everywhere
         )
         self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")
     async def handle_audio(self, audio_bytes: bytes):
         audio_b64 = base64.b64encode(audio_bytes).decode()
         response = self.model.generate_content(
             }
         )
         audio_output = b""
         for chunk in response.audio:
             audio_output += chunk.data