Spaces:

sanjaystarc
/

voice-agent

Sleeping

App Files Files Community

sanjaystarc committed on Dec 25, 2025

Commit

cd5c014

verified ·

1 Parent(s): 7b4a910

Update voice_agent.py

Browse files

Files changed (1) hide show

voice_agent.py +23 -13

voice_agent.py CHANGED Viewed

@@ -1,52 +1,62 @@
 import os
 import base64
-from crewai import Agent, Task, Crew
 from dotenv import load_dotenv
 import google.generativeai as genai
 load_dotenv()  # load variables from a .env file into the process environment
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))  # os.getenv returns None when GEMINI_API_KEY is unset
 class CrewVoiceAgent:  # Bundles a CrewAI agent/task/crew with a Gemini model and exposes handle_audio().
     def __init__(self):  # Builds the CrewAI objects and the Gemini model handle; takes no arguments.
-        # Every CrewAI object below is passed llm=None. NOTE(review): the intent appears to be bypassing CrewAI's default LLM selection - confirm llm=None has that effect.
         self.agent = Agent(
             role="Voice Assistant",
-            goal="Respond to user queries through voice with clarity and accuracy.",
-            backstory="You are a friendly AI voice agent.",
-            llm=None   # NOTE(review): presumably meant to disable the agent's LLM; verify against the CrewAI Agent docs
         )
         self.task = Task(
-            description="Interpret the user's speech and generate a clear, helpful spoken response.",
             agent=self.agent,
-            llm=None   # NOTE(review): confirm Task accepts an llm argument
         )
         self.crew = Crew(
             agents=[self.agent],
             tasks=[self.task],
-            llm=None   # NOTE(review): intended to prevent a fallback to OpenAI; confirm Crew accepts an llm argument
         )
         self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")  # model handle used by handle_audio()
     async def handle_audio(self, audio_bytes: bytes):  # Sends audio_bytes to self.model and returns the concatenated .data of the response.audio chunks.
-        audio_b64 = base64.b64encode(audio_bytes).decode()  # base64-encode the raw audio, then decode to str for the request dict
         response = self.model.generate_content(  # NOTE(review): called without await inside an async def; this method never awaits
             {
                 "audio": {
-                    "data": audio_b64,
                     "mime_type": "audio/webm"  # the input is declared as WebM audio
                 }
             }
         )
-        audio_output = b""  # accumulator for the response payload
         for chunk in response.audio:  # NOTE(review): assumes the response exposes an iterable .audio whose items have .data - confirm
-            audio_output += chunk.data
-        return audio_output

 import os
 import base64
 from dotenv import load_dotenv
+# Blank out other providers' API keys before crewai is imported. NOTE(review): presumably so CrewAI cannot fall back to them - confirm CrewAI reads these variables.
+os.environ["OPENAI_API_KEY"] = ""       # overwrite with an empty string
+os.environ["ANTHROPIC_API_KEY"] = ""    # overwrite with an empty string
+os.environ["COHERE_API_KEY"] = ""       # overwrite with an empty string
+from crewai import Agent, Task, Crew  # imported only after the environment overrides above
 import google.generativeai as genai
 load_dotenv()  # runs after the overrides; by default python-dotenv does not replace variables that are already set
+# Configure the Gemini client from the GEMINI_API_KEY environment variable (presumably supplied as a Hugging Face Space secret; os.getenv returns None if it is unset).
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 class CrewVoiceAgent:  # Bundles a CrewAI agent/task/crew with a Gemini model and exposes handle_audio().
     def __init__(self):  # Builds the CrewAI objects and the Gemini model handle; takes no arguments.
+        # Every CrewAI object below is passed llm=None. NOTE(review): the intent appears to be keeping CrewAI from using its own LLM - confirm llm=None has that effect.
         self.agent = Agent(
             role="Voice Assistant",
+            goal="Help the user through voice responses.",
+            backstory="You are a friendly voice assistant.",
+            llm=None      # NOTE(review): presumably meant to disable the agent's LLM; verify against the CrewAI Agent docs
         )
         self.task = Task(
+            description="Handle speech input and generate spoken response.",
             agent=self.agent,
+            llm=None      # NOTE(review): confirm Task accepts an llm argument
         )
         self.crew = Crew(
             agents=[self.agent],
             tasks=[self.task],
+            llm=None      # NOTE(review): confirm Crew accepts an llm argument
         )
+        # Gemini model handle that handle_audio() calls generate_content() on.
         self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")
     async def handle_audio(self, audio_bytes: bytes):  # Sends audio_bytes to self.model and returns the concatenated .data of the response.audio chunks.
+        # Base64-encode the raw audio and decode it to str so it can be placed in the request dict.
+        b64 = base64.b64encode(audio_bytes).decode()
         response = self.model.generate_content(  # NOTE(review): called without await inside an async def; this method never awaits
             {
                 "audio": {
+                    "data": b64,
                     "mime_type": "audio/webm"  # the input is declared as WebM audio
                 }
             }
         )
+        output = b""  # accumulator for the response payload
         for chunk in response.audio:  # NOTE(review): assumes the response exposes an iterable .audio whose items have .data - confirm
+            output += chunk.data
+        return output