Spaces:

sanjaystarc
/

voice-agent

Sleeping

App Files Community

sanjaystarc committed on Dec 25, 2025

Commit

062cf01

verified ·

1 Parent(s): a2b7dcd

Update voice_agent.py

Browse files

Files changed (1) hide show

voice_agent.py +13 -14

voice_agent.py CHANGED Viewed

@@ -2,34 +2,33 @@ import os
 import base64
 from dotenv import load_dotenv
-# Prevent CrewAI from loading OpenAI, Anthropic or Cohere
 os.environ["OPENAI_API_KEY"] = ""
 os.environ["ANTHROPIC_API_KEY"] = ""
 os.environ["COHERE_API_KEY"] = ""
 from crewai import Agent, Task, Crew
-from google import genai   # NEW SDK
 load_dotenv()
-# Load Gemini API key from HF Secrets
 client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
 class CrewVoiceAgent:
     def __init__(self):
-        # Disable LLM entirely inside CrewAI
         self.agent = Agent(
             role="Voice Assistant",
-            goal="Respond clearly using synthesized voice.",
-            backstory="You are a friendly AI voice assistant.",
             llm=None
         )
         self.task = Task(
-            description="Interpret audio input and create a spoken answer.",
             agent=self.agent,
-            expected_output="Audio response",   # REQUIRED in new CrewAI
             llm=None
         )
@@ -40,21 +39,21 @@ class CrewVoiceAgent:
         )
     async def handle_audio(self, audio_bytes: bytes):
-        """Send microphone audio to Gemini and return audio output."""
         response = client.models.generate_content(
             model="gemini-2.5-flash-native-audio-dialog",
             contents=[
                 {
                     "mime_type": "audio/webm",
-                    "data": audio_bytes,
                 }
             ]
         )
-        # Collect streamed audio chunks
-        audio_out = b""
         for chunk in response.iter_audio():
-            audio_out += chunk
-        return audio_out

 import base64
 from dotenv import load_dotenv
+# Disable CrewAI from using OpenAI / Anthropic / Cohere
 os.environ["OPENAI_API_KEY"] = ""
 os.environ["ANTHROPIC_API_KEY"] = ""
 os.environ["COHERE_API_KEY"] = ""
 from crewai import Agent, Task, Crew
+from google import genai   # New Gemini SDK
 load_dotenv()
 client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
 class CrewVoiceAgent:
     def __init__(self):
+        # CrewAI Agent (LLM disabled)
         self.agent = Agent(
             role="Voice Assistant",
+            goal="Respond with clear, friendly spoken language.",
+            backstory="You are a helpful voice AI.",
             llm=None
         )
         self.task = Task(
+            description="Convert the user's speech into a helpful spoken reply.",
+            expected_output="Audio response",
             agent=self.agent,
             llm=None
         )
         )
     async def handle_audio(self, audio_bytes: bytes):
+        # Send binary audio directly to Gemini
         response = client.models.generate_content(
             model="gemini-2.5-flash-native-audio-dialog",
             contents=[
                 {
                     "mime_type": "audio/webm",
+                    "data": audio_bytes
                 }
             ]
         )
+        # Build final audio from streaming chunks
+        output_audio = b""
         for chunk in response.iter_audio():
+            output_audio += chunk
+        return output_audio