Spaces:

sanjaystarc
/

voice-agent

Sleeping

App Files Community

sanjaystarc committed on Dec 25, 2025

Commit

fa1aeac

verified ·

1 Parent(s): eeec86a

Update voice_agent.py

Browse files

Files changed (1) hide show

voice_agent.py +21 -21

voice_agent.py CHANGED Viewed

@@ -2,33 +2,34 @@ import os
 import base64
 from dotenv import load_dotenv
-# Disable CrewAI from auto-loading any OpenAI / Anthropic / Cohere LLMs
 os.environ["OPENAI_API_KEY"] = ""
 os.environ["ANTHROPIC_API_KEY"] = ""
 os.environ["COHERE_API_KEY"] = ""
 from crewai import Agent, Task, Crew
-import google.generativeai as genai
 load_dotenv()
-# Load Gemini API from HuggingFace Secrets
-genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 class CrewVoiceAgent:
     def __init__(self):
-        # Disable CrewAI’s internal LLM
         self.agent = Agent(
             role="Voice Assistant",
-            goal="Respond clearly and naturally via voice.",
-            backstory="You are a friendly AI voice companion.",
             llm=None
         )
         self.task = Task(
-            description="Interpret user speech and generate a spoken response.",
             agent=self.agent,
             llm=None
         )
@@ -38,23 +39,22 @@ class CrewVoiceAgent:
             llm=None
         )
-        # Gemini Native Audio Model
-        self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")
     async def handle_audio(self, audio_bytes: bytes):
-        audio_b64 = base64.b64encode(audio_bytes).decode()
-        response = self.model.generate_content(
-            {
-                "audio": {
-                    "data": audio_b64,
-                    "mime_type": "audio/webm"
                 }
-            }
         )
         audio_out = b""
-        for chunk in response.audio:
-            audio_out += chunk.data
         return audio_out

 import base64
 from dotenv import load_dotenv
+# Prevent CrewAI from loading OpenAI, Anthropic or Cohere
 os.environ["OPENAI_API_KEY"] = ""
 os.environ["ANTHROPIC_API_KEY"] = ""
 os.environ["COHERE_API_KEY"] = ""
 from crewai import Agent, Task, Crew
+from google import genai   # NEW SDK
 load_dotenv()
+# Load Gemini API key from HF Secrets
+client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
 class CrewVoiceAgent:
     def __init__(self):
+        # Disable LLM entirely inside CrewAI
         self.agent = Agent(
             role="Voice Assistant",
+            goal="Respond clearly using synthesized voice.",
+            backstory="You are a friendly AI voice assistant.",
             llm=None
         )
         self.task = Task(
+            description="Interpret audio input and create a spoken answer.",
             agent=self.agent,
+            expected_output="Audio response",   # REQUIRED in new CrewAI
             llm=None
         )
             llm=None
         )
     async def handle_audio(self, audio_bytes: bytes):
+        """Send microphone audio to Gemini and return audio output."""
+        response = client.models.generate_content(
+            model="gemini-2.5-flash-native-audio-dialog",
+            contents=[
+                {
+                    "mime_type": "audio/webm",
+                    "data": audio_bytes,
                 }
+            ]
         )
+        # Collect streamed audio chunks
         audio_out = b""
+        for chunk in response.iter_audio():
+            audio_out += chunk
         return audio_out