sanjaystarc committed on
Commit
1a9bac4
·
verified ·
1 Parent(s): 062cf01

Update voice_agent.py

Browse files
Files changed (1) hide show
  1. voice_agent.py +11 -14
voice_agent.py CHANGED
@@ -1,14 +1,13 @@
1
  import os
2
- import base64
3
  from dotenv import load_dotenv
4
 
5
- # Disable CrewAI from using OpenAI / Anthropic / Cohere
6
  os.environ["OPENAI_API_KEY"] = ""
7
  os.environ["ANTHROPIC_API_KEY"] = ""
8
  os.environ["COHERE_API_KEY"] = ""
9
 
10
  from crewai import Agent, Task, Crew
11
- from google import genai # New Gemini SDK
12
 
13
  load_dotenv()
14
 
@@ -16,17 +15,15 @@ client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
16
 
17
  class CrewVoiceAgent:
18
  def __init__(self):
19
-
20
- # CrewAI Agent (LLM disabled)
21
  self.agent = Agent(
22
  role="Voice Assistant",
23
- goal="Respond with clear, friendly spoken language.",
24
- backstory="You are a helpful voice AI.",
25
  llm=None
26
  )
27
 
28
  self.task = Task(
29
- description="Convert the user's speech into a helpful spoken reply.",
30
  expected_output="Audio response",
31
  agent=self.agent,
32
  llm=None
@@ -40,7 +37,7 @@ class CrewVoiceAgent:
40
 
41
  async def handle_audio(self, audio_bytes: bytes):
42
 
43
- # Send binary audio directly to Gemini
44
  response = client.models.generate_content(
45
  model="gemini-2.5-flash-native-audio-dialog",
46
  contents=[
@@ -51,9 +48,9 @@ class CrewVoiceAgent:
51
  ]
52
  )
53
 
54
- # Build final audio from streaming chunks
55
- output_audio = b""
56
- for chunk in response.iter_audio():
57
- output_audio += chunk
58
 
59
- return output_audio
 
1
  import os
 
2
  from dotenv import load_dotenv
3
 
4
+ # block OpenAI fallbacks
5
  os.environ["OPENAI_API_KEY"] = ""
6
  os.environ["ANTHROPIC_API_KEY"] = ""
7
  os.environ["COHERE_API_KEY"] = ""
8
 
9
  from crewai import Agent, Task, Crew
10
+ from google import genai # new official SDK
11
 
12
  load_dotenv()
13
 
 
15
 
16
  class CrewVoiceAgent:
17
  def __init__(self):
 
 
18
  self.agent = Agent(
19
  role="Voice Assistant",
20
+ goal="Respond to user voice with clarity.",
21
+ backstory="Friendly AI voice assistant.",
22
  llm=None
23
  )
24
 
25
  self.task = Task(
26
+ description="Produce a spoken response to the user.",
27
  expected_output="Audio response",
28
  agent=self.agent,
29
  llm=None
 
37
 
38
  async def handle_audio(self, audio_bytes: bytes):
39
 
40
+ # Model expects: contents = [{mime_type, data}]
41
  response = client.models.generate_content(
42
  model="gemini-2.5-flash-native-audio-dialog",
43
  contents=[
 
48
  ]
49
  )
50
 
51
+ # HF Spaces CANNOT stream audio chunks
52
+ # so we use response.audio.data directly
53
+ if hasattr(response, "audio") and hasattr(response.audio, "data"):
54
+ return response.audio.data
55
 
56
+ return b"" # fallback if no audio