sanjaystarc committed on
Commit
062cf01
·
verified ·
1 Parent(s): a2b7dcd

Update voice_agent.py

Browse files
Files changed (1) hide show
  1. voice_agent.py +13 -14
voice_agent.py CHANGED
@@ -2,34 +2,33 @@ import os
2
  import base64
3
  from dotenv import load_dotenv
4
 
5
- # Prevent CrewAI from loading OpenAI, Anthropic or Cohere
6
  os.environ["OPENAI_API_KEY"] = ""
7
  os.environ["ANTHROPIC_API_KEY"] = ""
8
  os.environ["COHERE_API_KEY"] = ""
9
 
10
  from crewai import Agent, Task, Crew
11
- from google import genai # NEW SDK
12
 
13
  load_dotenv()
14
 
15
- # Load Gemini API key from HF Secrets
16
  client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
17
 
18
  class CrewVoiceAgent:
19
  def __init__(self):
20
 
21
- # Disable LLM entirely inside CrewAI
22
  self.agent = Agent(
23
  role="Voice Assistant",
24
- goal="Respond clearly using synthesized voice.",
25
- backstory="You are a friendly AI voice assistant.",
26
  llm=None
27
  )
28
 
29
  self.task = Task(
30
- description="Interpret audio input and create a spoken answer.",
 
31
  agent=self.agent,
32
- expected_output="Audio response", # REQUIRED in new CrewAI
33
  llm=None
34
  )
35
 
@@ -40,21 +39,21 @@ class CrewVoiceAgent:
40
  )
41
 
42
  async def handle_audio(self, audio_bytes: bytes):
43
- """Send microphone audio to Gemini and return audio output."""
44
 
 
45
  response = client.models.generate_content(
46
  model="gemini-2.5-flash-native-audio-dialog",
47
  contents=[
48
  {
49
  "mime_type": "audio/webm",
50
- "data": audio_bytes,
51
  }
52
  ]
53
  )
54
 
55
- # Collect streamed audio chunks
56
- audio_out = b""
57
  for chunk in response.iter_audio():
58
- audio_out += chunk
59
 
60
- return audio_out
 
2
  import base64
3
  from dotenv import load_dotenv
4
 
5
+ # Prevent CrewAI from using OpenAI / Anthropic / Cohere
6
  os.environ["OPENAI_API_KEY"] = ""
7
  os.environ["ANTHROPIC_API_KEY"] = ""
8
  os.environ["COHERE_API_KEY"] = ""
9
 
10
  from crewai import Agent, Task, Crew
11
+ from google import genai # New Gemini SDK
12
 
13
  load_dotenv()
14
 
 
15
  client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
16
 
17
  class CrewVoiceAgent:
18
  def __init__(self):
19
 
20
+ # CrewAI Agent (LLM disabled)
21
  self.agent = Agent(
22
  role="Voice Assistant",
23
+ goal="Respond with clear, friendly spoken language.",
24
+ backstory="You are a helpful voice AI.",
25
  llm=None
26
  )
27
 
28
  self.task = Task(
29
+ description="Convert the user's speech into a helpful spoken reply.",
30
+ expected_output="Audio response",
31
  agent=self.agent,
 
32
  llm=None
33
  )
34
 
 
39
  )
40
 
41
  async def handle_audio(self, audio_bytes: bytes):
 
42
 
43
+ # Send binary audio directly to Gemini
44
  response = client.models.generate_content(
45
  model="gemini-2.5-flash-native-audio-dialog",
46
  contents=[
47
  {
48
  "mime_type": "audio/webm",
49
+ "data": audio_bytes
50
  }
51
  ]
52
  )
53
 
54
+ # Build final audio from streaming chunks
55
+ output_audio = b""
56
  for chunk in response.iter_audio():
57
+ output_audio += chunk
58
 
59
+ return output_audio