sanjaystarc committed on
Commit
7b4a910
·
verified ·
1 Parent(s): 3cce8c8

Update voice_agent.py

Browse files
Files changed (1) hide show
  1. voice_agent.py +8 -6
voice_agent.py CHANGED
@@ -9,28 +9,31 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
9
 
10
  class CrewVoiceAgent:
11
  def __init__(self):
 
 
12
  self.agent = Agent(
13
  role="Voice Assistant",
14
  goal="Respond to user queries through voice with clarity and accuracy.",
15
- backstory="You are a friendly, responsive voice agent."
 
16
  )
17
 
18
  self.task = Task(
19
  description="Interpret the user's speech and generate a clear, helpful spoken response.",
20
- agent=self.agent
 
21
  )
22
 
23
  self.crew = Crew(
24
  agents=[self.agent],
25
- tasks=[self.task]
 
26
  )
27
 
28
  self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")
29
 
30
  async def handle_audio(self, audio_bytes: bytes):
31
- """Convert microphone input → CrewAI → Gemini Audio → audio output"""
32
 
33
- # Convert raw audio bytes to base64
34
  audio_b64 = base64.b64encode(audio_bytes).decode()
35
 
36
  response = self.model.generate_content(
@@ -42,7 +45,6 @@ class CrewVoiceAgent:
42
  }
43
  )
44
 
45
- # Gemini returns audio chunks
46
  audio_output = b""
47
  for chunk in response.audio:
48
  audio_output += chunk.data
 
9
 
10
  class CrewVoiceAgent:
11
  def __init__(self):
12
+
13
+ # IMPORTANT FIX: Disable CrewAI's default LLM selection
14
  self.agent = Agent(
15
  role="Voice Assistant",
16
  goal="Respond to user queries through voice with clarity and accuracy.",
17
+ backstory="You are a friendly AI voice agent.",
18
+ llm=None # 👈 THIS LINE IS CRITICAL
19
  )
20
 
21
  self.task = Task(
22
  description="Interpret the user's speech and generate a clear, helpful spoken response.",
23
+ agent=self.agent,
24
+ llm=None # 👈 Disable LLM here too
25
  )
26
 
27
  self.crew = Crew(
28
  agents=[self.agent],
29
+ tasks=[self.task],
30
+ llm=None # 👈 Prevent fallback to OpenAI everywhere
31
  )
32
 
33
  self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")
34
 
35
  async def handle_audio(self, audio_bytes: bytes):
 
36
 
 
37
  audio_b64 = base64.b64encode(audio_bytes).decode()
38
 
39
  response = self.model.generate_content(
 
45
  }
46
  )
47
 
 
48
  audio_output = b""
49
  for chunk in response.audio:
50
  audio_output += chunk.data