sanjaystarc committed on
Commit
cd5c014
·
verified ·
1 Parent(s): 7b4a910

Update voice_agent.py

Browse files
Files changed (1) hide show
  1. voice_agent.py +23 -13
voice_agent.py CHANGED
@@ -1,52 +1,62 @@
1
  import os
2
  import base64
3
- from crewai import Agent, Task, Crew
4
  from dotenv import load_dotenv
 
 
 
 
 
 
 
5
  import google.generativeai as genai
6
 
7
  load_dotenv()
 
 
8
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
9
 
class CrewVoiceAgent:
    """Voice assistant that forwards recorded audio to a Gemini model.

    The constructor builds a CrewAI ``Agent``/``Task``/``Crew`` trio and a
    ``genai.GenerativeModel`` handle.  Only the Gemini model is used by
    :meth:`handle_audio`; the CrewAI objects are created but not invoked
    anywhere in this class.
    """

    def __init__(self):
        """Create the CrewAI objects and the Gemini model handle."""
        # ``llm=None`` is passed everywhere with the stated intent of stopping
        # CrewAI from picking a default LLM provider.
        # NOTE(review): confirm the installed CrewAI version treats ``None``
        # as "no LLM" and that ``Task``/``Crew`` accept an ``llm`` keyword.
        self.agent = Agent(
            role="Voice Assistant",
            goal="Respond to user queries through voice with clarity and accuracy.",
            backstory="You are a friendly AI voice agent.",
            llm=None,
        )

        # NOTE(review): some CrewAI releases also require ``expected_output``
        # on ``Task`` -- verify against the pinned version.
        self.task = Task(
            description="Interpret the user's speech and generate a clear, helpful spoken response.",
            agent=self.agent,
            llm=None,
        )

        self.crew = Crew(
            agents=[self.agent],
            tasks=[self.task],
            llm=None,
        )

        # NOTE(review): native-audio dialog models appear to be served through
        # the streaming Live API; confirm this model accepts generateContent.
        self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")

    async def handle_audio(self, audio_bytes: bytes) -> bytes:
        """Send one audio clip to the model and return the audio it replies with.

        Args:
            audio_bytes: Raw audio payload, sent with MIME type ``audio/webm``.

        Returns:
            The concatenated inline binary data of every part in the response,
            or ``b""`` when the input is empty or the response carries no
            inline data.
        """
        if not audio_bytes:
            # Nothing to transcribe; skip the network round trip.
            return b""

        # The SDK takes inline media as a blob dict holding the raw bytes
        # (it performs any wire encoding itself), so no manual base64 step.
        # The async variant keeps the event loop free while waiting.
        response = await self.model.generate_content_async(
            {"mime_type": "audio/webm", "data": audio_bytes}
        )

        # Returned media lives in each part's ``inline_data`` blob.
        return b"".join(
            part.inline_data.data
            for candidate in response.candidates
            for part in candidate.content.parts
            if part.inline_data.data
        )
 
1
  import os
2
  import base64
 
3
  from dotenv import load_dotenv
4
+
5
+ # Disable CrewAI from auto-loading any LLM providers
6
+ os.environ["OPENAI_API_KEY"] = "" # force-empty (prevents fallback)
7
+ os.environ["ANTHROPIC_API_KEY"] = "" # prevent fallback
8
+ os.environ["COHERE_API_KEY"] = "" # prevent fallback
9
+
10
+ from crewai import Agent, Task, Crew
11
  import google.generativeai as genai
12
 
13
  load_dotenv()
14
+
15
+ # Load Gemini API key from HF Secret
16
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
17
 
class CrewVoiceAgent:
    """Voice assistant that forwards recorded audio to a Gemini model.

    The constructor builds a CrewAI ``Agent``/``Task``/``Crew`` trio and a
    ``genai.GenerativeModel`` handle.  Only the Gemini model is used by
    :meth:`handle_audio`; the CrewAI objects are created but not invoked
    anywhere in this class.
    """

    def __init__(self):
        """Create the CrewAI objects and the Gemini model handle."""
        # ``llm=None`` is passed everywhere with the stated intent of stopping
        # any CrewAI LLM usage.
        # NOTE(review): confirm the installed CrewAI version treats ``None``
        # as "no LLM" and that ``Task``/``Crew`` accept an ``llm`` keyword.
        self.agent = Agent(
            role="Voice Assistant",
            goal="Help the user through voice responses.",
            backstory="You are a friendly voice assistant.",
            llm=None,
        )

        # NOTE(review): some CrewAI releases also require ``expected_output``
        # on ``Task`` -- verify against the pinned version.
        self.task = Task(
            description="Handle speech input and generate spoken response.",
            agent=self.agent,
            llm=None,
        )

        self.crew = Crew(
            agents=[self.agent],
            tasks=[self.task],
            llm=None,
        )

        # Gemini native-audio model.
        # NOTE(review): native-audio dialog models appear to be served through
        # the streaming Live API; confirm this model accepts generateContent.
        self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")

    async def handle_audio(self, audio_bytes: bytes) -> bytes:
        """Send one audio clip to the model and return the audio it replies with.

        Args:
            audio_bytes: Raw audio payload, sent with MIME type ``audio/webm``.

        Returns:
            The concatenated inline binary data of every part in the response,
            or ``b""`` when the input is empty or the response carries no
            inline data.
        """
        if not audio_bytes:
            # Nothing to transcribe; skip the network round trip.
            return b""

        # The SDK takes inline media as a blob dict holding the raw bytes
        # (it performs any wire encoding itself), so no manual base64 step.
        # The async variant keeps the event loop free while waiting.
        response = await self.model.generate_content_async(
            {"mime_type": "audio/webm", "data": audio_bytes}
        )

        # Returned media lives in each part's ``inline_data`` blob.
        return b"".join(
            part.inline_data.data
            for candidate in response.candidates
            for part in candidate.content.parts
            if part.inline_data.data
        )