sanjaystarc committed on
Commit
eeec86a
·
verified ·
1 Parent(s): cd5c014

Update voice_agent.py

Browse files
Files changed (1) hide show
  1. voice_agent.py +17 -19
voice_agent.py CHANGED
@@ -2,61 +2,59 @@ import os
2
  import base64
3
  from dotenv import load_dotenv
4
 
5
- # Disable CrewAI from auto-loading any LLM providers
6
- os.environ["OPENAI_API_KEY"] = "" # force-empty (prevents fallback)
7
- os.environ["ANTHROPIC_API_KEY"] = "" # prevent fallback
8
- os.environ["COHERE_API_KEY"] = "" # prevent fallback
9
 
10
  from crewai import Agent, Task, Crew
11
  import google.generativeai as genai
12
 
13
  load_dotenv()
14
 
15
- # Load Gemini API key from HF Secret
16
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
17
 
18
  class CrewVoiceAgent:
19
  def __init__(self):
20
 
21
- # IMPORTANT: Disable any CrewAI LLM usage
22
  self.agent = Agent(
23
  role="Voice Assistant",
24
- goal="Help the user through voice responses.",
25
- backstory="You are a friendly voice assistant.",
26
- llm=None # <- This stops CrewAI LLM completely
27
  )
28
 
29
  self.task = Task(
30
- description="Handle speech input and generate spoken response.",
31
  agent=self.agent,
32
- llm=None # <- Critical
33
  )
34
 
35
  self.crew = Crew(
36
  agents=[self.agent],
37
  tasks=[self.task],
38
- llm=None # <- Critical
39
  )
40
 
41
  # Gemini Native Audio Model
42
  self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")
43
 
44
  async def handle_audio(self, audio_bytes: bytes):
45
-
46
- # Convert audio to base64
47
- b64 = base64.b64encode(audio_bytes).decode()
48
 
49
  response = self.model.generate_content(
50
  {
51
  "audio": {
52
- "data": b64,
53
  "mime_type": "audio/webm"
54
  }
55
  }
56
  )
57
 
58
- output = b""
59
  for chunk in response.audio:
60
- output += chunk.data
61
 
62
- return output
 
2
  import base64
3
  from dotenv import load_dotenv
4
 
5
+ # Blank the OpenAI / Anthropic / Cohere API keys before importing CrewAI so it cannot fall back to those providers
6
+ os.environ["OPENAI_API_KEY"] = ""
7
+ os.environ["ANTHROPIC_API_KEY"] = ""
8
+ os.environ["COHERE_API_KEY"] = ""
9
 
10
  from crewai import Agent, Task, Crew
11
  import google.generativeai as genai
12
 
13
  load_dotenv()
14
 
15
+ # Load the Gemini API key from Hugging Face Secrets
16
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
17
 
18
  class CrewVoiceAgent:
19
  def __init__(self):
20
 
21
+ # Disable CrewAI’s internal LLM
22
  self.agent = Agent(
23
  role="Voice Assistant",
24
+ goal="Respond clearly and naturally via voice.",
25
+ backstory="You are a friendly AI voice companion.",
26
+ llm=None
27
  )
28
 
29
  self.task = Task(
30
+ description="Interpret user speech and generate a spoken response.",
31
  agent=self.agent,
32
+ llm=None
33
  )
34
 
35
  self.crew = Crew(
36
  agents=[self.agent],
37
  tasks=[self.task],
38
+ llm=None
39
  )
40
 
41
  # Gemini Native Audio Model
42
  self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")
43
 
44
  async def handle_audio(self, audio_bytes: bytes):
45
+ audio_b64 = base64.b64encode(audio_bytes).decode()
 
 
46
 
47
  response = self.model.generate_content(
48
  {
49
  "audio": {
50
+ "data": audio_b64,
51
  "mime_type": "audio/webm"
52
  }
53
  }
54
  )
55
 
56
+ audio_out = b""
57
  for chunk in response.audio:
58
+ audio_out += chunk.data
59
 
60
+ return audio_out