sanjaystarc committed on
Commit
fa1aeac
·
verified ·
1 Parent(s): eeec86a

Update voice_agent.py

Browse files
Files changed (1) hide show
  1. voice_agent.py +21 -21
voice_agent.py CHANGED
@@ -2,33 +2,34 @@ import os
2
  import base64
3
  from dotenv import load_dotenv
4
 
5
- # Disable CrewAI from auto-loading any OpenAI / Anthropic / Cohere LLMs
6
  os.environ["OPENAI_API_KEY"] = ""
7
  os.environ["ANTHROPIC_API_KEY"] = ""
8
  os.environ["COHERE_API_KEY"] = ""
9
 
10
  from crewai import Agent, Task, Crew
11
- import google.generativeai as genai
12
 
13
  load_dotenv()
14
 
15
- # Load Gemini API from HuggingFace Secrets
16
- genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
17
 
18
  class CrewVoiceAgent:
19
  def __init__(self):
20
 
21
- # Disable CrewAI’s internal LLM
22
  self.agent = Agent(
23
  role="Voice Assistant",
24
- goal="Respond clearly and naturally via voice.",
25
- backstory="You are a friendly AI voice companion.",
26
  llm=None
27
  )
28
 
29
  self.task = Task(
30
- description="Interpret user speech and generate a spoken response.",
31
  agent=self.agent,
 
32
  llm=None
33
  )
34
 
@@ -38,23 +39,22 @@ class CrewVoiceAgent:
38
  llm=None
39
  )
40
 
41
- # Gemini Native Audio Model
42
- self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")
43
-
44
  async def handle_audio(self, audio_bytes: bytes):
45
- audio_b64 = base64.b64encode(audio_bytes).decode()
46
-
47
- response = self.model.generate_content(
48
- {
49
- "audio": {
50
- "data": audio_b64,
51
- "mime_type": "audio/webm"
 
52
  }
53
- }
54
  )
55
 
 
56
  audio_out = b""
57
- for chunk in response.audio:
58
- audio_out += chunk.data
59
 
60
  return audio_out
 
2
  import base64
3
  from dotenv import load_dotenv
4
 
5
+ # Prevent CrewAI from loading OpenAI, Anthropic or Cohere
6
  os.environ["OPENAI_API_KEY"] = ""
7
  os.environ["ANTHROPIC_API_KEY"] = ""
8
  os.environ["COHERE_API_KEY"] = ""
9
 
10
  from crewai import Agent, Task, Crew
11
+ from google import genai # NEW SDK
12
 
13
  load_dotenv()
14
 
15
+ # Load Gemini API key from HF Secrets
16
+ client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
17
 
18
  class CrewVoiceAgent:
19
  def __init__(self):
20
 
21
+ # Disable LLM entirely inside CrewAI
22
  self.agent = Agent(
23
  role="Voice Assistant",
24
+ goal="Respond clearly using synthesized voice.",
25
+ backstory="You are a friendly AI voice assistant.",
26
  llm=None
27
  )
28
 
29
  self.task = Task(
30
+ description="Interpret audio input and create a spoken answer.",
31
  agent=self.agent,
32
+ expected_output="Audio response", # REQUIRED in new CrewAI
33
  llm=None
34
  )
35
 
 
39
  llm=None
40
  )
41
 
 
 
 
42
  async def handle_audio(self, audio_bytes: bytes):
43
+ """Send microphone audio to Gemini and return audio output."""
44
+
45
+ response = client.models.generate_content(
46
+ model="gemini-2.5-flash-native-audio-dialog",
47
+ contents=[
48
+ {
49
+ "mime_type": "audio/webm",
50
+ "data": audio_bytes,
51
  }
52
+ ]
53
  )
54
 
55
+ # Collect streamed audio chunks
56
  audio_out = b""
57
+ for chunk in response.iter_audio():
58
+ audio_out += chunk
59
 
60
  return audio_out