sanjaystarc committed on
Commit
dffe856
·
verified ·
1 Parent(s): 0fb0dd8

Create voice_agent.py

Browse files
Files changed (1) hide show
  1. voice_agent.py +50 -0
voice_agent.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ from crewai import Agent, Task, Crew
4
+ from dotenv import load_dotenv
5
+ import google.generativeai as genai
6
+
7
# Module-level setup, executed once at import time.
# load_dotenv() (python-dotenv) is called with no arguments, so it uses its
# default lookup for a .env file to populate the process environment.
+ load_dotenv()
8
# Hand the Gemini API key to the SDK. os.getenv returns None when
# GEMINI_API_KEY is unset, so a missing key is not detected here.
+ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
9
+
10
class CrewVoiceAgent:
    """Voice assistant that forwards recorded audio to a Gemini model.

    The constructor builds a single-agent CrewAI crew (agent, task, crew) and
    a Gemini ``GenerativeModel``.  Only the Gemini model is used by
    :meth:`handle_audio`; the crew is constructed and exposed as ``self.crew``
    but is not invoked anywhere in this class.
    """

    def __init__(self) -> None:
        """Create the CrewAI agent/task/crew and the Gemini model handle."""
        self.agent = Agent(
            role="Voice Assistant",
            goal="Respond to user queries through voice with clarity and accuracy.",
            backstory="You are a friendly, responsive voice agent.",
        )

        self.task = Task(
            description="Interpret the user's speech and generate a clear, helpful spoken response.",
            # CrewAI documents ``expected_output`` as a required Task field;
            # omitting it fails validation on current releases.
            expected_output="A clear, helpful response suitable for being spoken aloud.",
            agent=self.agent,
        )

        self.crew = Crew(
            agents=[self.agent],
            tasks=[self.task],
        )

        # NOTE(review): this name looks like a native-audio *dialog* model,
        # which may only be servable through the Live (streaming) API rather
        # than generate_content -- confirm against the Gemini model list.
        self.model = genai.GenerativeModel("gemini-2.5-flash-native-audio-dialog")

    async def handle_audio(
        self, audio_bytes: bytes, mime_type: str = "audio/webm"
    ) -> bytes:
        """Send recorded audio to Gemini and return the audio it replies with.

        The CrewAI crew is not consulted here; the audio goes straight to the
        Gemini model.

        Args:
            audio_bytes: Raw encoded audio captured from the client.
            mime_type: MIME type describing ``audio_bytes``.  Defaults to
                ``"audio/webm"``, the value this method previously hard-coded.
                NOTE(review): confirm the chosen model accepts this container.

        Returns:
            The concatenated binary payload of every inline-data part in the
            response, or ``b""`` when the input is empty or the response
            carries no inline data.
        """
        # Nothing to transcribe: skip the network round trip entirely.
        if not audio_bytes:
            return b""

        # The SDK takes inline media as a {"mime_type", "data"} blob holding
        # raw bytes, so no manual base64 step is needed.  The async variant is
        # awaited so the event loop is not blocked while the request runs.
        response = await self.model.generate_content_async(
            [{"mime_type": mime_type, "data": audio_bytes}]
        )

        # Binary output arrives as inline_data on the response parts; parts
        # without inline data contribute empty bytes.  join() avoids the
        # quadratic cost of repeated bytes concatenation.
        return b"".join(
            part.inline_data.data
            for candidate in response.candidates
            for part in candidate.content.parts
        )