voice-agent / voice_agent.py
sanjaystarc's picture
Update voice_agent.py
3b5e44c verified
import os
from dotenv import load_dotenv
# Disable OpenAI fallback: blank the API keys of the other LLM providers in
# this process's environment. This runs before the crewai import below.
# NOTE(review): presumably crewai picks these keys up from the environment
# and would otherwise fall back to one of those providers — confirm.
os.environ["OPENAI_API_KEY"] = ""
os.environ["ANTHROPIC_API_KEY"] = ""
os.environ["COHERE_API_KEY"] = ""
from crewai import Agent, Task, Crew
from google import genai
# Load variables from a .env file into the process environment.
# NOTE(review): with python-dotenv's default (override=False) the keys blanked
# above should not be replaced by values from .env — confirm.
load_dotenv()
# Module-level Gemini client shared by the code below; the API key is read
# from the GEMINI_API_KEY environment variable (os.getenv yields None if unset).
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
class CrewVoiceAgent:
    """Voice assistant that sends recorded speech to Gemini and returns audio.

    The constructor builds a one-agent, one-task CrewAI crew.  ``handle_audio``
    talks to the module-level Gemini ``client`` directly and does not use the
    crew objects.
    """

    def __init__(self):
        """Create the CrewAI agent, its single task, and the crew holding both."""
        self.agent = Agent(
            role="Voice Assistant",
            goal="Respond clearly.",
            backstory="You are helpful.",
            llm=None,
        )
        # NOTE(review): ``llm=None`` is also passed to Task and Crew below;
        # confirm those constructors accept an ``llm`` keyword.
        self.task = Task(
            description="Respond to speech.",
            expected_output="Audio",
            agent=self.agent,
            llm=None,
        )
        self.crew = Crew(
            agents=[self.agent],
            tasks=[self.task],
            llm=None,
        )

    async def handle_audio(self, audio_bytes: bytes) -> bytes:
        """Send one recorded clip to Gemini and return the audio of its reply.

        Args:
            audio_bytes: Raw bytes of the recording; sent with MIME type
                ``audio/webm``.

        Returns:
            The audio bytes found in the model response, or ``b""`` when the
            input is empty or the response carries no audio.

        Raises:
            Whatever the Gemini client raises for a failed request; API errors
            are deliberately not swallowed here.
        """
        size = len(audio_bytes) if audio_bytes else 0
        print("Received audio bytes:", size)
        if not size:
            # Nothing to transcribe: skip the network round trip entirely.
            print("NO AUDIO RETURNED")
            return b""

        # Local import keeps this class self-contained; google.genai is
        # already a dependency of this module.
        from google.genai import types as genai_types

        # Use the SDK's async surface so the event loop is not blocked while
        # the request is in flight.
        # NOTE(review): the model name looks like a Live API (dialog) model;
        # confirm it is served by generate_content, and whether an audio
        # response modality has to be requested via ``config``.
        response = await client.aio.models.generate_content(
            model="gemini-2.5-flash-native-audio-dialog",
            contents=[
                genai_types.Part.from_bytes(
                    data=audio_bytes,
                    mime_type="audio/webm",
                )
            ],
        )
        print("GEMINI RESPONSE FIELDS:", response)

        audio = self._extract_audio(response)
        print("HAS AUDIO:", bool(audio))
        if audio:
            print("Returning audio bytes:", len(audio))
            return audio

        print("NO AUDIO RETURNED")
        return b""

    @staticmethod
    def _extract_audio(response) -> bytes:
        """Return the first audio payload carried by *response*, else ``b""``.

        Looks through ``response.candidates[*].content.parts[*].inline_data``
        for a non-empty blob whose MIME type is absent or starts with
        ``audio/``.  If none is found, falls back to ``response.audio.data``
        (the attribute the previous implementation read).  Missing or ``None``
        attributes at any level are treated as "no audio".
        """
        for candidate in getattr(response, "candidates", None) or []:
            content = getattr(candidate, "content", None)
            for part in getattr(content, "parts", None) or []:
                blob = getattr(part, "inline_data", None)
                data = getattr(blob, "data", None)
                if not data:
                    continue
                mime_type = getattr(blob, "mime_type", None) or ""
                # Skip inline blobs that are explicitly something other than audio.
                if mime_type and not mime_type.startswith("audio/"):
                    continue
                return data

        legacy_audio = getattr(response, "audio", None)
        return getattr(legacy_audio, "data", None) or b""