Spaces:

sanjaystarc
/

voice-agent

Sleeping

voice-agent / voice_agent.py

Update voice_agent.py

3b5e44c verified 4 months ago

1.76 kB

	import os
	from dotenv import load_dotenv

	# Blank the OpenAI, Anthropic and Cohere API keys in this process's
	# environment. This runs before crewai is imported below, presumably so
	# that crewai cannot fall back to any of these providers -- confirm.
	os.environ["OPENAI_API_KEY"] = ""
	os.environ["ANTHROPIC_API_KEY"] = ""
	os.environ["COHERE_API_KEY"] = ""

	from crewai import Agent, Task, Crew
	from google import genai

	# Load variables from a .env file into the environment.
	# NOTE(review): called with defaults; python-dotenv documents that existing
	# variables are not overridden by default, so the blank keys set above
	# should stay blank even if .env defines them -- confirm this is intended.
	load_dotenv()

	# Module-level Gemini client, created at import time. os.getenv returns
	# None when GEMINI_API_KEY is not set.
	client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

	class CrewVoiceAgent:
	    """Voice agent that sends recorded audio to Gemini and returns audio bytes.

	    The constructor builds a CrewAI agent, task and crew. ``handle_audio``
	    does not use them; it talks to the module-level Gemini ``client``
	    directly.
	    """

	    def __init__(self):
	        """Create the CrewAI agent, its single task, and the crew holding both."""
	        # NOTE(review): llm=None is passed to Agent, Task and Crew exactly as
	        # before; confirm that Task and Crew accept this keyword.
	        self.agent = Agent(
	            role="Voice Assistant",
	            goal="Respond clearly.",
	            backstory="You are helpful.",
	            llm=None,
	        )

	        self.task = Task(
	            description="Respond to speech.",
	            expected_output="Audio",
	            agent=self.agent,
	            llm=None,
	        )

	        self.crew = Crew(
	            agents=[self.agent],
	            tasks=[self.task],
	            llm=None,
	        )

	    @staticmethod
	    def _extract_audio(response) -> bytes:
	        """Return the audio payload carried by a Gemini response, or b""."""
	        chunks = []
	        # google-genai returns generated media as inline_data blobs on the
	        # parts of each candidate's content.
	        for candidate in getattr(response, "candidates", None) or []:
	            content = getattr(candidate, "content", None)
	            for part in getattr(content, "parts", None) or []:
	                blob = getattr(part, "inline_data", None)
	                data = getattr(blob, "data", None)
	                mime_type = getattr(blob, "mime_type", None) or ""
	                if data and mime_type.startswith("audio/"):
	                    chunks.append(data)
	        if chunks:
	            return b"".join(chunks)

	        # Fallback: keep honouring a top-level ``audio.data`` attribute, which
	        # is what this method originally looked for.
	        legacy_audio = getattr(response, "audio", None)
	        legacy_data = getattr(legacy_audio, "data", None)
	        return legacy_data or b""

	    async def handle_audio(self, audio_bytes: bytes):
	        """Send one audio clip to Gemini and return the audio reply.

	        Args:
	            audio_bytes: Raw audio, submitted with MIME type "audio/webm".

	        Returns:
	            The reply audio as bytes, or b"" when the input is empty or the
	            response carries no audio.

	        Raises:
	            Whatever the Gemini client raises for a failed request; only the
	            reading of the response is guarded.
	        """
	        print("Received audio bytes:", len(audio_bytes))

	        # Nothing to transcribe: skip the network round trip.
	        if not audio_bytes:
	            print("NO AUDIO RETURNED")
	            return b""

	        audio_part = genai.types.Part.from_bytes(
	            data=audio_bytes,
	            mime_type="audio/webm",
	        )

	        # Use the async client so this coroutine does not block the event loop.
	        # NOTE(review): the model id is unchanged; confirm it is served through
	        # generate_content and actually returns audio parts.
	        response = await client.aio.models.generate_content(
	            model="gemini-2.5-flash-native-audio-dialog",
	            contents=[audio_part],
	        )

	        print("GEMINI RESPONSE FIELDS:", response)

	        try:
	            audio_out = self._extract_audio(response)
	        except (AttributeError, TypeError) as e:
	            print("Error reading audio:", e)
	            audio_out = b""

	        print("HAS AUDIO:", bool(audio_out))

	        if audio_out:
	            print("Returning audio bytes:", len(audio_out))
	            return audio_out

	        print("NO AUDIO RETURNED")
	        return b""