Spaces:

sanjaystarc
/

voice-agent

Sleeping

File size: 1,755 Bytes

dffe856
 
cd5c014
3b5e44c
eeec86a
 
 
cd5c014
 
3b5e44c
dffe856
 
cd5c014
fa1aeac
dffe856
 
 
 
 
3b5e44c
 
eeec86a
dffe856
 
 
3b5e44c
 
7b4a910
eeec86a
dffe856
 
 
 
7b4a910
eeec86a
dffe856
 
 
fa1aeac
3b5e44c
 
fa1aeac
 
 
 
 
062cf01
dffe856
fa1aeac
dffe856
 
3b5e44c
 
 
 
 
 
 
 
 
 
 
 
 
dffe856
3b5e44c

import os
from dotenv import load_dotenv

# Blank out the OpenAI, Anthropic and Cohere API keys for this process.
# This runs before `crewai` is imported below; keep that order.
# NOTE(review): presumably this stops CrewAI from falling back to one of these
# providers -- confirm that an empty string (rather than an unset variable) is
# what CrewAI treats as "no key".
os.environ["OPENAI_API_KEY"] = ""
os.environ["ANTHROPIC_API_KEY"] = ""
os.environ["COHERE_API_KEY"] = ""

from crewai import Agent, Task, Crew
from google import genai

# Load variables from a .env file into the environment.
# NOTE(review): called without `override`, so per python-dotenv's default the
# blank keys set above should not be replaced by .env entries -- confirm.
load_dotenv()

# Module-level Gemini client, authenticated with the GEMINI_API_KEY
# environment variable (os.getenv returns None when it is not set).
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

class CrewVoiceAgent:
    """Voice agent that forwards recorded audio to Gemini and returns audio.

    ``__init__`` builds a CrewAI agent, task and crew and stores them on the
    instance. ``handle_audio`` does not use them; it calls the module-level
    Gemini ``client`` directly.
    """

    # Model id sent with every request.
    # NOTE(review): confirm this id is served by ``generate_content`` and can
    # return audio without an explicit response-modality config; the
    # "native-audio-dialog" family may be Live-API only.
    MODEL = "gemini-2.5-flash-native-audio-dialog"

    # MIME type declared for the uploaded audio.
    # NOTE(review): assumes callers always send WebM audio -- verify.
    AUDIO_MIME_TYPE = "audio/webm"

    def __init__(self):
        """Create the CrewAI agent, its single task, and the crew wrapping both."""
        self.agent = Agent(
            role="Voice Assistant",
            goal="Respond clearly.",
            backstory="You are helpful.",
            llm=None
        )

        # NOTE(review): ``llm=None`` is also passed to Task and Crew below,
        # exactly as before; confirm those constructors accept that keyword.
        self.task = Task(
            description="Respond to speech.",
            expected_output="Audio",
            agent=self.agent,
            llm=None
        )

        self.crew = Crew(
            agents=[self.agent],
            tasks=[self.task],
            llm=None
        )

    @staticmethod
    def _extract_audio(response) -> bytes:
        """Return the first non-empty audio payload found in *response*, else ``b""``."""
        # Inline payloads live on the parts of each candidate's content.
        for candidate in getattr(response, "candidates", None) or []:
            content = getattr(candidate, "content", None)
            for part in getattr(content, "parts", None) or []:
                blob = getattr(part, "inline_data", None)
                payload = getattr(blob, "data", None)
                mime = getattr(blob, "mime_type", None) or ""
                # Skip non-audio blobs; accept blobs with no declared type.
                if payload and (not mime or mime.startswith("audio/")):
                    return payload

        # Fallback for response objects exposing a top-level ``audio.data``,
        # which is the shape the previous implementation looked for.
        legacy = getattr(response, "audio", None)
        return getattr(legacy, "data", None) or b""

    async def handle_audio(self, audio_bytes: bytes) -> bytes:
        """Send *audio_bytes* to Gemini and return the audio it replies with.

        Args:
            audio_bytes: Raw recorded audio, declared to the API as
                ``AUDIO_MIME_TYPE``.

        Returns:
            The audio bytes from the model's reply, or ``b""`` when the input
            is empty or the reply contains no audio.

        Raises:
            Whatever the Gemini client raises for a failed request; errors
            from the API call are not swallowed.
        """
        if not audio_bytes:
            # Nothing to send: skip the network round trip entirely.
            print("Received audio bytes:", 0)
            print("NO AUDIO RETURNED")
            return b""

        print("Received audio bytes:", len(audio_bytes))

        # Wrap the raw bytes in a proper inline-data part for the request.
        audio_part = genai.types.Part.from_bytes(
            data=audio_bytes,
            mime_type=self.AUDIO_MIME_TYPE,
        )

        # Use the async client so the event loop is not blocked while waiting.
        response = await client.aio.models.generate_content(
            model=self.MODEL,
            contents=[audio_part],
        )

        audio = self._extract_audio(response)
        if audio:
            print("Returning audio bytes:", len(audio))
            return audio

        print("NO AUDIO RETURNED")
        return b""