from google.genai import types

from services.gemini_client import get_gemini_client

# Instruction sent alongside the audio so the model transcribes the clip
# instead of answering or describing it.
_TRANSCRIBE_PROMPT = (
    "Transcribe the speech in this audio clip. "
    "Return only the transcript text."
)


async def speech_to_text(
    audio_bytes: bytes, *, mime_type: str = "audio/wav"
) -> str:
    """Transcribe spoken audio to text with Gemini.

    Args:
        audio_bytes: Raw encoded audio data, sent inline with the request.
        mime_type: MIME type describing ``audio_bytes``. Defaults to
            ``"audio/wav"``, the value that was previously hard-coded.

    Returns:
        The text of the model's response, or an empty string when the
        response carries no text.

    Raises:
        ValueError: If ``audio_bytes`` is empty.
    """
    if not audio_bytes:
        raise ValueError("audio_bytes must not be empty")

    client = get_gemini_client()

    # Inline media must be wrapped in a Part; types.File describes an
    # already-uploaded File API resource and cannot carry raw bytes.
    audio_part = types.Part.from_bytes(data=audio_bytes, mime_type=mime_type)

    # Use the async surface so the network call does not block the event
    # loop. NOTE(review): assumes get_gemini_client() returns a
    # google.genai.Client (which exposes `.aio`) - confirm.
    response = await client.aio.models.generate_content(
        model="gemini-2.5-flash",
        contents=[_TRANSCRIBE_PROMPT, audio_part],
    )

    # response.text can be None when no text part comes back; keep the
    # declared `str` return type.
    return response.text or ""