Spaces:

sajith-0701
/

interviewbot

Sleeping

App Files Community

interviewbot / backend / routers / speech.py

sajith-0701

v3.1

5837391 about 1 month ago

raw

history blame contribute delete

4.27 kB

	import logging
	from time import perf_counter

	from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
	from fastapi.responses import Response
	from pydantic import BaseModel

	from auth.jwt import get_current_user
	from services.tts_service import synthesize_wav, warmup_xtts_model, get_xtts_warmup_state
	from services.stt_service import transcribe_audio_bytes, warmup_whisper_model
	from services.latency_service import record_latency

	# Router that the speech endpoints in this module (health, warmup,
	# synthesize, transcribe) register themselves on.
	router = APIRouter()


	class SpeechSynthesisRequest(BaseModel):
	    """Request body accepted by the ``/synthesize`` route."""

	    # Text to turn into speech; handed to ``synthesize_wav`` as-is.
	    text: str
	    # Voice selector handed to ``synthesize_wav``; "female" when omitted.
	    # NOTE(review): the accepted values are decided by the TTS service, not
	    # validated here — confirm against services.tts_service.
	    voice_gender: str = "female"


	@router.get("/health")
	async def speech_health(current_user: dict = Depends(get_current_user)):
	    """Report that the speech routes are reachable and whether XTTS is warm.

	    ``current_user`` is declared only so the request passes through the
	    ``get_current_user`` dependency; its value is not used.

	    Returns:
	        A dict with a fixed ``status``/``service`` pair and ``xtts_ready``,
	        the truthiness of ``is_warm`` in the current XTTS warmup state.
	    """
	    warmup_state = get_xtts_warmup_state()
	    xtts_is_warm = bool(warmup_state.get("is_warm"))
	    return {
	        "status": "ok",
	        "service": "speech",
	        "xtts_ready": xtts_is_warm,
	    }


	@router.post("/warmup")
	async def speech_warmup(current_user: dict = Depends(get_current_user)):
	    """Warm the XTTS model, then the Whisper model, ahead of first use.

	    ``current_user`` is declared only so the request passes through the
	    ``get_current_user`` dependency; its value is not used. The Whisper
	    warmup is awaited even when the XTTS warmup reported failure, and its
	    result is ignored.

	    Returns:
	        A confirmation dict with ``xtts_ready`` set to ``True``.

	    Raises:
	        HTTPException: 503 when ``warmup_xtts_model`` returns a falsy
	            result; the detail carries the warmup state's ``last_error``
	            (or ``'unknown error'`` when that is empty).
	    """
	    xtts_ok = await warmup_xtts_model()
	    await warmup_whisper_model()

	    # Read the state after both warmups, as the failure message relies on it.
	    warm_state = get_xtts_warmup_state()
	    if xtts_ok:
	        return {
	            "status": "ok",
	            "message": "speech model warmed",
	            "xtts_ready": True,
	        }

	    failure_reason = warm_state.get("last_error") or "unknown error"
	    raise HTTPException(
	        status_code=503,
	        detail=f"XTTS warmup failed: {failure_reason}",
	    )


	_logger = logging.getLogger(__name__)


	async def _synthesize_response(request: SpeechSynthesisRequest) -> Response:
	    """Run TTS for *request* and wrap the resulting bytes in an ``audio/wav`` response."""
	    wav_bytes = await synthesize_wav(request.text, request.voice_gender)
	    return Response(content=wav_bytes, media_type="audio/wav")


	@router.post("/synthesize")
	async def synthesize_speech(
	    request: SpeechSynthesisRequest,
	    current_user: dict = Depends(get_current_user),
	):
	    """Synthesize the request text to WAV audio, retrying once after a warmup.

	    The first attempt calls ``synthesize_wav`` directly. If it fails with
	    anything other than ``ValueError``, the XTTS model is warmed via
	    ``warmup_xtts_model`` and synthesis is attempted one more time.

	    Args:
	        request: Text and voice selection to synthesize.
	        current_user: Declared only so the request passes through the
	            ``get_current_user`` dependency; not used in the body.

	    Returns:
	        A ``Response`` with media type ``audio/wav`` containing the audio.

	    Raises:
	        HTTPException: 400 when the first attempt raises ``ValueError``;
	            503 when the warmup reports failure or the retry fails.
	    """
	    try:
	        return await _synthesize_response(request)
	    except ValueError as e:
	        raise HTTPException(status_code=400, detail=str(e)) from e
	    except RuntimeError as e:
	        # XTTS may be in cold-start transition; warm once and retry before failing.
	        try:
	            if not await warmup_xtts_model():
	                state = get_xtts_warmup_state()
	                raise HTTPException(
	                    status_code=503,
	                    detail=f"XTTS warmup failed: {state.get('last_error') or str(e)}",
	                ) from e
	            return await _synthesize_response(request)
	        except HTTPException:
	            raise
	        except Exception as retry_error:
	            # The client still sees the original error text; the retry
	            # failure is logged and chained so it is not lost.
	            _logger.exception("Speech synthesis retry after XTTS warmup failed")
	            raise HTTPException(status_code=503, detail=str(e)) from retry_error
	    except Exception as e:
	        # Retry once after explicit warmup even for non-RuntimeError failures.
	        try:
	            if await warmup_xtts_model():
	                return await _synthesize_response(request)
	        except Exception:
	            # Best-effort retry: fall through to the 503 below, but leave a
	            # trace of why the retry failed instead of discarding it.
	            _logger.exception("Speech synthesis retry after XTTS warmup failed")

	        state = get_xtts_warmup_state()
	        raise HTTPException(
	            status_code=503,
	            detail=f"Speech synthesis backend unavailable: {state.get('last_error') or str(e)}",
	        ) from e


	@router.post("/transcribe")
	async def transcribe_speech(
	    audio: UploadFile = File(...),
	    language: str = Form("en"),
	    current_user: dict = Depends(get_current_user),
	):
	    """Turn an uploaded audio clip into text and report how long it took.

	    The upload is read fully and passed to ``transcribe_audio_bytes`` with
	    its filename (``"speech.webm"`` when the upload has none) and the
	    requested language. The elapsed time, measured from before the upload
	    is read, is recorded under ``"stt_ms"`` via ``record_latency``.

	    Args:
	        audio: Uploaded audio file (multipart form field).
	        language: Language code form field; ``"en"`` when omitted.
	        current_user: Declared only so the request passes through the
	            ``get_current_user`` dependency; not used in the body.

	    Returns:
	        ``{"text": ..., "stt_ms": ...}`` with the latency in milliseconds
	        rounded to two decimals.

	    Raises:
	        HTTPException: 400 for ``ValueError``, 503 for ``RuntimeError`` and
	            500 for any other exception raised while reading the upload,
	            transcribing it or recording the latency.
	    """
	    timer_start = perf_counter()
	    try:
	        raw_audio = await audio.read()
	        upload_name = audio.filename or "speech.webm"
	        transcript = await transcribe_audio_bytes(
	            audio_bytes=raw_audio,
	            filename=upload_name,
	            language=language,
	        )
	        duration_ms = 1000.0 * (perf_counter() - timer_start)
	        await record_latency("stt_ms", duration_ms)
	        return {"text": transcript, "stt_ms": round(duration_ms, 2)}
	    except ValueError as bad_input:
	        raise HTTPException(status_code=400, detail=str(bad_input))
	    except RuntimeError as backend_error:
	        raise HTTPException(status_code=503, detail=str(backend_error))
	    except Exception as unexpected:
	        raise HTTPException(status_code=500, detail=f"Speech transcription failed: {str(unexpected)}")