| """Pydantic schemas for Speech-to-Text and Text-to-Speech endpoints""" |
|
|
| from pydantic import BaseModel, Field, ConfigDict |
| from typing import Optional |
|
|
|
|
| |
| |
| |
|
|
| class STTResponse(BaseModel): |
| """Response model for Whisper speech → text""" |
| model_config = ConfigDict( |
| json_schema_extra={ |
| "example": { |
| "text": "hello how are you", |
| "model_name": "openai/whisper-large-v3", |
| "language": "en", |
| "duration_seconds": 3.2 |
| } |
| } |
| ) |
|
|
| text: str = Field(..., description="Transcribed text from the input audio") |
| model_name: str = Field(..., description="STT model used for inference") |
| language: Optional[str] = Field(None, description="Detected language") |
| duration_seconds: Optional[float] = Field( |
| None, |
| description="Approximate audio duration in seconds" |
| ) |
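
# Usage sketch, illustrative only: constructing the response as a Whisper
# endpoint handler might. Field values mirror the json_schema_extra example
# above; nothing here is a confirmed part of the project's runtime code.
#
#     >>> resp = STTResponse(
#     ...     text="hello how are you",
#     ...     model_name="openai/whisper-large-v3",
#     ...     language="en",
#     ...     duration_seconds=3.2,
#     ... )
#     >>> resp.language
#     'en'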


class TTSRequest(BaseModel):
    """Request model for text-to-speech conversion."""

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "text": "Hello, welcome to our AI system."
            }
        }
    )

    text: str = Field(
        ...,
        min_length=1,
        max_length=500,
        description="Text that will be converted into speech",
    )
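
# Usage sketch, illustrative only: the min_length/max_length constraints on
# `text` mean an empty string is rejected at validation time.
#
#     >>> TTSRequest(text="Hello, welcome to our AI system.").text
#     'Hello, welcome to our AI system.'
#     >>> from pydantic import ValidationError
#     >>> try:
#     ...     TTSRequest(text="")
#     ... except ValidationError:
#     ...     print("rejected: text must be 1-500 characters")
#     rejected: text must be 1-500 characters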


class TTSResponse(BaseModel):
    """Metadata response for TTS generation."""

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "message": "Audio generated successfully",
                "audio_format": "wav",
                "length_seconds": 2.5,
                "model_name": "suno/bark",
            }
        }
    )

    message: str = Field(..., description="Human-readable status message")
    audio_format: str = Field(..., description="Format of the generated audio, e.g. 'wav'")
    length_seconds: Optional[float] = Field(None, description="Generated audio duration in seconds")
    model_name: str = Field(..., description="TTS model used for inference")
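
# Minimal runnable sketch (assumption: run directly, not through the API) that
# serializes the schemas the way a client of the endpoints would see them.
# model_dump_json is the Pydantic v2 serializer; v2 is implied by ConfigDict.
if __name__ == "__main__":
    stt = STTResponse(
        text="hello how are you",
        model_name="openai/whisper-large-v3",
        language="en",
        duration_seconds=3.2,
    )
    tts_meta = TTSResponse(
        message="Audio generated successfully",
        audio_format="wav",
        length_seconds=2.5,
        model_name="suno/bark",
    )
    print(stt.model_dump_json(indent=2))
    print(tts_meta.model_dump_json(indent=2))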