import os
import tempfile

import numpy as np
import soundfile as sf
import torch
from fastapi import (
    BackgroundTasks,
    FastAPI,
    File,
    Form,
    HTTPException,
    UploadFile,
)
from fastapi.responses import FileResponse
from TTS.api import TTS

# Set before the model is constructed further down in this module.
# NOTE(review): presumably this pre-accepts the Coqui model licence prompt so
# that loading the model never waits for interactive input -- confirm against
# the installed TTS version.
os.environ["COQUI_TOS_AGREED"] = "1"

# Keep a handle on the real loader so the wrapper below can delegate to it.
original_torch_load = torch.load


def patched_torch_load(f, *args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` forced to False.

    Any ``weights_only`` value supplied by the caller is overwritten; every
    other positional and keyword argument is forwarded unchanged.

    SECURITY: ``weights_only=False`` lets ``torch.load`` unpickle arbitrary
    objects, which can execute code embedded in a malicious checkpoint. This
    patch applies process-wide, so only load checkpoints from trusted sources.

    NOTE(review): presumably needed because the model checkpoint loaded below
    cannot be read with ``weights_only=True`` -- confirm, and prefer a
    narrower fix if the installed library versions allow it.
    """
    kwargs["weights_only"] = False
    return original_torch_load(f, *args, **kwargs)


# Install the wrapper globally; it must be in place before the model is built.
torch.load = patched_torch_load


# Build the XTTS v2 model once at import time; the request handler in this
# module reuses this single instance for every call.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")

# Application object on which the HTTP routes below are registered.
app = FastAPI()


def _remove_quietly(path):
    """Delete ``path``, ignoring the case where it no longer exists."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.post("/generate-audio/")
async def generate_audio(
    text: str = Form(...),
    language: str = Form(...),
    speaker_wav: UploadFile = File(...),
):
    """Synthesize ``text`` using an uploaded reference recording as the voice.

    Args:
        text: Text to synthesize (form field).
        language: Language value forwarded to ``tts.tts`` (form field).
        speaker_wav: Uploaded reference audio; its bytes are written to a
            temporary ``.wav`` file whose path is handed to ``tts.tts``.

    Returns:
        A ``FileResponse`` with media type ``audio/wav`` and download name
        ``output.wav``.

    Raises:
        HTTPException: 400 if the uploaded file is empty.

    Both temporary files are removed: the speaker clip as soon as synthesis
    finishes (or fails), the generated file after the response has been sent.

    NOTE(review): ``tts.tts`` is called synchronously inside this coroutine,
    so the event loop is blocked for the whole synthesis. Offload it if
    concurrent requests matter, after confirming the model tolerates that.
    """
    contents = await speaker_wav.read()
    if not contents:
        raise HTTPException(status_code=400, detail="speaker_wav is empty")

    # The upload is persisted because tts.tts is given a path, not bytes.
    # delete=False keeps the file after close so it can be reopened by path.
    speaker_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    speaker_path = speaker_file.name
    try:
        with speaker_file:
            speaker_file.write(contents)

        audio = tts.tts(
            text=text,
            speaker_wav=speaker_path,
            language=language,
            split_sentences=True,
            # NOTE(review): hard-coded; confirm this model honours `emotion`.
            emotion="Angry",
        )
    finally:
        _remove_quietly(speaker_path)

    # mkstemp creates the file atomically; tempfile.mktemp is deprecated
    # because another process can claim the name before it is used.
    fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        # 24000 is the sample rate written into the WAV file.
        # NOTE(review): presumably the model's output rate -- confirm.
        sf.write(out_path, audio, 24000)
    except Exception:
        _remove_quietly(out_path)
        raise

    # Remove the generated file only after the response body has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_quietly, out_path)
    return FileResponse(
        out_path,
        media_type="audio/wav",
        filename="output.wav",
        background=cleanup,
    )