| import os |
| import torch |
| import numpy as np |
| from fastapi import FastAPI, UploadFile, Form |
| from fastapi.responses import FileResponse |
| from TTS.api import TTS |
| import tempfile |
| import soundfile as sf |
|
|
| |
# Must be set before the TTS model below is constructed.
# NOTE(review): presumably this pre-accepts the Coqui model licence so the
# library does not stop at an interactive prompt -- confirm against Coqui docs.
os.environ.update({"COQUI_TOS_AGREED": "1"})
|
|
| |
# Keep a handle on the real loader so the wrapper below can delegate to it.
original_torch_load = torch.load


def patched_torch_load(f, *args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` defaulting to False.

    Only the *default* is changed: a caller that passes ``weights_only``
    explicitly (for example ``weights_only=True`` to request restricted
    unpickling) keeps the value it asked for.

    SECURITY: with ``weights_only=False`` the checkpoint is unpickled without
    restrictions, which can execute arbitrary code embedded in the file.
    Only load checkpoints that come from a trusted source.

    Args:
        f: Forwarded unchanged as the first argument of ``torch.load``.
        *args: Extra positional arguments forwarded unchanged.
        **kwargs: Keyword arguments forwarded to ``torch.load``;
            ``weights_only`` is filled in with ``False`` when absent.

    Returns:
        Whatever the original ``torch.load`` returns for these arguments.
    """
    kwargs.setdefault("weights_only", False)
    return original_torch_load(f, *args, **kwargs)


# Process-wide monkeypatch: every later ``torch.load`` call, including calls
# made from inside other libraries, goes through the wrapper above.
torch.load = patched_torch_load
|
|
| |
# Identifier of the multilingual XTTS v2 model handed to the TTS constructor.
_XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

# Constructed once at import time and reused by the request handler below.
tts = TTS(model_name=_XTTS_MODEL_NAME)

# Application object on which the HTTP routes are registered.
app = FastAPI()
|
|
def _remove_file_quietly(path):
    """Delete ``path``; cleanup is best-effort, so OS errors are ignored."""
    try:
        os.remove(path)
    except OSError:
        # Already gone or not removable: nothing useful the request can do.
        pass


@app.post("/generate-audio/")
async def generate_audio(
    text: str = Form(...),
    language: str = Form(...),
    speaker_wav: UploadFile = Form(...)
):
    """Synthesize ``text`` using the uploaded clip as the speaker reference.

    The upload is spooled to a temporary ``.wav`` file because the model is
    given a file path, the synthesized samples are written to a second
    temporary ``.wav`` file, and that file is streamed back to the client.
    Both temporary files are removed once they are no longer needed.

    Args:
        text: Text to synthesize (form field).
        language: Language value passed through to the model (form field).
        speaker_wav: Uploaded reference recording of the target voice.

    Returns:
        A ``FileResponse`` serving the generated audio as ``output.wav``
        with media type ``audio/wav``.
    """
    # Imported here so the handler is self-contained; FastAPI is already a
    # dependency of this module.
    from fastapi import BackgroundTasks

    print("PRIOR WITH")

    # delete=False: the file must outlive the ``with`` block so the model can
    # open it by path; it is removed explicitly in the ``finally`` below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        contents = await speaker_wav.read()
        tmp.write(contents)
        tmp_path = tmp.name

    try:
        print("PRIOR AUDIO")

        # NOTE(review): this call runs synchronously inside an async handler,
        # so the event loop is blocked until synthesis finishes.
        # NOTE(review): ``emotion`` is hard-coded; confirm the loaded model
        # actually honours it.
        audio = tts.tts(
            text=text,
            speaker_wav=tmp_path,
            language=language,
            split_sentences=True,
            emotion="Angry"
        )
    finally:
        # The reference clip is not needed after synthesis, success or not.
        _remove_file_quietly(tmp_path)

    print("PRIOR MKTEMP")

    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, which another process could claim before it is written.
    out_fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(out_fd)
    try:
        # 24000 is assumed to be the model's output sample rate -- TODO confirm.
        sf.write(out_path, audio, 24000)
    except Exception:
        _remove_file_quietly(out_path)
        raise

    print("PRIOR RETURN")

    # Delete the generated file after the response body has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_file_quietly, out_path)
    return FileResponse(
        out_path,
        media_type="audio/wav",
        filename="output.wav",
        background=cleanup,
    )
|
|