# XTTS Voice Studio

# ╔══════════════════════════════════════════════════════════════════╗
# ║  XTTS v2 Advanced Voice Studio — HuggingFace Space               ║
# ╚══════════════════════════════════════════════════════════════════╝
import os
import sys
import time
import json
import uuid
import shutil
import threading

import uvicorn
from fastapi import FastAPI, Form, File, UploadFile, HTTPException
from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import torch

# ── Env ──────────────────────────────────────────────────────────────
os.environ["COQUI_TOS_AGREED"] = "1"
# Point HF cache to a writable directory inside the Space
os.environ.setdefault("HF_HOME", "/home/user/.cache/huggingface")

VOICE_LIB = "/home/user/app/voice_library"
OUTPUT_DIR = "/home/user/app/outputs"
HISTORY_FILE = "/home/user/app/history.json"

for d in [VOICE_LIB, OUTPUT_DIR]:
    os.makedirs(d, exist_ok=True)

# ── Load TTS on CPU ──────────────────────────────────────────────────
# Imported only after the environment variables above have been set.
from TTS.api import TTS  # noqa: E402

# HF Spaces free tier is CPU-only; force CPU explicitly
device = "cpu"
print(f"[*] Loading XTTS v2 on {device.upper()} …")
xtts_engine = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
print("[✓] Model ready.")


# ── History helpers ──────────────────────────────────────────────────
def load_history():
    """Return the generation history stored in ``HISTORY_FILE``.

    Returns:
        The list decoded from the JSON file, or an empty list when the
        file is missing, unreadable, not valid JSON, or does not hold a
        list.
    """
    try:
        # Explicit UTF-8: save_history writes with ensure_ascii=False, so
        # the file can contain non-ASCII text.
        with open(HISTORY_FILE, encoding="utf-8") as fh:
            data = json.load(fh)
    except (OSError, ValueError):
        # Best effort: a missing or corrupt history file must not break
        # requests. ValueError covers JSON and text-decoding errors.
        return []
    # Callers append to the result, so anything but a list is unusable.
    return data if isinstance(data, list) else []


def save_history(h):
    """Write the history ``h`` to ``HISTORY_FILE`` as indented UTF-8 JSON.

    Args:
        h: JSON-serialisable history (a list of entry dicts).
    """
    with open(HISTORY_FILE, "w", encoding="utf-8") as fh:
        json.dump(h, fh, ensure_ascii=False, indent=2)


# ── FastAPI app ──────────────────────────────────────────────────────
app = FastAPI(title="XTTS Studio")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Language code -> display name, injected into the UI page as JSON.
LANGUAGES = {
    "ar": "العربية",
    "en": "English",
    "es": "Español",
    "fr": "Français",
    "de": "Deutsch",
    "it": "Italiano",
    "pt": "Português",
    "ru": "Русский",
    "zh-cn": "中文",
    "ja": "日本語",
    "ko": "한국어",
    "tr": "Türkçe",
    "nl": "Nederlands",
    "pl": "Polski",
    "cs": "Čeština",
    "hi": "हिन्दी",
}
# ══════════════════════════════════════════════════════════════════════
#  HTML / React Frontend
# ══════════════════════════════════════════════════════════════════════
# NOTE(review): the template text visible here contains neither
# LANGUAGES_JSON nor DEVICE_PLACEHOLDER, which ui() substitutes — confirm
# the full markup is present in the deployed file.
HTML = r""" XTTS Voice Studio

"""

# ══════════════════════════════════════════════════════════════════════
#  Routes
# ══════════════════════════════════════════════════════════════════════

# Extensions picked up as reference samples from a saved voice folder.
_REF_AUDIO_EXTS = (".wav", ".mp3", ".flac", ".ogg")

# generate() runs synthesis on a worker thread; this lock keeps synthesis
# strictly one-at-a-time on the single shared model instance, as it was
# when the call ran inline on the event loop.
_SYNTH_LOCK = threading.Lock()


def _safe_filename(filename, fallback="upload"):
    """Reduce a client-supplied filename to a bare, directory-free name."""
    base = os.path.basename((filename or "").replace("\\", "/"))
    base = base.replace("\x00", "")
    return fallback if base in ("", ".", "..") else base


def _voice_dir(name):
    """Return the library folder for ``name``, or None if it could escape VOICE_LIB."""
    if not name or name in (".", "..") or "\x00" in name or "\\" in name:
        return None
    if os.path.basename(name) != name:
        return None
    return os.path.join(VOICE_LIB, name)


def _synthesize(**tts_kwargs):
    """Run one synthesis call on the shared engine while holding the lock."""
    with _SYNTH_LOCK:
        xtts_engine.tts_to_file(**tts_kwargs)


@app.get("/", response_class=HTMLResponse)
async def ui():
    """Serve the UI page with the language map and device name substituted."""
    languages_json = json.dumps(LANGUAGES, ensure_ascii=False)
    page = HTML.replace("LANGUAGES_JSON", languages_json)
    return page.replace("DEVICE_PLACEHOLDER", device)


@app.post("/generate")
async def generate(
    text: str = Form(...),
    language: str = Form("ar"),
    temperature: float = Form(0.75),
    speed: float = Form(1.0),
    top_k: int = Form(50),
    top_p: float = Form(0.85),
    repetition_penalty: float = Form(5.0),
    enable_text_splitting: bool = Form(True),
    voice_name: str = Form(None),
    files: list[UploadFile] = File(default=[]),
):
    """Synthesize ``text`` using uploaded and/or saved reference audio.

    Reference samples are the uploaded ``files`` plus, when ``voice_name``
    names an existing library folder, every audio file in that folder.

    Returns:
        ``{"filename": <name>}`` — the WAV written to ``OUTPUT_DIR``,
        retrievable through ``/audio/{filename}``.

    Raises:
        HTTPException: 400 when ``text`` is blank or no reference sample
            is available.
    """
    # Function-scope import keeps this block self-contained.
    from fastapi.concurrency import run_in_threadpool

    if not text.strip():
        raise HTTPException(400, "النص فارغ.")

    tmp_files = []
    try:
        for upload in files:
            if not upload.filename:
                # An empty file field carries no audio; skip it.
                continue
            # Only the base name is used so the client cannot steer the
            # temp path into another directory.
            path = f"/tmp/ref_{uuid.uuid4().hex}_{_safe_filename(upload.filename)}"
            # Registered before writing so a partial file is cleaned up too.
            tmp_files.append(path)
            with open(path, "wb") as buf:
                shutil.copyfileobj(upload.file, buf)
        ref_paths = list(tmp_files)

        # An unsafe or unknown voice name is ignored, like a missing folder.
        lib_dir = _voice_dir(voice_name)
        if lib_dir and os.path.isdir(lib_dir):
            ref_paths += [
                os.path.join(lib_dir, fn)
                for fn in sorted(os.listdir(lib_dir))
                if fn.lower().endswith(_REF_AUDIO_EXTS)
            ]

        if not ref_paths:
            raise HTTPException(400, "يجب تحديد عينة صوتية مرجعية.")

        out_name = f"gen_{uuid.uuid4().hex[:8]}.wav"
        out_path = os.path.join(OUTPUT_DIR, out_name)

        # Synthesis is blocking; running it off the event loop keeps the
        # server responsive to other requests meanwhile.
        await run_in_threadpool(
            _synthesize,
            text=text,
            speaker_wav=ref_paths,
            language=language,
            file_path=out_path,
            temperature=temperature,
            speed=speed,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            enable_text_splitting=enable_text_splitting,
        )
    finally:
        for p in tmp_files:
            try:
                os.remove(p)
            except OSError:
                # Best-effort cleanup; the file may never have been created.
                pass

    hist = load_history()
    hist.append(
        {
            "filename": out_name,
            "text": text[:120],
            "language": language,
            "ts": int(time.time()),
        }
    )
    save_history(hist)
    return {"filename": out_name}


@app.get("/audio/{filename}")
def get_audio(filename: str):
    """Return a generated WAV from ``OUTPUT_DIR``; 404 if it is not a file there."""
    path = os.path.join(OUTPUT_DIR, filename)
    # The basename check rejects names with directory parts; isfile rejects
    # directories such as "..".
    if os.path.basename(filename) != filename or not os.path.isfile(path):
        raise HTTPException(404, "File not found.")
    return FileResponse(path, media_type="audio/wav")


@app.get("/history")
def get_history():
    """Return the stored generation history as JSON."""
    return JSONResponse(load_history())


@app.get("/voices")
def list_voices():
    """Return the names of the sub-directories of ``VOICE_LIB``."""
    if not os.path.isdir(VOICE_LIB):
        return []
    return [
        d for d in os.listdir(VOICE_LIB)
        if os.path.isdir(os.path.join(VOICE_LIB, d))
    ]


@app.post("/voices/save")
async def save_voice(
    name: str = Form(...),
    file: UploadFile = File(...),
    file2: UploadFile = File(default=None),
):
    """Store one or two reference samples under a named voice folder.

    Returns:
        ``{"name": <sanitised name>}`` — the folder name actually used.

    Raises:
        HTTPException: 400 when the sanitised name is empty or unsafe.
    """
    safe = name.strip().replace("/", "_").replace("\\", "_").replace("..", "_")
    lib_dir = _voice_dir(safe)
    if lib_dir is None:
        # An empty or "." name would resolve to VOICE_LIB itself.
        raise HTTPException(400, "Invalid voice name.")
    os.makedirs(lib_dir, exist_ok=True)

    uploads = [file]
    if file2 and file2.filename:
        uploads.append(file2)
    for upload in uploads:
        # Base name only: the client-supplied filename must not be able to
        # place the file outside this voice's folder.
        target = os.path.join(lib_dir, _safe_filename(upload.filename))
        with open(target, "wb") as buf:
            shutil.copyfileobj(upload.file, buf)
    return {"name": safe}


@app.delete("/voices/{name}")
def delete_voice(name: str):
    """Delete a saved voice folder if it exists.

    Raises:
        HTTPException: 400 when ``name`` is not a plain folder name.
    """
    lib_dir = _voice_dir(name)
    if lib_dir is None:
        # Names like ".." or "." would remove a directory outside the
        # library, or the library itself.
        raise HTTPException(400, "Invalid voice name.")
    if os.path.isdir(lib_dir):
        shutil.rmtree(lib_dir)
    return {"deleted": name}


# ══════════════════════════════════════════════════════════════════════
#  Entry point (used by Dockerfile CMD)
# ══════════════════════════════════════════════════════════════════════
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")