# ╔══════════════════════════════════════════════════════════════════╗
# ║ XTTS v2 Advanced Voice Studio — HuggingFace Space ║
# ╚══════════════════════════════════════════════════════════════════╝
import os, sys, time, json, uuid, shutil, threading
import uvicorn
from fastapi import FastAPI, Form, File, UploadFile, HTTPException
from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import torch
# ── Env ──────────────────────────────────────────────────────────────
# Set COQUI_TOS_AGREED unconditionally.
# NOTE(review): presumably the Coqui terms-of-service acknowledgement — confirm.
os.environ["COQUI_TOS_AGREED"] = "1"

# Keep the HF cache in a writable directory inside the Space, unless HF_HOME
# is already defined in the environment.
os.environ.setdefault("HF_HOME", "/home/user/.cache/huggingface")

# Locations used by the routes below.
VOICE_LIB = "/home/user/app/voice_library"
OUTPUT_DIR = "/home/user/app/outputs"
HISTORY_FILE = "/home/user/app/history.json"

# Create the data directories up front; existing directories are left alone.
for d in (VOICE_LIB, OUTPUT_DIR):
    os.makedirs(d, exist_ok=True)
# ── Load TTS on CPU ──────────────────────────────────────────────────
# Imported here rather than at the top of the file, i.e. after the environment
# variables above have been set.
# NOTE(review): presumably the ordering is deliberate so the library sees
# COQUI_TOS_AGREED / HF_HOME — confirm before moving this import.
from TTS.api import TTS
# HF Spaces free tier is CPU-only; force CPU explicitly
device = "cpu"
print(f"[*] Loading XTTS v2 on {device.upper()} …")
# Single module-level engine; the /generate route calls tts_to_file() on it.
xtts_engine = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
print("[✓] Model ready.")
# ── History helpers ──────────────────────────────────────────────────
def load_history():
    """Return the saved generation history as a list.

    Reads HISTORY_FILE as UTF-8 JSON. An empty list is returned when the file
    is missing, unreadable, not valid JSON, or does not hold a JSON array, so
    callers can always append to the result.
    """
    try:
        # The context manager closes the handle even if parsing fails.
        with open(HISTORY_FILE, encoding="utf-8") as fh:
            data = json.load(fh)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: malformed JSON or
        # undecodable bytes (JSONDecodeError and UnicodeDecodeError).
        return []
    # Guard against a hand-edited file holding e.g. an object.
    return data if isinstance(data, list) else []
def save_history(h):
    """Persist the history list *h* to HISTORY_FILE as UTF-8 JSON.

    The data is written to a sibling temporary file and then moved into place
    with os.replace, so a failure while serialising never leaves a truncated
    history file behind.
    """
    tmp_path = f"{HISTORY_FILE}.tmp"
    # Explicit UTF-8: ensure_ascii=False emits raw non-ASCII text, which a
    # non-UTF-8 default locale encoding could fail to encode.
    with open(tmp_path, "w", encoding="utf-8") as fh:
        json.dump(h, fh, ensure_ascii=False, indent=2)
    os.replace(tmp_path, HISTORY_FILE)
# ── FastAPI app ──────────────────────────────────────────────────────
app = FastAPI(title="XTTS Studio")

# CORS is left fully open: every origin, method and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Language code -> display name; serialised into the page by the "/" route.
LANGUAGES = {
    "ar": "العربية",
    "en": "English",
    "es": "Español",
    "fr": "Français",
    "de": "Deutsch",
    "it": "Italiano",
    "pt": "Português",
    "ru": "Русский",
    "zh-cn": "中文",
    "ja": "日本語",
    "ko": "한국어",
    "tr": "Türkçe",
    "nl": "Nederlands",
    "pl": "Polski",
    "cs": "Čeština",
    "hi": "हिन्दी",
}
# ══════════════════════════════════════════════════════════════════════
# HTML / React Frontend
# ══════════════════════════════════════════════════════════════════════
# Page template returned by the "/" route, which substitutes the
# LANGUAGES_JSON and DEVICE_PLACEHOLDER tokens before serving it.
# NOTE(review): the template text below contains neither token — confirm the
# full markup was not lost.
HTML = r"""
XTTS Voice Studio
"""
# ══════════════════════════════════════════════════════════════════════
# Routes
# ══════════════════════════════════════════════════════════════════════
@app.get("/", response_class=HTMLResponse)
async def ui():
    """Serve the front-end page with its placeholders filled in.

    LANGUAGES_JSON is replaced by the JSON form of LANGUAGES (non-ASCII kept
    as-is) and DEVICE_PLACEHOLDER by the configured device string.
    """
    languages_json = json.dumps(LANGUAGES, ensure_ascii=False)
    rendered = HTML.replace("LANGUAGES_JSON", languages_json)
    rendered = rendered.replace("DEVICE_PLACEHOLDER", device)
    return rendered
# Serialises synthesis jobs. The engine instance and history.json are shared by
# every request, so only one job at a time may use them now that the work runs
# in worker threads instead of blocking the event loop.
_GENERATE_LOCK = threading.Lock()


def _resolve_voice_dir(voice_name):
    """Return the library directory for *voice_name*, rejecting path escapes.

    Raises HTTPException(400) when the name does not denote a direct child of
    VOICE_LIB (for example "..", an absolute path, or a nested path).
    """
    if "\x00" in voice_name:
        raise HTTPException(400, "اسم الصوت غير صالح.")
    root = os.path.realpath(VOICE_LIB)
    candidate = os.path.realpath(os.path.join(root, voice_name))
    if os.path.dirname(candidate) != root:
        raise HTTPException(400, "اسم الصوت غير صالح.")
    return candidate


def _run_generation(text, language, voice_name, files, synth_options):
    """Blocking part of /generate: collect references, synthesise, log history.

    Returns the name of the WAV file written to OUTPUT_DIR. Temporary copies
    of uploaded reference clips are removed on every exit path.
    """
    tmp_files = []
    try:
        ref_paths = []
        for upload in files:
            # Keep only the last path component of the client-supplied name so
            # it cannot redirect the temporary file outside /tmp.
            client_name = os.path.basename(upload.filename or "") or "upload"
            path = f"/tmp/ref_{uuid.uuid4().hex}_{client_name}"
            # Registered before writing so a failed copy is cleaned up as well.
            tmp_files.append(path)
            with open(path, "wb") as buf:
                shutil.copyfileobj(upload.file, buf)
            ref_paths.append(path)

        if voice_name:
            lib_dir = _resolve_voice_dir(voice_name)
            # An unknown (but well-formed) voice name is ignored.
            if os.path.isdir(lib_dir):
                ref_paths += [
                    os.path.join(lib_dir, fn)
                    for fn in os.listdir(lib_dir)
                    if fn.lower().endswith((".wav", ".mp3", ".flac", ".ogg"))
                ]

        if not ref_paths:
            raise HTTPException(400, "يجب تحديد عينة صوتية مرجعية.")

        out_name = f"gen_{uuid.uuid4().hex[:8]}.wav"
        out_path = os.path.join(OUTPUT_DIR, out_name)
        with _GENERATE_LOCK:
            xtts_engine.tts_to_file(
                text=text,
                speaker_wav=ref_paths,
                language=language,
                file_path=out_path,
                **synth_options,
            )
            # History is a read-modify-write on one file, so it stays inside
            # the lock. It is only recorded when synthesis succeeded.
            hist = load_history()
            hist.append({
                "filename": out_name,
                "text": text[:120],
                "language": language,
                "ts": int(time.time()),
            })
            save_history(hist)
        return out_name
    finally:
        for p in tmp_files:
            try:
                os.remove(p)
            except OSError:
                # Best effort: the file may never have been created.
                pass


@app.post("/generate")
async def generate(
    text: str = Form(...),
    language: str = Form("ar"),
    temperature: float = Form(0.75),
    speed: float = Form(1.0),
    top_k: int = Form(50),
    top_p: float = Form(0.85),
    repetition_penalty: float = Form(5.0),
    enable_text_splitting: bool = Form(True),
    voice_name: str = Form(None),
    files: list[UploadFile] = File(default=[]),
):
    """Synthesise *text* with a cloned voice and return the output file name.

    Reference audio comes from the uploaded *files*, from the saved voice
    *voice_name* in the voice library, or both. The remaining form fields are
    forwarded to the engine's tts_to_file() call.

    Returns:
        {"filename": <name>}; the audio is then available at /audio/<name>.

    Raises:
        HTTPException(400): empty text, invalid voice name, or no reference
            audio available.
    """
    # Local import: asyncio is not part of the file's top-level imports.
    import asyncio

    if not text.strip():
        raise HTTPException(400, "النص فارغ.")

    synth_options = {
        "temperature": float(temperature),
        "speed": float(speed),
        "top_k": int(top_k),
        "top_p": float(top_p),
        "repetition_penalty": float(repetition_penalty),
        "enable_text_splitting": bool(enable_text_splitting),
    }
    # Synthesis is long-running CPU work; running it in a worker thread keeps
    # the event loop free to serve other requests in the meantime.
    out_name = await asyncio.to_thread(
        _run_generation, text, language, voice_name, files, synth_options
    )
    return {"filename": out_name}
@app.get("/audio/{filename}")
def get_audio(filename: str):
    """Serve a generated WAV file from OUTPUT_DIR.

    Raises:
        HTTPException(404): *filename* is not a bare file name, or no regular
            file with that name exists in OUTPUT_DIR.
    """
    # Only bare names are accepted, so the lookup cannot leave OUTPUT_DIR.
    if filename != os.path.basename(filename):
        raise HTTPException(404, "File not found.")
    path = os.path.join(OUTPUT_DIR, filename)
    # isfile, not exists: a directory such as ".." must give 404 rather than
    # an error while building the file response.
    if not os.path.isfile(path):
        raise HTTPException(404, "File not found.")
    return FileResponse(path, media_type="audio/wav")
@app.get("/history")
def get_history():
    """Return the stored generation history as a JSON response."""
    entries = load_history()
    return JSONResponse(entries)
@app.get("/voices")
def list_voices():
    """Return the names of the sub-directories of VOICE_LIB.

    An empty list is returned when the library directory does not exist.
    """
    if not os.path.isdir(VOICE_LIB):
        return []
    voices = []
    for entry in os.listdir(VOICE_LIB):
        # Plain files in the library root are not voices.
        if os.path.isdir(os.path.join(VOICE_LIB, entry)):
            voices.append(entry)
    return voices
@app.post("/voices/save")
async def save_voice(
    name: str = Form(...),
    file: UploadFile = File(...),
    file2: UploadFile = File(default=None),
):
    """Store one or two reference clips under a named voice in VOICE_LIB.

    The voice name is sanitised ("/" and ".." become "_"). Uploaded files are
    stored under the last path component of their client-supplied name, so an
    upload can never be written outside the voice's own directory.

    Returns:
        {"name": <sanitised voice name>}

    Raises:
        HTTPException(400): the voice name or an upload's file name is empty
            or otherwise unusable.
    """
    safe = name.strip().replace("/", "_").replace("..", "_")
    # "" and "." would resolve to the library root itself.
    if safe in ("", ".") or "\x00" in safe:
        raise HTTPException(400, "Invalid voice name.")

    uploads = [file]
    # Skip an optional second part that was submitted without a file.
    if file2 is not None and file2.filename:
        uploads.append(file2)

    # Validate every target name before touching the filesystem.
    targets = []
    for upload in uploads:
        base = os.path.basename(upload.filename or "")
        if base in ("", ".", "..") or "\x00" in base:
            raise HTTPException(400, "Invalid file name.")
        targets.append((upload, base))

    lib_dir = os.path.join(VOICE_LIB, safe)
    os.makedirs(lib_dir, exist_ok=True)
    for upload, base in targets:
        with open(os.path.join(lib_dir, base), "wb") as buf:
            shutil.copyfileobj(upload.file, buf)
    return {"name": safe}
@app.delete("/voices/{name}")
def delete_voice(name: str):
    """Delete a saved voice directory from VOICE_LIB.

    Deleting a voice that does not exist is not an error; the response is the
    same either way.

    Returns:
        {"deleted": <name>}

    Raises:
        HTTPException(400): *name* does not denote a direct child of
            VOICE_LIB (for example "." or "..").
    """
    if "\x00" in name:
        raise HTTPException(400, "Invalid voice name.")
    root = os.path.realpath(VOICE_LIB)
    lib_dir = os.path.realpath(os.path.join(root, name))
    # Without this check ".." would make rmtree remove the parent of the
    # library, and "." the library itself.
    if os.path.dirname(lib_dir) != root:
        raise HTTPException(400, "Invalid voice name.")
    if os.path.isdir(lib_dir):
        shutil.rmtree(lib_dir)
    return {"deleted": name}
# ══════════════════════════════════════════════════════════════════════
# Entry point (used by Dockerfile CMD)
# ══════════════════════════════════════════════════════════════════════
if __name__ == "__main__":
    # Serve the app on all interfaces, port 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
        log_level="info",
    )