"""
Chatterbox Turbo TTS — Centralized Configuration
═══════════════════════════════════════════════════
Optimised for HF Space free tier (2 vCPU).
Adjust MODEL_DTYPE to switch quantization (q8/q4/fp16/fp32).

Settings that are read from the environment use variables prefixed CB_.
The remaining values (sample rate, model constants, paralinguistic tags,
voice-file defaults, upload/duration limits and ALLOWED_ORIGINS) are fixed
in code and are NOT overridable via the environment.
"""

import os

_HERE = os.path.dirname(os.path.abspath(__file__))


def _get_bool(name: str, default: bool) -> bool:
    """Read a boolean flag from environment variable *name*.

    Returns *default* when the variable is unset. Otherwise the value is
    stripped, lower-cased and considered true only if it is one of
    "1", "true", "yes" or "on"; anything else (including "") is false.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in {"1", "true", "yes", "on"}


def _get_int(name: str, default: int) -> int:
    """Read an integer from environment variable *name*.

    Returns *default* when the variable is unset or blank, so an empty
    value does not crash the import.

    Raises:
        ValueError: if the variable is set to a non-integer value; the
            message names the offending variable.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from err


def _get_float(name: str, default: float) -> float:
    """Read a float from environment variable *name*.

    Returns *default* (as a float) when the variable is unset or blank.

    Raises:
        ValueError: if the variable is set to a non-numeric value; the
            message names the offending variable.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return float(default)
    try:
        return float(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be a number, got {raw!r}"
        ) from err


class Config:
    """Process-wide settings, resolved once when this module is imported.

    All values are class attributes; environment variables are read at
    class-definition time, so later changes to os.environ are not seen.
    """

    # ── Model ────────────────────────────────────────────────────
    MODEL_ID: str = os.getenv("CB_MODEL_ID", "ResembleAI/chatterbox-turbo-ONNX")
    # fp32  → highest quality, ~1.4 GB, slowest
    # fp16  → good quality, ~0.7 GB
    # q8    → ★ recommended, ~0.35 GB, best balance
    # q4    → smallest, ~0.17 GB, fastest, slight loss
    # q4f16 → q4 weights + fp16 activations
    # NOTE(review): q8 is marked as recommended above, but the default
    # below is q4 — confirm which one is intended.
    MODEL_DTYPE: str = os.getenv("CB_MODEL_DTYPE", "q4")
    MODELS_DIR: str = os.getenv("CB_MODELS_DIR", os.path.join(_HERE, "models"))

    # ── ONNX Runtime CPU tuning (optimised for 2 vCPU) ───────────
    #
    # KEY RULE: intra_op threads MUST match physical cores.
    #   → 4 threads on 2 cores = oversubscription = SLOWER.
    #   → 2 threads on 2 cores = each op uses both cores perfectly.
    #
    # MAX_WORKERS = 1 ensures ONE inference gets both cores.
    #   → 2 workers would split 2 cores = both requests slow.
    #
    CPU_THREADS: int = _get_int("CB_CPU_THREADS", 2)
    MAX_WORKERS: int = _get_int("CB_MAX_WORKERS", 1)

    # ── Generation defaults ──────────────────────────────────────
    SAMPLE_RATE: int = 24000
    MAX_NEW_TOKENS: int = _get_int("CB_MAX_NEW_TOKENS", 768)
    REPETITION_PENALTY: float = _get_float("CB_REPETITION_PENALTY", 1.2)
    MAX_TEXT_LENGTH: int = _get_int("CB_MAX_TEXT_LENGTH", 50000)

    # ── Model constants (official card — do not change) ──────────
    START_SPEECH_TOKEN: int = 6561
    STOP_SPEECH_TOKEN: int = 6562
    SILENCE_TOKEN: int = 4299
    NUM_KV_HEADS: int = 16
    HEAD_DIM: int = 64

    # ── Paralinguistic tags (Turbo native) ───────────────────────
    PARALINGUISTIC_TAGS: tuple = (
        "laugh",
        "chuckle",
        "cough",
        "sigh",
        "gasp",
        "shush",
        "groan",
        "sniff",
        "clear throat",
    )

    # ── Voice / reference audio ──────────────────────────────────
    # NOTE: Official ResembleAI/chatterbox-turbo-ONNX has no bundled voice.
    # The default_voice.wav is a plain audio sample from community repo
    # (not a model — just a reference WAV, safe to use from any source).
    DEFAULT_VOICE_REPO: str = "onnx-community/chatterbox-ONNX"
    DEFAULT_VOICE_FILE: str = "default_voice.wav"
    MAX_VOICE_UPLOAD_BYTES: int = 10 * 1024 * 1024  # 10 MB
    MIN_REF_DURATION_SEC: float = 1.5
    MAX_REF_DURATION_SEC: float = 30.0
    VOICE_CACHE_SIZE: int = _get_int("CB_VOICE_CACHE_SIZE", 20)
    # The env name has no _SEC suffix; kept as-is for compatibility.
    VOICE_CACHE_TTL_SEC: int = _get_int("CB_VOICE_CACHE_TTL", 3600)  # 1 hour

    # ── Streaming ────────────────────────────────────────────────
    # Smaller chunks = faster TTFB (first audio arrives sooner)
    # ~200 chars ≈ 1–2 sentences ≈ fastest first-chunk on 2 vCPU
    # NOTE(review): the default below is 100 chars, not the ~200 cited
    # above — confirm which value is intended.
    MAX_CHUNK_CHARS: int = _get_int("CB_MAX_CHUNK_CHARS", 100)

    # Additive parallel mode (2-way split: primary + helper).
    ENABLE_PARALLEL_MODE: bool = _get_bool("CB_ENABLE_PARALLEL_MODE", True)
    HELPER_BASE_URL: str = os.getenv(
        "CB_HELPER_BASE_URL", "https://shadowhunter222-chab2.hf.space"
    ).strip()
    HELPER_TIMEOUT_SEC: float = _get_float("CB_HELPER_TIMEOUT_SEC", 45.0)
    HELPER_RETRY_ONCE: bool = _get_bool("CB_HELPER_RETRY_ONCE", True)

    # Internal housekeeping TTLs to avoid retaining stream metadata indefinitely.
    INTERNAL_CANCEL_TTL_SEC: int = _get_int("CB_INTERNAL_CANCEL_TTL_SEC", 120)
    INTERNAL_STREAM_STATE_TTL_SEC: int = _get_int(
        "CB_INTERNAL_STREAM_STATE_TTL_SEC", 600
    )

    # Optional shared secret for internal chunk endpoints.
    INTERNAL_SHARED_SECRET: str = os.getenv("CB_INTERNAL_SHARED_SECRET", "").strip()

    # ── Server ───────────────────────────────────────────────────
    HOST: str = os.getenv("CB_HOST", "0.0.0.0")
    PORT: int = _get_int("CB_PORT", 7860)
    # NOTE(review): the two scheme-less entries ("www.toolboxesai.com",
    # "toolboxesai.com") would not equal a browser Origin header, which
    # carries a scheme — presumably this list feeds CORS; confirm whether
    # those entries are still needed.
    ALLOWED_ORIGINS: list = [
        "https://toolboxesai.com",
        "https://www.toolboxesai.com",
        "www.toolboxesai.com",
        "toolboxesai.com",
        "http://localhost:8788",
        "http://127.0.0.1:8788",
        "http://localhost:5502",
        "http://127.0.0.1:5502",
        "http://localhost:5501",
        "http://127.0.0.1:5501",
        "http://localhost:5500",
        "http://127.0.0.1:5500",
        "http://localhost:5173",
        "http://127.0.0.1:5173",
        "http://localhost:7860",
        "http://127.0.0.1:7860",
    ]