# config.py — from repo "hello" (uploaded by ShadowHunter222, commit b725430, "Upload 10 files")
"""
Chatterbox Turbo TTS β€” Centralized Configuration
═══════════════════════════════════════════════════
Optimised for HF Space free tier (2 vCPU).
Adjust MODEL_DTYPE to switch quantization (q8/q4/fp16/fp32).
All settings overridable via environment variables prefixed CB_.
"""
import os
# Absolute directory containing this config module; anchors relative default
# paths (e.g. MODELS_DIR) so they resolve the same regardless of CWD.
_HERE = os.path.dirname(os.path.abspath(__file__))
def _get_bool(name: str, default: bool) -> bool:
raw = os.getenv(name)
if raw is None:
return default
return raw.strip().lower() in {"1", "true", "yes", "on"}
class Config:
    """Centralized, env-overridable settings for the Chatterbox Turbo TTS server.

    Every tunable can be overridden with an environment variable prefixed
    ``CB_``; the class attribute shows the shipped default.
    """

    # ── Model ────────────────────────────────────────────────────
    MODEL_ID: str = os.getenv("CB_MODEL_ID", "ResembleAI/chatterbox-turbo-ONNX")
    # fp32  → highest quality, ~1.4 GB, slowest
    # fp16  → good quality, ~0.7 GB
    # q8    → ★ recommended, ~0.35 GB, best balance
    # q4    → smallest, ~0.17 GB, fastest, slight loss
    # q4f16 → q4 weights + fp16 activations
    MODEL_DTYPE: str = os.getenv("CB_MODEL_DTYPE", "q4")
    MODELS_DIR: str = os.getenv("CB_MODELS_DIR", os.path.join(_HERE, "models"))

    # ── ONNX Runtime CPU tuning (optimised for 2 vCPU) ───────────
    #
    # KEY RULE: intra_op threads MUST match physical cores.
    #   → 4 threads on 2 cores = oversubscription = SLOWER.
    #   → 2 threads on 2 cores = each op uses both cores perfectly.
    #
    # MAX_WORKERS = 1 ensures ONE inference gets both cores.
    #   → 2 workers would split 2 cores = both requests slow.
    #
    CPU_THREADS: int = int(os.getenv("CB_CPU_THREADS", "2"))
    MAX_WORKERS: int = int(os.getenv("CB_MAX_WORKERS", "1"))

    # ── Generation defaults ──────────────────────────────────────
    SAMPLE_RATE: int = 24000  # model's native output rate (Hz)
    MAX_NEW_TOKENS: int = int(os.getenv("CB_MAX_NEW_TOKENS", "768"))
    REPETITION_PENALTY: float = float(os.getenv("CB_REPETITION_PENALTY", "1.2"))
    MAX_TEXT_LENGTH: int = int(os.getenv("CB_MAX_TEXT_LENGTH", "50000"))

    # ── Model constants (official card — do not change) ──────────
    START_SPEECH_TOKEN: int = 6561
    STOP_SPEECH_TOKEN: int = 6562
    SILENCE_TOKEN: int = 4299
    NUM_KV_HEADS: int = 16
    HEAD_DIM: int = 64

    # ── Paralinguistic tags (Turbo native) ───────────────────────
    PARALINGUISTIC_TAGS: tuple[str, ...] = (
        "laugh", "chuckle", "cough", "sigh", "gasp",
        "shush", "groan", "sniff", "clear throat",
    )

    # ── Voice / reference audio ──────────────────────────────────
    # NOTE: Official ResembleAI/chatterbox-turbo-ONNX has no bundled voice.
    # The default_voice.wav is a plain audio sample from a community repo
    # (not a model — just a reference WAV, safe to use from any source).
    DEFAULT_VOICE_REPO: str = "onnx-community/chatterbox-ONNX"
    DEFAULT_VOICE_FILE: str = "default_voice.wav"
    MAX_VOICE_UPLOAD_BYTES: int = 10 * 1024 * 1024  # 10 MB
    MIN_REF_DURATION_SEC: float = 1.5
    MAX_REF_DURATION_SEC: float = 30.0
    VOICE_CACHE_SIZE: int = int(os.getenv("CB_VOICE_CACHE_SIZE", "20"))
    VOICE_CACHE_TTL_SEC: int = int(os.getenv("CB_VOICE_CACHE_TTL", "3600"))  # 1 hour

    # ── Streaming ────────────────────────────────────────────────
    # Smaller chunks = faster TTFB (first audio arrives sooner).
    # Default 100 chars ≈ one short sentence — tuned for the fastest
    # first chunk on 2 vCPU; raise it for fewer, longer chunks.
    MAX_CHUNK_CHARS: int = int(os.getenv("CB_MAX_CHUNK_CHARS", "100"))

    # Additive parallel mode (2-way split: primary + helper).
    ENABLE_PARALLEL_MODE: bool = _get_bool("CB_ENABLE_PARALLEL_MODE", True)
    # NOTE(review): default helper URL points at a personal HF Space —
    # deployments other than the author's should override CB_HELPER_BASE_URL.
    HELPER_BASE_URL: str = os.getenv("CB_HELPER_BASE_URL", "https://shadowhunter222-chab2.hf.space").strip()
    HELPER_TIMEOUT_SEC: float = float(os.getenv("CB_HELPER_TIMEOUT_SEC", "45"))
    HELPER_RETRY_ONCE: bool = _get_bool("CB_HELPER_RETRY_ONCE", True)

    # Internal housekeeping TTLs to avoid retaining stream metadata indefinitely.
    INTERNAL_CANCEL_TTL_SEC: int = int(os.getenv("CB_INTERNAL_CANCEL_TTL_SEC", "120"))
    INTERNAL_STREAM_STATE_TTL_SEC: int = int(os.getenv("CB_INTERNAL_STREAM_STATE_TTL_SEC", "600"))

    # Optional shared secret for internal chunk endpoints (empty = disabled).
    INTERNAL_SHARED_SECRET: str = os.getenv("CB_INTERNAL_SHARED_SECRET", "").strip()

    # ── Server ───────────────────────────────────────────────────
    HOST: str = os.getenv("CB_HOST", "0.0.0.0")
    PORT: int = int(os.getenv("CB_PORT", "7860"))
    # CORS allow-list. A browser Origin header always includes the scheme,
    # so every entry must be a full "scheme://host[:port]" origin —
    # scheme-less entries would never match and were removed.
    ALLOWED_ORIGINS: list[str] = [
        "https://toolboxesai.com",
        "https://www.toolboxesai.com",
        "http://localhost:8788", "http://127.0.0.1:8788",
        "http://localhost:5502", "http://127.0.0.1:5502",
        "http://localhost:5501", "http://127.0.0.1:5501",
        "http://localhost:5500", "http://127.0.0.1:5500",
        "http://localhost:5173", "http://127.0.0.1:5173",
        "http://localhost:7860", "http://127.0.0.1:7860",
    ]