# config.py — from repo "hello" (uploaded by ShadowHunter222, commit b725430, "Upload 10 files")
"""
Chatterbox Turbo TTS β€” Centralized Configuration
═══════════════════════════════════════════════════
Optimised for HF Space free tier (2 vCPU).
Adjust MODEL_DTYPE to switch quantization (q8/q4/fp16/fp32).
All settings overridable via environment variables prefixed CB_.
"""
import os
# Absolute directory containing this config module; anchors relative default
# paths (e.g. MODELS_DIR) so they resolve the same regardless of CWD.
_HERE = os.path.dirname(os.path.abspath(__file__))
def _get_bool(name: str, default: bool) -> bool:
raw = os.getenv(name)
if raw is None:
return default
return raw.strip().lower() in {"1", "true", "yes", "on"}
class Config:
    """Centralized, env-overridable settings for the Chatterbox Turbo TTS server.

    Every tunable can be overridden with an environment variable prefixed
    ``CB_``; the class attribute shows the shipped default.
    """

    # ── Model ────────────────────────────────────────────────────
    MODEL_ID: str = os.getenv("CB_MODEL_ID", "ResembleAI/chatterbox-turbo-ONNX")
    # fp32  → highest quality, ~1.4 GB, slowest
    # fp16  → good quality, ~0.7 GB
    # q8    → ★ recommended, ~0.35 GB, best balance
    # q4    → smallest, ~0.17 GB, fastest, slight loss
    # q4f16 → q4 weights + fp16 activations
    MODEL_DTYPE: str = os.getenv("CB_MODEL_DTYPE", "q4")
    MODELS_DIR: str = os.getenv("CB_MODELS_DIR", os.path.join(_HERE, "models"))

    # ── ONNX Runtime CPU tuning (optimised for 2 vCPU) ───────────
    #
    # KEY RULE: intra_op threads MUST match physical cores.
    #   → 4 threads on 2 cores = oversubscription = SLOWER.
    #   → 2 threads on 2 cores = each op uses both cores perfectly.
    #
    # MAX_WORKERS = 1 ensures ONE inference gets both cores.
    #   → 2 workers would split 2 cores = both requests slow.
    #
    CPU_THREADS: int = int(os.getenv("CB_CPU_THREADS", "2"))
    MAX_WORKERS: int = int(os.getenv("CB_MAX_WORKERS", "1"))

    # ── Generation defaults ──────────────────────────────────────
    SAMPLE_RATE: int = 24000  # model's native output rate (Hz)
    MAX_NEW_TOKENS: int = int(os.getenv("CB_MAX_NEW_TOKENS", "768"))
    REPETITION_PENALTY: float = float(os.getenv("CB_REPETITION_PENALTY", "1.2"))
    MAX_TEXT_LENGTH: int = int(os.getenv("CB_MAX_TEXT_LENGTH", "50000"))

    # ── Model constants (official card — do not change) ──────────
    START_SPEECH_TOKEN: int = 6561
    STOP_SPEECH_TOKEN: int = 6562
    SILENCE_TOKEN: int = 4299
    NUM_KV_HEADS: int = 16
    HEAD_DIM: int = 64

    # ── Paralinguistic tags (Turbo native) ───────────────────────
    PARALINGUISTIC_TAGS: tuple[str, ...] = (
        "laugh", "chuckle", "cough", "sigh", "gasp",
        "shush", "groan", "sniff", "clear throat",
    )

    # ── Voice / reference audio ──────────────────────────────────
    # NOTE: Official ResembleAI/chatterbox-turbo-ONNX has no bundled voice.
    # The default_voice.wav is a plain audio sample from a community repo
    # (not a model — just a reference WAV, safe to use from any source).
    DEFAULT_VOICE_REPO: str = "onnx-community/chatterbox-ONNX"
    DEFAULT_VOICE_FILE: str = "default_voice.wav"
    MAX_VOICE_UPLOAD_BYTES: int = 10 * 1024 * 1024  # 10 MB
    MIN_REF_DURATION_SEC: float = 1.5
    MAX_REF_DURATION_SEC: float = 30.0
    VOICE_CACHE_SIZE: int = int(os.getenv("CB_VOICE_CACHE_SIZE", "20"))
    VOICE_CACHE_TTL_SEC: int = int(os.getenv("CB_VOICE_CACHE_TTL", "3600"))  # 1 hour

    # ── Streaming ────────────────────────────────────────────────
    # Smaller chunks = faster TTFB (first audio arrives sooner).
    # Default 100 chars ≈ one short sentence — tuned for the fastest
    # first chunk on 2 vCPU; raise it for fewer, longer chunks.
    MAX_CHUNK_CHARS: int = int(os.getenv("CB_MAX_CHUNK_CHARS", "100"))

    # Additive parallel mode (2-way split: primary + helper).
    ENABLE_PARALLEL_MODE: bool = _get_bool("CB_ENABLE_PARALLEL_MODE", True)
    # NOTE(review): default helper URL points at a personal HF Space —
    # deployments other than the author's should override CB_HELPER_BASE_URL.
    HELPER_BASE_URL: str = os.getenv("CB_HELPER_BASE_URL", "https://shadowhunter222-chab2.hf.space").strip()
    HELPER_TIMEOUT_SEC: float = float(os.getenv("CB_HELPER_TIMEOUT_SEC", "45"))
    HELPER_RETRY_ONCE: bool = _get_bool("CB_HELPER_RETRY_ONCE", True)

    # Internal housekeeping TTLs to avoid retaining stream metadata indefinitely.
    INTERNAL_CANCEL_TTL_SEC: int = int(os.getenv("CB_INTERNAL_CANCEL_TTL_SEC", "120"))
    INTERNAL_STREAM_STATE_TTL_SEC: int = int(os.getenv("CB_INTERNAL_STREAM_STATE_TTL_SEC", "600"))

    # Optional shared secret for internal chunk endpoints (empty = disabled).
    INTERNAL_SHARED_SECRET: str = os.getenv("CB_INTERNAL_SHARED_SECRET", "").strip()

    # ── Server ───────────────────────────────────────────────────
    HOST: str = os.getenv("CB_HOST", "0.0.0.0")
    PORT: int = int(os.getenv("CB_PORT", "7860"))
    # CORS allow-list. A browser Origin header always includes the scheme,
    # so every entry must be a full "scheme://host[:port]" origin —
    # scheme-less entries would never match and were removed.
    ALLOWED_ORIGINS: list[str] = [
        "https://toolboxesai.com",
        "https://www.toolboxesai.com",
        "http://localhost:8788", "http://127.0.0.1:8788",
        "http://localhost:5502", "http://127.0.0.1:5502",
        "http://localhost:5501", "http://127.0.0.1:5501",
        "http://localhost:5500", "http://127.0.0.1:5500",
        "http://localhost:5173", "http://127.0.0.1:5173",
        "http://localhost:7860", "http://127.0.0.1:7860",
    ]