YT-AI-Automation / backend /config /config.example.py
github-actions
Sync Docker Space
5f3e9f5
"""Configuration for TextBro / Screenshot Studio.
Copy this file to ``config.py`` (which is gitignored) and fill in your
actual values. Every entry can also be supplied through an environment
variable (or a ``.env`` file in the project root) β€” env wins over the
literal default below, so you can keep an unredacted ``config.py`` empty
on shared machines.
Env vars used:
API_KEY, API_URL, MODEL_VISION, MODEL_VISION_FALLBACK
HOST, PORT, DEBUG, MAX_CONTENT_LENGTH_BYTES
CORS_ORIGINS (comma-separated allowlist; ``*`` for wide-open)
RATE_LIMIT, RATE_LIMIT_DEFAULT (e.g. RATE_LIMIT_DEFAULT="60/minute;10/second")
PREFLIGHT_CACHE_SECS
The backend talks to any OpenAI-compatible chat-completions endpoint
(OpenAI, Groq, Together, NVIDIA NIM, a local llama.cpp server, …).
"""
import os
import platform
def _env(name: str, default: str = "") -> str:
return os.environ.get(name, default)
# ─── Engine selection ──────────────────────────────────────────────────────
#
# The video build pipeline auto-selects between two engines based on the
# host OS so the same backend works on a Windows dev box (PowerPoint COM
# automation) and a Linux server (MoviePy + ffmpeg). Override by exporting
# ``USE_POWERPOINT=1`` / ``USE_POWERPOINT=0`` in the environment.
USE_POWERPOINT = _env("USE_POWERPOINT", "").strip().lower() in {"1", "true", "yes", "on"} \
if _env("USE_POWERPOINT") else (platform.system() == "Windows")
# ─── API Configuration ─────────────────────────────────────────────────────
#
# `API_KEY` and `API_URL` are used by the vision/OCR client
# (src/core/vision_client.py). `API_URL` is also used as the base URL for
# chat completions β€” each entry in MODELS_CONFIG below provides its own
# `api_key` so you can mix providers if you want.
API_KEY = _env("API_KEY", "your-api-key-here")
API_URL = _env("API_URL", "https://integrate.api.nvidia.com/v1")
DEFAULT_MAX_TOKENS = int(_env("MAX_TOKENS", "32768"))
# ─── Chat Models (used by src/core/ai_client.py) ───────────────────────────
#
# The frontend exposes named model choices. Feel free
# to point them at any model your provider exposes. Each entry MUST define
# `model`, `temperature`, `top_p`, `max_tokens`, and `api_key`.
# The keys here are what the UI ships as `model_choice`. Each one maps
# to a build.nvidia.com model slug that is callable through
# `https://integrate.api.nvidia.com/v1`. See
# https://build.nvidia.com/models for the live catalog β€” "Free Endpoint"
# models are always callable with a developer API key, "Downloadable"
# entries require self-hosting.
MODELS_CONFIG = {
# Default β€” Qwen 3.5 122B MoE (10B active). Large + capable; user
# pick from the UI's "Default" slot.
"default": {
"model": _env("MODEL_DEFAULT", "qwen/qwen3.5-122b-a10b"),
"temperature": 0.2,
"top_p": 0.9,
"max_tokens": int(_env("MAX_TOKENS_DEFAULT", str(DEFAULT_MAX_TOKENS))),
"api_key": API_KEY,
},
# Fast β€” DeepSeek V4 Flash (284B MoE, 1M ctx), tuned for throughput.
"fast": {
"model": _env("MODEL_FAST", "deepseek-ai/deepseek-v4-flash"),
"temperature": 0.3,
"top_p": 0.9,
"max_tokens": int(_env("MAX_TOKENS_FAST", str(DEFAULT_MAX_TOKENS))),
"api_key": API_KEY,
},
# Short & fastest β€” Llama 3.1 8B Instruct for very low latency runs.
"short": {
"model": _env("MODEL_SHORT", "meta/llama-3.1-8b-instruct"),
"temperature": 0.3,
"top_p": 0.9,
"max_tokens": int(_env("MAX_TOKENS_SHORT", str(DEFAULT_MAX_TOKENS))),
"api_key": API_KEY,
},
# Balanced β€” GLM-4.7 (358B, 131K ctx). Solid middle ground for
# coding / HTML generation + tool calling.
"balanced": {
"model": _env("MODEL_BALANCED", "z-ai/glm-4.7"),
"temperature": 0.2,
"top_p": 0.9,
"max_tokens": int(_env("MAX_TOKENS_BALANCED", str(DEFAULT_MAX_TOKENS))),
"api_key": API_KEY,
},
# Quality β€” DeepSeek V3.2 (685B), long-context reasoning.
"quality": {
"model": _env("MODEL_QUALITY", "deepseek-ai/deepseek-v3.2"),
"temperature": 0.1,
"top_p": 0.9,
"max_tokens": int(_env("MAX_TOKENS_QUALITY", str(DEFAULT_MAX_TOKENS))),
"api_key": API_KEY,
},
# Long context β€” DeepSeek V4 Pro (1.6T MoE, 49B active, 1M ctx).
"long": {
"model": _env("MODEL_LONG", "deepseek-ai/deepseek-v4-pro"),
"temperature": 0.1,
"top_p": 0.9,
"max_tokens": int(_env("MAX_TOKENS_LONG", str(DEFAULT_MAX_TOKENS))),
"api_key": API_KEY,
},
# Llama 3.3 70B kept for back-compat with old stored runs that set
# `model_choice: "llama"`. New UI does not expose it separately.
"llama": {
"model": _env("MODEL_LLAMA", "meta/llama-3.3-70b-instruct"),
"temperature": 0.2,
"top_p": 0.9,
"max_tokens": int(_env("MAX_TOKENS_LLAMA", str(DEFAULT_MAX_TOKENS))),
"api_key": API_KEY,
},
}
# ─── Vision Model (used by src/core/vision_client.py) ──────────────────────
MODEL_VISION = _env("MODEL_VISION", "nvidia/llama-3.1-nemotron-nano-vl-8b-v1")
MODEL_VISION_FALLBACK = _env("MODEL_VISION_FALLBACK", "meta/llama-3.2-90b-vision-instruct")
# ─── Application Settings ──────────────────────────────────────────────────
#
# Defaults mirror the safe values used by ``app.py`` when run as a script.
# DEBUG defaults to False to avoid the Werkzeug debugger (which is
# remote-code-execution-by-design) being on accidentally.
DEBUG = _env("DEBUG", "0").lower() in {"1", "true", "yes", "on"}
PORT = int(_env("PORT", "5000"))
HOST = _env("HOST", "127.0.0.1") # bind to loopback by default
# ─── Output Folders (relative to backend/) ─────────────────────────────────
OUTPUT_FOLDER = "output/screenshots"
HTML_FOLDER = "output/html"
# ─── Screenshot Settings ───────────────────────────────────────────────────
DEFAULT_VIEWPORT_WIDTH = 1920
DEFAULT_VIEWPORT_HEIGHT = 1080
DEFAULT_ZOOM = 2.1
DEFAULT_OVERLAP = 15
MAX_SCREENSHOTS_LIMIT = 50
# Cap on how many pages we'll rasterize from a PDF in /extract-from-image.
PDF_MAX_PAGES = int(_env("PDF_MAX_PAGES", "100"))
# ─── AI Settings ───────────────────────────────────────────────────────────
MAX_TOKENS = DEFAULT_MAX_TOKENS
TEMPERATURE = 0.2
# ─── PowerPoint Automation Settings (Windows only) ─────────────────────────
POWERPOINT_ENABLED = True
POWERPOINT_TEMPLATE_PATH = "templates/powerpoint/default.pptm"
POWERPOINT_OUTPUT_FOLDER = "output/presentations"
POWERPOINT_VIDEO_FOLDER = "output/videos"
# ─── Slide Settings ────────────────────────────────────────────────────────
DEFAULT_SLIDE_DURATION = 5.0 # seconds per slide
DEFAULT_TRANSITION_TYPE = "fade" # fade | push | wipe | none
DEFAULT_TRANSITION_DURATION = 0.5 # seconds for transition
# ─── Video Export Settings ─────────────────────────────────────────────────
VIDEO_RESOLUTION_WIDTH = 1920
VIDEO_RESOLUTION_HEIGHT = 1080
VIDEO_FPS = 30
VIDEO_QUALITY = 5 # 1-5, where 5 is highest
VIDEO_FORMAT = "mp4"
# ─── Image Insertion Settings ──────────────────────────────────────────────
IMAGE_FIT_MODE = "contain" # contain | cover | fill
IMAGE_POSITION = "center" # center | top | bottom
PRESERVE_ASPECT_RATIO = True