"""Configuration for TextBro / Screenshot Studio.

Copy this file to ``config.py`` (which is gitignored) and fill in your
actual values. Every entry can also be supplied through an environment
variable (or a ``.env`` file in the project root) — env wins over the
literal default below, so you can keep an unredacted ``config.py`` empty
on shared machines.

Env vars used:
    API_KEY, API_URL, MODEL_VISION, MODEL_VISION_FALLBACK
    HOST, PORT, DEBUG, MAX_CONTENT_LENGTH_BYTES
    CORS_ORIGINS (comma-separated allowlist; ``*`` for wide-open)
    RATE_LIMIT, RATE_LIMIT_DEFAULT (e.g. RATE_LIMIT_DEFAULT="60/minute;10/second")
    PREFLIGHT_CACHE_SECS

The backend talks to any OpenAI-compatible chat-completions endpoint
(OpenAI, Groq, Together, NVIDIA NIM, a local llama.cpp server, …).
"""
import os
import platform
def _env(name: str, default: str = "") -> str:
return os.environ.get(name, default)
# ─── Engine selection ──────────────────────────────────────────────────────
#
# The video build pipeline auto-selects between two engines based on the
# host OS so the same backend works on a Windows dev box (PowerPoint COM
# automation) and a Linux server (MoviePy + ffmpeg).  Override by exporting
# ``USE_POWERPOINT=1`` / ``USE_POWERPOINT=0`` in the environment.
if _env("USE_POWERPOINT"):
    # Explicit override set: accept the usual truthy spellings.
    USE_POWERPOINT = _env("USE_POWERPOINT").strip().lower() in {"1", "true", "yes", "on"}
else:
    # No override: PowerPoint COM automation only exists on Windows.
    USE_POWERPOINT = platform.system() == "Windows"
# ─── API Configuration ─────────────────────────────────────────────────────
#
# `API_KEY` and `API_URL` are used by the vision/OCR client
# (src/core/vision_client.py).  `API_URL` is also used as the base URL for
# chat completions — each entry in MODELS_CONFIG below provides its own
# `api_key` so you can mix providers if you want.
API_KEY = _env("API_KEY", "your-api-key-here")
API_URL = _env("API_URL", "https://integrate.api.nvidia.com/v1")
# Global completion-token budget; the per-model MAX_TOKENS_* env vars in
# MODELS_CONFIG below can override it entry by entry.
DEFAULT_MAX_TOKENS = int(_env("MAX_TOKENS", "32768"))
# ─── Chat Models (used by src/core/ai_client.py) ───────────────────────────
#
# The frontend exposes named model choices.  Feel free to point them at any
# model your provider exposes.  Each entry MUST define `model`,
# `temperature`, `top_p`, `max_tokens`, and `api_key`.
#
# The keys here are what the UI ships as `model_choice`.  Each one maps
# to a build.nvidia.com model slug that is callable through
# `https://integrate.api.nvidia.com/v1`.  See
# https://build.nvidia.com/models for the live catalog — "Free Endpoint"
# models are always callable with a developer API key, "Downloadable"
# entries require self-hosting.
def _chat_model(slug_env: str, default_slug: str, temperature: float,
                tokens_env: str) -> dict:
    """Build one MODELS_CONFIG entry.

    All entries share ``top_p`` (0.9) and the global ``API_KEY``; the model
    slug and the token budget can each be overridden through their own env
    var (*slug_env* / *tokens_env*), falling back to *default_slug* and
    DEFAULT_MAX_TOKENS respectively.
    """
    return {
        "model": _env(slug_env, default_slug),
        "temperature": temperature,
        "top_p": 0.9,
        "max_tokens": int(_env(tokens_env, str(DEFAULT_MAX_TOKENS))),
        "api_key": API_KEY,
    }


MODELS_CONFIG = {
    # Default — Qwen 3.5 122B MoE (10B active).  Large + capable; users
    # pick it from the UI's "Default" slot.
    "default": _chat_model("MODEL_DEFAULT", "qwen/qwen3.5-122b-a10b", 0.2, "MAX_TOKENS_DEFAULT"),
    # Fast — DeepSeek V4 Flash (284B MoE, 1M ctx), tuned for throughput.
    "fast": _chat_model("MODEL_FAST", "deepseek-ai/deepseek-v4-flash", 0.3, "MAX_TOKENS_FAST"),
    # Short & fastest — Llama 3.1 8B Instruct for very low latency runs.
    "short": _chat_model("MODEL_SHORT", "meta/llama-3.1-8b-instruct", 0.3, "MAX_TOKENS_SHORT"),
    # Balanced — GLM-4.7 (358B, 131K ctx).  Solid middle ground for
    # coding / HTML generation + tool calling.
    "balanced": _chat_model("MODEL_BALANCED", "z-ai/glm-4.7", 0.2, "MAX_TOKENS_BALANCED"),
    # Quality — DeepSeek V3.2 (685B), long-context reasoning.
    "quality": _chat_model("MODEL_QUALITY", "deepseek-ai/deepseek-v3.2", 0.1, "MAX_TOKENS_QUALITY"),
    # Long context — DeepSeek V4 Pro (1.6T MoE, 49B active, 1M ctx).
    "long": _chat_model("MODEL_LONG", "deepseek-ai/deepseek-v4-pro", 0.1, "MAX_TOKENS_LONG"),
    # Llama 3.3 70B kept for back-compat with old stored runs that set
    # `model_choice: "llama"`.  New UI does not expose it separately.
    "llama": _chat_model("MODEL_LLAMA", "meta/llama-3.3-70b-instruct", 0.2, "MAX_TOKENS_LLAMA"),
}
# ─── Vision Model (used by src/core/vision_client.py) ──────────────────────
# Primary OCR/vision model plus a fallback slug.
# NOTE(review): the exact fallback trigger lives in vision_client.py — confirm there.
MODEL_VISION = _env("MODEL_VISION", "nvidia/llama-3.1-nemotron-nano-vl-8b-v1")
MODEL_VISION_FALLBACK = _env("MODEL_VISION_FALLBACK", "meta/llama-3.2-90b-vision-instruct")
# ─── Application Settings ──────────────────────────────────────────────────
#
# Defaults mirror the safe values used by ``app.py`` when run as a script.
# DEBUG defaults to False to avoid the Werkzeug debugger (which is
# remote-code-execution-by-design) being on accidentally.
HOST = _env("HOST", "127.0.0.1")  # loopback only unless explicitly exported
PORT = int(_env("PORT", "5000"))
DEBUG = _env("DEBUG", "0").lower() in ("1", "true", "yes", "on")
# ─── Output Folders (relative to backend/) ─────────────────────────────────
OUTPUT_FOLDER = "output/screenshots"
HTML_FOLDER = "output/html"
# ─── Screenshot Settings ───────────────────────────────────────────────────
# Viewport defaults match a 1080p browser window.
DEFAULT_VIEWPORT_WIDTH = 1920
DEFAULT_VIEWPORT_HEIGHT = 1080
# NOTE(review): zoom presumably scales the device pixel ratio for sharper
# captures, and overlap looks like a per-capture overlap amount — confirm
# against the capture code before relying on either.
DEFAULT_ZOOM = 2.1
DEFAULT_OVERLAP = 15
MAX_SCREENSHOTS_LIMIT = 50
# Cap on how many pages we'll rasterize from a PDF in /extract-from-image.
PDF_MAX_PAGES = int(_env("PDF_MAX_PAGES", "100"))
# ─── AI Settings ───────────────────────────────────────────────────────────
MAX_TOKENS = DEFAULT_MAX_TOKENS  # mirrors DEFAULT_MAX_TOKENS (see API Configuration)
TEMPERATURE = 0.2
# ─── PowerPoint Automation Settings (Windows only) ─────────────────────────
POWERPOINT_ENABLED = True
POWERPOINT_TEMPLATE_PATH = "templates/powerpoint/default.pptm"
POWERPOINT_OUTPUT_FOLDER = "output/presentations"
POWERPOINT_VIDEO_FOLDER = "output/videos"
# ─── Slide Settings ────────────────────────────────────────────────────────
DEFAULT_SLIDE_DURATION = 5.0  # seconds per slide
DEFAULT_TRANSITION_TYPE = "fade"  # fade | push | wipe | none
DEFAULT_TRANSITION_DURATION = 0.5  # seconds for transition
# ─── Video Export Settings ─────────────────────────────────────────────────
VIDEO_RESOLUTION_WIDTH = 1920
VIDEO_RESOLUTION_HEIGHT = 1080
VIDEO_FPS = 30
VIDEO_QUALITY = 5  # 1-5, where 5 is highest
VIDEO_FORMAT = "mp4"
# ─── Image Insertion Settings ──────────────────────────────────────────────
IMAGE_FIT_MODE = "contain"  # contain | cover | fill
IMAGE_POSITION = "center"  # center | top | bottom
PRESERVE_ASPECT_RATIO = True