"""Configuration for TextBro / Screenshot Studio.

Copy this file to ``config.py`` (which is gitignored) and fill in your
actual values. Every entry can also be supplied through an environment
variable (or a ``.env`` file in the project root); the environment wins
over the literal default below, so ``config.py`` can stay free of real
secrets on shared machines.

Env vars used:
    API_KEY, API_URL, MODEL_VISION, MODEL_VISION_FALLBACK
    HOST, PORT, DEBUG, MAX_CONTENT_LENGTH_BYTES
    CORS_ORIGINS  (comma-separated allowlist; ``*`` for wide-open)
    RATE_LIMIT, RATE_LIMIT_DEFAULT  (e.g. RATE_LIMIT_DEFAULT="60/minute;10/second")
    PREFLIGHT_CACHE_SECS

The backend talks to any OpenAI-compatible chat-completions endpoint
(OpenAI, Groq, Together, NVIDIA NIM, a local llama.cpp server, …).
"""
import os
import platform


def _env(name: str, default: str = "") -> str:
    return os.environ.get(name, default)
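
# Example of the precedence rule: with ``PORT=8080`` exported,
# ``_env("PORT", "5000")`` returns "8080"; with nothing exported it falls
# back to "5000". Note that nothing in this file reads ``.env`` itself;
# that is assumed to happen before import (e.g. via python-dotenv).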


# ─── Engine selection ──────────────────────────────────────────────────────
#
# The video build pipeline auto-selects between two engines based on the
# host OS so the same backend works on a Windows dev box (PowerPoint COM
# automation) and a Linux server (MoviePy + ffmpeg). Override by exporting
# ``USE_POWERPOINT=1`` / ``USE_POWERPOINT=0`` in the environment.

_use_ppt_env = _env("USE_POWERPOINT")
USE_POWERPOINT = (
    _use_ppt_env.strip().lower() in {"1", "true", "yes", "on"}
    if _use_ppt_env
    else platform.system() == "Windows"
)
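
# e.g. exporting ``USE_POWERPOINT=0`` forces the MoviePy/ffmpeg engine even
# on a Windows host (handy when PowerPoint/COM automation is unavailable).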


# ─── API Configuration ─────────────────────────────────────────────────────
#
# `API_KEY` and `API_URL` are used by the vision/OCR client
# (src/core/vision_client.py). `API_URL` is also used as the base URL for
# chat completions; each entry in MODELS_CONFIG below provides its own
# `api_key` so you can mix providers if you want.

API_KEY = _env("API_KEY", "your-api-key-here")
API_URL = _env("API_URL", "https://integrate.api.nvidia.com/v1")
DEFAULT_MAX_TOKENS = int(_env("MAX_TOKENS", "32768"))

# ─── Chat Models (used by src/core/ai_client.py) ───────────────────────────
#
# The frontend exposes named model choices. Feel free
# to point them at any model your provider exposes. Each entry MUST define
# `model`, `temperature`, `top_p`, `max_tokens`, and `api_key`.

# The keys here are what the UI ships as `model_choice`. Each one maps
# to a build.nvidia.com model slug that is callable through
# `https://integrate.api.nvidia.com/v1`. See
# https://build.nvidia.com/models for the live catalog: "Free Endpoint"
# models are always callable with a developer API key, while "Downloadable"
# entries require self-hosting.

MODELS_CONFIG = {
    # Default: Qwen 3.5 122B MoE (10B active). Large + capable; users
    # pick it via the UI's "Default" slot.
    "default": {
        "model": _env("MODEL_DEFAULT", "qwen/qwen3.5-122b-a10b"),
        "temperature": 0.2,
        "top_p": 0.9,
        "max_tokens": int(_env("MAX_TOKENS_DEFAULT", str(DEFAULT_MAX_TOKENS))),
        "api_key": API_KEY,
    },
    # Fast: DeepSeek V4 Flash (284B MoE, 1M ctx), tuned for throughput.
    "fast": {
        "model": _env("MODEL_FAST", "deepseek-ai/deepseek-v4-flash"),
        "temperature": 0.3,
        "top_p": 0.9,
        "max_tokens": int(_env("MAX_TOKENS_FAST", str(DEFAULT_MAX_TOKENS))),
        "api_key": API_KEY,
    },
    # Short & fastest: Llama 3.1 8B Instruct for very low-latency runs.
    "short": {
        "model": _env("MODEL_SHORT", "meta/llama-3.1-8b-instruct"),
        "temperature": 0.3,
        "top_p": 0.9,
        "max_tokens": int(_env("MAX_TOKENS_SHORT", str(DEFAULT_MAX_TOKENS))),
        "api_key": API_KEY,
    },
    # Balanced: GLM-4.7 (358B, 131K ctx). Solid middle ground for
    # coding / HTML generation + tool calling.
    "balanced": {
        "model": _env("MODEL_BALANCED", "z-ai/glm-4.7"),
        "temperature": 0.2,
        "top_p": 0.9,
        "max_tokens": int(_env("MAX_TOKENS_BALANCED", str(DEFAULT_MAX_TOKENS))),
        "api_key": API_KEY,
    },
    # Quality: DeepSeek V3.2 (685B), long-context reasoning.
    "quality": {
        "model": _env("MODEL_QUALITY", "deepseek-ai/deepseek-v3.2"),
        "temperature": 0.1,
        "top_p": 0.9,
        "max_tokens": int(_env("MAX_TOKENS_QUALITY", str(DEFAULT_MAX_TOKENS))),
        "api_key": API_KEY,
    },
    # Long context: DeepSeek V4 Pro (1.6T MoE, 49B active, 1M ctx).
    "long": {
        "model": _env("MODEL_LONG", "deepseek-ai/deepseek-v4-pro"),
        "temperature": 0.1,
        "top_p": 0.9,
        "max_tokens": int(_env("MAX_TOKENS_LONG", str(DEFAULT_MAX_TOKENS))),
        "api_key": API_KEY,
    },
    # Llama 3.3 70B kept for back-compat with old stored runs that set
    # `model_choice: "llama"`. New UI does not expose it separately.
    "llama": {
        "model": _env("MODEL_LLAMA", "meta/llama-3.3-70b-instruct"),
        "temperature": 0.2,
        "top_p": 0.9,
        "max_tokens": int(_env("MAX_TOKENS_LLAMA", str(DEFAULT_MAX_TOKENS))),
        "api_key": API_KEY,
    },
}
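
# For reference, a minimal sketch of how a caller could consume one of the
# entries above with the official ``openai`` SDK. This is an illustration of
# the intended shape, not a copy of src/core/ai_client.py (which may differ):
#
#     from openai import OpenAI
#
#     cfg = MODELS_CONFIG["default"]
#     client = OpenAI(base_url=API_URL, api_key=cfg["api_key"])
#     resp = client.chat.completions.create(
#         model=cfg["model"],
#         messages=[{"role": "user", "content": "Say hello."}],
#         temperature=cfg["temperature"],
#         top_p=cfg["top_p"],
#         max_tokens=cfg["max_tokens"],
#     )
#     print(resp.choices[0].message.content)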

# ─── Vision Model (used by src/core/vision_client.py) ──────────────────────

MODEL_VISION = _env("MODEL_VISION", "nvidia/llama-3.1-nemotron-nano-vl-8b-v1")
MODEL_VISION_FALLBACK = _env("MODEL_VISION_FALLBACK", "meta/llama-3.2-90b-vision-instruct")

# ─── Application Settings ──────────────────────────────────────────────────
#
# Defaults mirror the safe values used by ``app.py`` when run as a script.
# DEBUG defaults to False so the Werkzeug debugger (which is remote code
# execution by design) is never left on by accident.

DEBUG = _env("DEBUG", "0").lower() in {"1", "true", "yes", "on"}
PORT = int(_env("PORT", "5000"))
HOST = _env("HOST", "127.0.0.1")  # bind to loopback by default
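
# ``app.py`` presumably consumes these via a Flask-style entry point, e.g.:
#
#     app.run(host=HOST, port=PORT, debug=DEBUG)
#
# With the defaults above the server is reachable only from the local
# machine; export ``HOST=0.0.0.0`` explicitly to listen on all interfaces.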

# ─── Output Folders (relative to backend/) ─────────────────────────────────

OUTPUT_FOLDER = "output/screenshots"
HTML_FOLDER = "output/html"

# ─── Screenshot Settings ───────────────────────────────────────────────────

DEFAULT_VIEWPORT_WIDTH = 1920
DEFAULT_VIEWPORT_HEIGHT = 1080
DEFAULT_ZOOM = 2.1
DEFAULT_OVERLAP = 15
MAX_SCREENSHOTS_LIMIT = 50

# Cap on how many pages we'll rasterize from a PDF in /extract-from-image.
PDF_MAX_PAGES = int(_env("PDF_MAX_PAGES", "100"))

# ─── AI Settings ───────────────────────────────────────────────────────────

MAX_TOKENS = DEFAULT_MAX_TOKENS
TEMPERATURE = 0.2

# ─── PowerPoint Automation Settings (Windows only) ─────────────────────────

POWERPOINT_ENABLED = True
POWERPOINT_TEMPLATE_PATH = "templates/powerpoint/default.pptm"
POWERPOINT_OUTPUT_FOLDER = "output/presentations"
POWERPOINT_VIDEO_FOLDER = "output/videos"

# ─── Slide Settings ────────────────────────────────────────────────────────

DEFAULT_SLIDE_DURATION = 5.0          # seconds per slide
DEFAULT_TRANSITION_TYPE = "fade"      # fade | push | wipe | none
DEFAULT_TRANSITION_DURATION = 0.5     # seconds for transition

# ─── Video Export Settings ─────────────────────────────────────────────────

VIDEO_RESOLUTION_WIDTH = 1920
VIDEO_RESOLUTION_HEIGHT = 1080
VIDEO_FPS = 30
VIDEO_QUALITY = 5               # 1-5, where 5 is highest
VIDEO_FORMAT = "mp4"

# ─── Image Insertion Settings ──────────────────────────────────────────────

IMAGE_FIT_MODE = "contain"      # contain | cover | fill
IMAGE_POSITION = "center"       # center | top | bottom
PRESERVE_ASPECT_RATIO = True