# Backend security & runtime configuration.
# Copy to backend/.env and fill in. The file is git-ignored.
#
# Format: one KEY=value assignment per line. Comments go on their own line
# starting with "#" in column 0 — text after a leading "#" is ignored up to
# the end of the line, so an assignment must never share a line with one.

# Werkzeug debug + reloader. NEVER set this to 1 in production —
# it exposes a PIN-protected RCE shell.
FLASK_DEBUG=0

# Bind address. Default is 127.0.0.1 (loopback only). Override only when
# you have an authenticating reverse proxy in front of the app.
FLASK_HOST=127.0.0.1
PORT=5000

# CORS allowlist (comma-separated). Wildcard is intentionally not supported.
ALLOWED_ORIGINS=http://localhost:5173,http://127.0.0.1:5173

# Shared-secret auth. When set, every non-public request must carry
# X-API-Key: <key>. Leave blank to disable (e.g. for trusted localhost).
API_KEY=

# Upstream API base URL and model identifiers.
# NOTE(review): how each MODEL_* alias and MAX_TOKENS is consumed is not
# visible from this file — confirm against the backend before changing them.
API_URL=https://integrate.api.nvidia.com/v1
MODEL_DEFAULT=qwen/qwen3.5-122b-a10b
MODEL_FAST=meta/llama-3.1-8b-instruct
MODEL_QUALITY=deepseek-ai/deepseek-v4-pro
MODEL_QWEN_122B=qwen/qwen3.5-122b-a10b
MODEL_GLM_5_1=z-ai/glm-5.1
MODEL_DEEPSEEK_V4_PRO=deepseek-ai/deepseek-v4-pro
MODEL_KIMI_2_5=moonshotai/kimi-k2.5
MODEL_NEMOTRON=nvidia/nemotron-3-super-120b-a12b
MODEL_LLAMA=meta/llama-3.3-70b-instruct
MAX_TOKENS=32768

# Per-IP rate limits (flask-limiter syntax: "<count> per <period>", joined by ;).
RATE_LIMIT_DEFAULT=200 per hour;30 per minute
RATE_LIMIT_HEAVY=20 per hour;5 per minute

# Optional: switch from in-memory limiter storage to Redis when running
# multiple workers, e.g. RATE_LIMIT_STORAGE_URI=redis://localhost:6379
RATE_LIMIT_STORAGE_URI=memory://

# Max upload size in bytes (default 32 MiB).
MAX_CONTENT_LENGTH_BYTES=33554432