# Copy to `.env` (gitignored). Both frontend and backend read this file —
# Vite loads `VITE_*` keys into the React build, the Flask backend
# loads everything via python-dotenv at boot.

# ─── Backend: AI provider ──────────────────────────────────────────────────
API_KEY=your-api-key-here
API_URL=https://integrate.api.nvidia.com/v1

# Optional per-tier model overrides
# MODEL_DEFAULT=qwen/qwen3.5-122b-a10b
# MODEL_FAST=meta/llama-3.1-8b-instruct
# MODEL_QUALITY=deepseek-ai/deepseek-v4-pro
# MODEL_QWEN_122B=qwen/qwen3.5-122b-a10b
# MODEL_GLM_5_1=z-ai/glm-5.1
# MODEL_DEEPSEEK_V4_PRO=deepseek-ai/deepseek-v4-pro
# MODEL_KIMI_2_5=moonshotai/kimi-k2.5
# MODEL_NEMOTRON=nvidia/nemotron-3-super-120b-a12b
# MODEL_LLAMA=meta/llama-3.3-70b-instruct
# MAX_TOKENS=32768
# MODEL_VISION=nvidia/llama-3.1-nemotron-nano-vl-8b-v1

# ─── Backend: server ────────────────────────────────────────────────────────
HOST=127.0.0.1
PORT=5000
DEBUG=0

# Set to 1 only if you genuinely need to bind 0.0.0.0 (e.g. behind a proxy).
ALLOW_PUBLIC_BIND=0

# Comma-separated CORS allowlist. Use `*` only for prototyping.
CORS_ORIGINS=http://localhost:5173,http://127.0.0.1:5173,http://localhost:5000,http://127.0.0.1:5000

# Cap incoming request bodies (default 64 MB).
# MAX_CONTENT_LENGTH_BYTES=67108864

# ─── Backend: optional rate limit ──────────────────────────────────────────
# RATE_LIMIT=on
# RATE_LIMIT_DEFAULT=60/minute;10/second
# RATE_LIMIT_STORAGE=memory://

# ─── Backend: misc ───────────────────────────────────────────────────────
# PREFLIGHT_CACHE_SECS=15
# PDF_MAX_PAGES=100

# ─── Frontend (Vite picks up only VITE_*) ──────────────────────────────────
# Override the dev/build-time backend URL. Empty = same origin (proxy).
VITE_BACKEND_URL=