# Backend security & runtime configuration.
# Copy to backend/.env and fill in. The file is git-ignored.
# Werkzeug debug + reloader. NEVER set this to 1 in production —
# it exposes a PIN-protected RCE shell.
FLASK_DEBUG=0
# Bind address. Default is 127.0.0.1 (loopback only). Override only when
# you have an authenticating reverse proxy in front of the app.
FLASK_HOST=127.0.0.1
PORT=5000
# CORS allowlist (comma-separated). Wildcard is intentionally not supported.
ALLOWED_ORIGINS=http://localhost:5173,http://127.0.0.1:5173
# Shared-secret auth. When set, every non-public request must carry
# X-API-Key: <value>. Leave blank to disable (e.g. for trusted localhost).
API_KEY=
API_URL=https://integrate.api.nvidia.com/v1
MODEL_DEFAULT=qwen/qwen3.5-122b-a10b
MODEL_FAST=meta/llama-3.1-8b-instruct
MODEL_QUALITY=deepseek-ai/deepseek-v4-pro
MODEL_QWEN_122B=qwen/qwen3.5-122b-a10b
MODEL_GLM_5_1=z-ai/glm-5.1
MODEL_DEEPSEEK_V4_PRO=deepseek-ai/deepseek-v4-pro
MODEL_KIMI_2_5=moonshotai/kimi-k2.5
MODEL_NEMOTRON=nvidia/nemotron-3-super-120b-a12b
MODEL_LLAMA=meta/llama-3.3-70b-instruct
MAX_TOKENS=32768
# Per-IP rate limits (flask-limiter syntax: "<n> per <window>", joined by ;).
RATE_LIMIT_DEFAULT=200 per hour;30 per minute
RATE_LIMIT_HEAVY=20 per hour;5 per minute
# Optional: switch from in-memory limiter storage to Redis when running
# multiple workers, e.g. RATE_LIMIT_STORAGE_URI=redis://localhost:6379
RATE_LIMIT_STORAGE_URI=memory://
# Max upload size in bytes (default 32 MiB).
MAX_CONTENT_LENGTH_BYTES=33554432