| # ============================================================================= |
| # Research AI Assistant API - Environment Configuration |
| # ============================================================================= |
# Copy this content into a file named .env and fill in your actual values
| # Never commit .env to version control! |
|
|
| # ============================================================================= |
| # ZeroGPU Chat API Configuration (REQUIRED) |
| # ============================================================================= |
| # Base URL for your ZeroGPU Chat API endpoint (RunPod) |
| # Format: http://your-pod-ip:8000 or https://your-domain.com |
| # Example: http://bm9njt1ypzvuqw-8000.proxy.runpod.net |
| ZEROGPU_BASE_URL=http://your-pod-ip:8000 |
|
|
# Email for authentication (register first via the /register endpoint)
| ZEROGPU_EMAIL=your-email@example.com |
|
|
| # Password for authentication |
| ZEROGPU_PASSWORD=your_secure_password_here |
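
# Example: registering the account above against the /register endpoint.
# This is a sketch; the exact request body may differ in your deployment,
# so adjust field names as needed:
#   curl -X POST "$ZEROGPU_BASE_URL/register" \
#     -H "Content-Type: application/json" \
#     -d '{"email": "your-email@example.com", "password": "your_secure_password_here"}'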
|
|
| # ============================================================================= |
| # Token Allocation Configuration |
| # ============================================================================= |
# Maximum tokens dedicated to user input (prioritized over context)
| # Recommended: 32000 tokens for DeepSeek R1 (128K context window) |
| USER_INPUT_MAX_TOKENS=32000 |
|
|
| # Maximum tokens for context preparation (includes user input + context) |
| # Recommended: 115000 tokens for DeepSeek R1 (leaves ~13K for output) |
| CONTEXT_PREPARATION_BUDGET=115000 |
|
|
# Context pruning threshold (should match CONTEXT_PREPARATION_BUDGET)
| CONTEXT_PRUNING_THRESHOLD=115000 |
|
|
| # Always prioritize user input over historical context |
| PRIORITIZE_USER_INPUT=True |
|
|
| # Model context window (actual limit for your deployed model) |
| # Default: 8192 tokens (adjust based on your model) |
| # This is the maximum total tokens (input + output) the model can handle |
| # Common values: 4096, 8192, 16384, 32768, etc. |
| ZEROGPU_MODEL_CONTEXT_WINDOW=8192 |
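
# Sanity check (worked example for a 128K-context model such as DeepSeek R1):
#   CONTEXT_PREPARATION_BUDGET (115000) + ~13000 reserved for output ~= 128000 total
# The token budgets above must fit inside ZEROGPU_MODEL_CONTEXT_WINDOW. With the
# 8192 default you would scale them down accordingly; illustrative values only:
#   USER_INPUT_MAX_TOKENS=2048
#   CONTEXT_PREPARATION_BUDGET=7000
#   CONTEXT_PRUNING_THRESHOLD=7000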
|
|
| # ============================================================================= |
| # Database Configuration |
| # ============================================================================= |
# SQLite database path (default: sessions.db)
# Use a path under /tmp/ for Docker/containerized environments
| DB_PATH=sessions.db |
|
|
| # FAISS index path for embeddings (default: embeddings.faiss) |
| FAISS_INDEX_PATH=embeddings.faiss |
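
# Example (containerized deployment; paths are illustrative):
#   DB_PATH=/tmp/sessions.db
#   FAISS_INDEX_PATH=/tmp/embeddings.faiss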
|
|
| # ============================================================================= |
| # Cache Configuration |
| # ============================================================================= |
# HuggingFace cache directory (for any models still downloaded locally)
# Note: newer transformers releases read HF_HOME; TRANSFORMERS_CACHE is kept
# for compatibility with older versions
HF_HOME=~/.cache/huggingface
TRANSFORMERS_CACHE=~/.cache/huggingface
|
|
| # HuggingFace token (optional - only needed if using gated models) |
| HF_TOKEN= |
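
# Tokens can be created at https://huggingface.co/settings/tokens; a read-only
# token is typically sufficient for downloading gated models you have access to.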
|
|
| # Cache TTL in seconds (default: 3600 = 1 hour) |
| CACHE_TTL=3600 |
|
|
| # ============================================================================= |
| # Session Configuration |
| # ============================================================================= |
| # Session timeout in seconds (default: 3600 = 1 hour) |
| SESSION_TIMEOUT=3600 |
|
|
| # Maximum session size in megabytes (default: 10 MB) |
| MAX_SESSION_SIZE_MB=10 |
|
|
| # ============================================================================= |
| # Performance Configuration |
| # ============================================================================= |
| # Maximum worker threads for parallel processing (default: 4) |
| MAX_WORKERS=4 |
|
|
| # ============================================================================= |
| # Mobile Optimization |
| # ============================================================================= |
# Maximum tokens for mobile responses (default: 1200)
# Raised from the previous default of 800 to allow fuller responses on mobile devices
| MOBILE_MAX_TOKENS=1200 |
|
|
# Mobile request timeout in milliseconds (default: 15000 = 15 seconds)
| MOBILE_TIMEOUT=15000 |
|
|
| # ============================================================================= |
| # API Configuration |
| # ============================================================================= |
| # Flask/Gradio server port (default: 7860) |
| GRADIO_PORT=7860 |
|
|
| # Server host (default: 0.0.0.0 for all interfaces) |
| GRADIO_HOST=0.0.0.0 |
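
# Example launch with overrides (the entrypoint name app.py is illustrative;
# use your actual startup command):
#   GRADIO_HOST=127.0.0.1 GRADIO_PORT=8080 python app.py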
|
|
| # ============================================================================= |
| # Logging Configuration |
| # ============================================================================= |
| # Logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL (default: INFO) |
| LOG_LEVEL=INFO |
|
|
| # Log format: json or text (default: json) |
| LOG_FORMAT=json |
|
|
| # Log directory (default: /tmp/logs) |
| LOG_DIR=/tmp/logs |
|
|
| # ============================================================================= |
| # Context Configuration |
| # ============================================================================= |
| # Maximum context tokens (default: 4000) |
| # Note: This is overridden by CONTEXT_PREPARATION_BUDGET if set |
| MAX_CONTEXT_TOKENS=4000 |
|
|
| # Cache TTL for context in seconds (default: 300 = 5 minutes) |
| CACHE_TTL_SECONDS=300 |
|
|
| # Maximum cache size (default: 100) |
| MAX_CACHE_SIZE=100 |
|
|
| # Enable parallel processing (default: True) |
| PARALLEL_PROCESSING=True |
|
|
| # Context decay factor (default: 0.8) |
| CONTEXT_DECAY_FACTOR=0.8 |
|
|
| # Maximum interactions to keep in context (default: 10) |
| MAX_INTERACTIONS_TO_KEEP=10 |
|
|
| # Enable metrics collection (default: True) |
| ENABLE_METRICS=True |
|
|
| # Enable context compression (default: True) |
| COMPRESSION_ENABLED=True |
|
|
| # Summarization threshold in tokens (default: 2000) |
| SUMMARIZATION_THRESHOLD=2000 |
|
|
| # ============================================================================= |
| # Model Selection (for context operations - if still using local models) |
| # ============================================================================= |
| # These are optional and only used if local models are still needed |
| # for context summarization or other operations |
| CONTEXT_SUMMARIZATION_MODEL=Qwen/Qwen2.5-7B-Instruct |
| CONTEXT_INTENT_MODEL=Qwen/Qwen2.5-7B-Instruct |
| CONTEXT_SYNTHESIS_MODEL=Qwen/Qwen2.5-7B-Instruct |
|
|
| # ============================================================================= |
| # Security Notes |
| # ============================================================================= |
| # - Never commit .env file to version control |
| # - Keep API keys secret and rotate them regularly |
| # - Use environment variables in production (not .env files) |
| # - Set proper file permissions: chmod 600 .env |
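
# Example (production): pass secrets as real environment variables rather than
# an .env file, e.g. with Docker (image name is illustrative):
#   docker run -e ZEROGPU_BASE_URL -e ZEROGPU_EMAIL -e ZEROGPU_PASSWORD your-image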
|