| # ============================================================================= | |
| # LLM Provider | |
| # ============================================================================= | |
| # Profile, one of: vllm (default, AMD MI300X), ollama (local fallback), dummy (CI/eval) | |
| LLM_PROFILE=vllm | |
| # vLLM (AMD Developer Cloud MI300X) — DEFAULT | |
| # Point this at the public URL of your AMD MI300X vLLM endpoint. | |
| # Local dev: http://localhost:8000/v1 | |
| VLLM_BASE_URL=http://localhost:8000/v1 | |
| VLLM_MODEL=Qwen/Qwen2.5-14B-Instruct | |
| VLLM_API_KEY= | |
| # If VLLM_API_KEY is left blank, the client sends "EMPTY" (vLLM no-auth mode). | |
| # In production, set a real key and start vLLM with --api-key <key>. | |
| VLLM_TEMPERATURE=0.0 | |
| VLLM_MAX_TOKENS=4096 | |
| # Ollama (optional local fallback; used only when LLM_PROFILE=ollama) | |
| OLLAMA_BASE_URL=http://localhost:11434 | |
| OLLAMA_MODEL=qwen2.5:7b-instruct | |
| # ============================================================================= | |
| # Embedding (sentence-transformers / Hugging Face, runs locally on CPU) | |
| # ============================================================================= | |
| # Default: BAAI/bge-m3 (2.27 GB, 1024 dim, multilingual incl. EN/HU/DE/FR/...) | |
| # Lighter alternative if memory-constrained: BAAI/bge-small-en-v1.5 (133 MB, 384 dim, en-only) | |
| EMBEDDING_MODEL=BAAI/bge-m3 | |
| # ============================================================================= | |
| # Storage | |
| # ============================================================================= | |
| CHROMA_PATH=./chroma_db | |
| CHROMA_COLLECTION=documents | |
| CHECKPOINT_DB_PATH=./data/checkpoints.sqlite | |
| # ============================================================================= | |
| # Pipeline tuning | |
| # ============================================================================= | |
| CHUNK_MAX_CHARS=15000 | |
| CHUNK_OVERLAP_CHARS=500 | |
| SINGLE_CALL_THRESHOLD=30000 | |
| # Agentic loop guards | |
| CHAT_MAX_ITERATIONS=10 | |
| VALIDATOR_MAX_RETRIES=2 | |
| DD_SUPERVISOR_MAX_ITERATIONS=4 | |
| # ============================================================================= | |
| # LangSmith observability (optional) | |
| # ============================================================================= | |
| # LANGCHAIN_TRACING_V2=true | |
| # LANGCHAIN_API_KEY=lsv2_pt_XXXXXXXXXXXXXXXXXXXXXXX | |
| # LANGCHAIN_PROJECT=document-intelligence-amd | |
| # ============================================================================= | |
| # Streamlit | |
| # ============================================================================= | |
| STREAMLIT_PORT=8501 | |