# =============================================================================
# LLM Provider
# =============================================================================
# Profile: vllm (default, AMD MI300X) | ollama (local fallback) | dummy (CI/eval)
LLM_PROFILE=vllm

# vLLM (AMD Developer Cloud MI300X) — DEFAULT
# Point this at the public URL of your AMD MI300X vLLM endpoint.
# Local dev: http://localhost:8000/v1
VLLM_BASE_URL=http://localhost:8000/v1
VLLM_MODEL=Qwen/Qwen2.5-14B-Instruct
VLLM_API_KEY=
# VLLM_API_KEY left blank = client sends "EMPTY" (vLLM no-auth mode)
# In production set a real key and start vLLM with --api-key
VLLM_TEMPERATURE=0.0
VLLM_MAX_TOKENS=4096

# Ollama (optional local fallback, only when LLM_PROFILE=ollama)
OLLAMA_BASE_URL=http://localhost:11434
OLLAMA_MODEL=qwen2.5:7b-instruct

# =============================================================================
# Embedding (sentence-transformers / Hugging Face, runs locally on CPU)
# =============================================================================
# Default: BAAI/bge-m3 (2.27 GB, 1024 dim, multilingual incl. EN/HU/DE/FR/...)
# Lighter alternative if memory-constrained: BAAI/bge-small-en-v1.5 (133 MB, 384 dim, en-only)
EMBEDDING_MODEL=BAAI/bge-m3

# =============================================================================
# Storage
# =============================================================================
CHROMA_PATH=./chroma_db
CHROMA_COLLECTION=documents
CHECKPOINT_DB_PATH=./data/checkpoints.sqlite

# =============================================================================
# Pipeline tuning
# =============================================================================
CHUNK_MAX_CHARS=15000
CHUNK_OVERLAP_CHARS=500
SINGLE_CALL_THRESHOLD=30000

# Agentic loop guards
CHAT_MAX_ITERATIONS=10
VALIDATOR_MAX_RETRIES=2
DD_SUPERVISOR_MAX_ITERATIONS=4

# =============================================================================
# LangSmith observability (optional)
# =============================================================================
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_API_KEY=lsv2_pt_XXXXXXXXXXXXXXXXXXXXXXX
# LANGCHAIN_PROJECT=document-intelligence-amd

# =============================================================================
# Streamlit
# =============================================================================
STREAMLIT_PORT=8501