File size: 2,406 Bytes
# =============================================================================
# LLM Provider
# =============================================================================
# Selects which of the provider settings below is active.
# Profile: vllm (default, AMD MI300X) | ollama (local fallback) | dummy (CI/eval)
LLM_PROFILE=vllm
# vLLM (AMD Developer Cloud MI300X) — DEFAULT
# Point this at the public URL of your AMD MI300X vLLM endpoint.
# The value below is the local-development endpoint; note the /v1 path suffix.
# Local dev: http://localhost:8000/v1
VLLM_BASE_URL=http://localhost:8000/v1
# Model identifier requested from the vLLM endpoint.
# NOTE(review): presumably must match the model the vLLM server was started
# with — confirm against the deployment.
VLLM_MODEL=Qwen/Qwen2.5-14B-Instruct
# API key for the vLLM endpoint.
# Left blank = client sends "EMPTY" (vLLM no-auth mode).
# In production set a real key and start vLLM with --api-key <key>.
# Do not commit a real key to version control.
VLLM_API_KEY=
# Sampling temperature.
# NOTE(review): 0.0 presumably selects deterministic (greedy) decoding — confirm.
VLLM_TEMPERATURE=0.0
# NOTE(review): presumably the cap on generated tokens per request — confirm
# against the client code.
VLLM_MAX_TOKENS=4096
# Ollama (optional local fallback, only when LLM_PROFILE=ollama)
OLLAMA_BASE_URL=http://localhost:11434
OLLAMA_MODEL=qwen2.5:7b-instruct
# =============================================================================
# Embedding (sentence-transformers / Hugging Face, runs locally on CPU)
# =============================================================================
# Default: BAAI/bge-m3 (2.27 GB, 1024 dim, multilingual incl. EN/HU/DE/FR/...)
# Lighter alternative if memory-constrained: BAAI/bge-small-en-v1.5 (133 MB, 384 dim, en-only)
# The two models produce vectors of different dimension (1024 vs 384).
# NOTE(review): switching models presumably requires re-indexing any existing
# vector store — confirm before changing this on a populated deployment.
EMBEDDING_MODEL=BAAI/bge-m3
# =============================================================================
# Storage
# =============================================================================
# All paths below are relative (./...).
# NOTE(review): presumably resolved against the process working directory —
# confirm, or use absolute paths in deployment.
# Location of the Chroma store on disk.
CHROMA_PATH=./chroma_db
# Name of the Chroma collection to use.
CHROMA_COLLECTION=documents
# Checkpoint database file (SQLite, judging by the file extension).
CHECKPOINT_DB_PATH=./data/checkpoints.sqlite
# =============================================================================
# Pipeline tuning
# =============================================================================
# Chunk sizing; units are characters, per the key names.
# Keep CHUNK_OVERLAP_CHARS well below CHUNK_MAX_CHARS.
CHUNK_MAX_CHARS=15000
CHUNK_OVERLAP_CHARS=500
# NOTE(review): presumably the input size (in characters) up to which a
# document is handled in a single LLM call instead of being chunked — confirm.
SINGLE_CALL_THRESHOLD=30000
# Agentic loop guards
# Upper bounds on iterations / retries; plain integers.
# NOTE(review): presumably these stop runaway agent loops — confirm where each
# limit is enforced.
CHAT_MAX_ITERATIONS=10
VALIDATOR_MAX_RETRIES=2
DD_SUPERVISOR_MAX_ITERATIONS=4
# =============================================================================
# LangSmith observability (optional)
# =============================================================================
# Disabled by default: all three keys are commented out.
# To enable, uncomment the lines below and replace the placeholder API key
# with your own. Do not commit a real key to version control.
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_API_KEY=lsv2_pt_XXXXXXXXXXXXXXXXXXXXXXX
# LANGCHAIN_PROJECT=document-intelligence-amd
# =============================================================================
# Streamlit
# =============================================================================
# TCP port for the Streamlit UI (8501 is Streamlit's own default port).
# NOTE(review): how this value reaches Streamlit (launch script, compose file)
# is not visible here — confirm.
STREAMLIT_PORT=8501
|