"""Central configuration: Pydantic ``BaseSettings`` bound to environment variables.
Single source of truth: the ``settings = Settings()`` singleton. Every module
imports this. The ``.env`` file is automatically loaded (python-dotenv) if it
exists in the project root.
Profiles:
* ``LLM_PROFILE=vllm`` — Qwen 2.5 on AMD MI300X via vLLM (OpenAI-compat). Production default.
* ``LLM_PROFILE=ollama`` — local Ollama (Qwen 2.5 7B Instruct). Dev / data-privacy.
* ``LLM_PROFILE=dummy`` — deterministic stub (CI / eval / load).
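
Typical usage (illustrative; assumes this module is importable as ``config``)::

    from config import settings
    print(settings.llm_profile)   # "vllm" unless LLM_PROFILE overrides it
    print(settings.is_dummy)      # True only under the dummy profile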
"""
from __future__ import annotations
from pathlib import Path
from typing import Literal
from pydantic import Field, computed_field
from pydantic_settings import BaseSettings, SettingsConfigDict
# Absolute project root, independent of the current working directory at launch
PROJECT_ROOT = Path(__file__).resolve().parent
class Settings(BaseSettings):
"""Full application runtime configuration.
    Every field is read from environment variables or ``.env``, with the
    defaults below applied when neither provides a value, so the app runs
    even without a ``.env`` file.
"""
model_config = SettingsConfigDict(
env_file=PROJECT_ROOT / ".env",
env_file_encoding="utf-8",
case_sensitive=False,
extra="ignore", # don't raise on unknown env vars (e.g. LANGCHAIN_*)
)
# ---------------------------------------------------------------------
# LLM provider selection
# ---------------------------------------------------------------------
llm_profile: Literal["vllm", "ollama", "dummy"] = "vllm"
"""Default LLM profile. Runtime override:
``graph.invoke(state, config={"configurable": {"llm_profile": "dummy"}})``."""
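    # Env-based selection (illustrative): pydantic-settings maps the field name
    # to the LLM_PROFILE variable case-insensitively, so e.g.
    #   LLM_PROFILE=ollama python -m app
    # switches every Settings() instance to the Ollama profile (the module
    # path `app` above is an assumption, not part of this file).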
# vLLM (AMD Developer Cloud MI300X) — production default
vllm_base_url: str = "http://localhost:8000/v1"
"""vLLM endpoint URL. In production: http://<mi300x-public-ip>:8000/v1"""
vllm_model: str = "Qwen/Qwen2.5-14B-Instruct"
"""Model id served by vLLM. Alternatives: Qwen/Qwen2.5-32B-Instruct, Qwen/Qwen2.5-7B-Instruct."""
vllm_api_key: str | None = None
"""Optional API key for vLLM. If unset, sent as 'EMPTY' (vLLM no-auth mode).
In production set a real key and start vLLM with --api-key <key>."""
vllm_temperature: float = 0.0
vllm_max_tokens: int = 4096
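    # How these fields are expected to be consumed (a sketch, not part of this
    # module): an OpenAI-compatible client pointed at the vLLM server, e.g.
    # with langchain-openai's ChatOpenAI:
    #   ChatOpenAI(base_url=settings.vllm_base_url,
    #              api_key=settings.vllm_api_key or "EMPTY",
    #              model=settings.vllm_model,
    #              temperature=settings.vllm_temperature,
    #              max_tokens=settings.vllm_max_tokens)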
# Ollama — local fallback
ollama_base_url: str = "http://localhost:11434"
ollama_model: str = "qwen2.5:7b-instruct"
ollama_temperature: float = 0.0
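    # Analogous Ollama wiring (a sketch; assumes langchain-ollama is available):
    #   ChatOllama(base_url=settings.ollama_base_url,
    #              model=settings.ollama_model,
    #              temperature=settings.ollama_temperature)
    # The model must be pulled first: `ollama pull qwen2.5:7b-instruct`.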
# ---------------------------------------------------------------------
# Embedding model — sentence-transformers, runs locally on CPU
# ---------------------------------------------------------------------
embedding_model: str = "BAAI/bge-m3"
"""Default: BAAI/bge-m3 (2.27 GB, 1024 dim, multilingual EN/HU/DE/FR/...).
Lighter alternative if memory-constrained: BAAI/bge-small-en-v1.5 (133 MB, 384 dim, en-only)."""
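    # Loading sketch (illustrative; standard sentence-transformers API):
    #   from sentence_transformers import SentenceTransformer
    #   model = SentenceTransformer(settings.embedding_model)  # downloads on first use
    #   vecs = model.encode(["some chunk text"])               # (1, 1024) for bge-m3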
# ---------------------------------------------------------------------
# Storage
# ---------------------------------------------------------------------
chroma_path: Path = Field(default=PROJECT_ROOT / "chroma_db")
chroma_collection: str = "documents"
checkpoint_db_path: Path = Field(default=PROJECT_ROOT / "data" / "checkpoints.sqlite")
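    # Consumption sketch (illustrative): a persistent Chroma client plus a
    # LangGraph SQLite checkpointer; the calls are the libraries' real APIs,
    # but the wiring is an assumption about the rest of this codebase:
    #   chromadb.PersistentClient(path=str(settings.chroma_path))
    #   SqliteSaver.from_conn_string(str(settings.checkpoint_db_path))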
# ---------------------------------------------------------------------
# Pipeline tuning
# ---------------------------------------------------------------------
chunk_max_chars: int = 15_000
chunk_overlap_chars: int = 500
single_call_threshold: int = 30_000
"""If doc.full_text < this many chars, a single LLM call is enough (no chunking)."""
# Loop guards
chat_max_iterations: int = 10
"""Chat agent ↔ tools loop max iterations — infinite-loop guard."""
validator_max_retries: int = 2
"""Chat validator → agent retry count when source citations are missing."""
dd_supervisor_max_iterations: int = 4
"""DD supervisor max iterations before forced synthesizer fallback."""
# ---------------------------------------------------------------------
# Streamlit
# ---------------------------------------------------------------------
streamlit_port: int = 8501
# ---------------------------------------------------------------------
# LangSmith observability (optional)
# ---------------------------------------------------------------------
langchain_tracing_v2: bool = False
langchain_api_key: str | None = None
langchain_project: str = "document-intelligence-amd"
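    # These mirror LangSmith's own environment variables, so tracing can also
    # be enabled purely via the environment, no code change needed, e.g.:
    #   LANGCHAIN_TRACING_V2=true LANGCHAIN_API_KEY=... LANGCHAIN_PROJECT=document-intelligence-amd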
# ---------------------------------------------------------------------
# Computed fields
# ---------------------------------------------------------------------
@computed_field
@property
def project_root(self) -> Path:
return PROJECT_ROOT
@computed_field
@property
def langsmith_enabled(self) -> bool:
return self.langchain_tracing_v2 and bool(self.langchain_api_key)
@computed_field
@property
def is_dummy(self) -> bool:
return self.llm_profile == "dummy"
# Singleton — every module imports this
settings = Settings()
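
if __name__ == "__main__":
    # Debug helper (not used by the app): print the resolved configuration so
    # env/.env overrides can be verified quickly, e.g. `python config.py`
    # (the filename `config.py` is an assumption about this module's name).
    for name, value in settings.model_dump().items():
        print(f"{name} = {value!r}")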