"""Central configuration — Pydantic BaseSettings env-bound.

Single source of truth: the ``settings = Settings()`` singleton. Every module
imports this. The ``.env`` file is automatically loaded (python-dotenv) if it
exists in the project root.

Profiles:
  * ``LLM_PROFILE=vllm``    — Qwen 2.5 on AMD MI300X via vLLM (OpenAI-compat). Production default.
  * ``LLM_PROFILE=ollama``  — local Ollama (Qwen 2.5 7B Instruct). Dev / data-privacy.
  * ``LLM_PROFILE=dummy``   — deterministic stub (CI / eval / load).
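
Example ``.env`` (a minimal sketch; values are illustrative, not deployment
defaults — field names below map 1:1 to the ``Settings`` fields):

    LLM_PROFILE=ollama
    OLLAMA_MODEL=qwen2.5:7b-instruct
    VLLM_BASE_URL=http://localhost:8000/v1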
"""

from __future__ import annotations

from pathlib import Path
from typing import Literal

from pydantic import Field, computed_field
from pydantic_settings import BaseSettings, SettingsConfigDict

# Project root absolute path — independent of where we are launched from
PROJECT_ROOT = Path(__file__).resolve().parent


class Settings(BaseSettings):
    """Full application runtime configuration.

    Every field can be set via environment variables or the ``.env`` file;
    environment variables take precedence, and the declared defaults apply
    when neither provides a value.
    """

    model_config = SettingsConfigDict(
        env_file=PROJECT_ROOT / ".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",  # don't raise on unknown env vars (e.g. LANGCHAIN_*)
    )

    # ---------------------------------------------------------------------
    # LLM provider selection
    # ---------------------------------------------------------------------
    llm_profile: Literal["vllm", "ollama", "dummy"] = "vllm"
    """Default LLM profile. Runtime override:
    ``graph.invoke(state, config={"configurable": {"llm_profile": "dummy"}})``."""

    # vLLM (AMD Developer Cloud MI300X) — production default
    vllm_base_url: str = "http://localhost:8000/v1"
    """vLLM endpoint URL. In production: http://<mi300x-public-ip>:8000/v1"""

    vllm_model: str = "Qwen/Qwen2.5-14B-Instruct"
    """Model id served by vLLM. Alternatives: Qwen/Qwen2.5-32B-Instruct, Qwen/Qwen2.5-7B-Instruct."""

    vllm_api_key: str | None = None
    """Optional API key for vLLM. If unset, sent as 'EMPTY' (vLLM no-auth mode).
    In production set a real key and start vLLM with --api-key <key>."""

    vllm_temperature: float = 0.0
    vllm_max_tokens: int = 4096
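
    # Serving-side sketch (illustrative): vLLM's OpenAI-compatible server can
    # be started with an API key; flags beyond --port / --api-key are
    # deployment-specific.
    #
    #   vllm serve Qwen/Qwen2.5-14B-Instruct --port 8000 --api-key <key>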

    # Ollama — local fallback
    ollama_base_url: str = "http://localhost:11434"
    ollama_model: str = "qwen2.5:7b-instruct"
    ollama_temperature: float = 0.0

    # ---------------------------------------------------------------------
    # Embedding model — sentence-transformers, runs locally on CPU
    # ---------------------------------------------------------------------
    embedding_model: str = "BAAI/bge-m3"
    """Default: BAAI/bge-m3 (2.27 GB, 1024 dim, multilingual EN/HU/DE/FR/...).
    Lighter alternative if memory-constrained: BAAI/bge-small-en-v1.5 (133 MB, 384 dim, en-only)."""
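
    # Consumption sketch (assumes sentence-transformers is installed; the
    # project's actual embedding wiring may differ):
    #
    #   from sentence_transformers import SentenceTransformer
    #   encoder = SentenceTransformer(settings.embedding_model, device="cpu")
    #   vectors = encoder.encode(["some chunk of text"])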

    # ---------------------------------------------------------------------
    # Storage
    # ---------------------------------------------------------------------
    chroma_path: Path = Field(default=PROJECT_ROOT / "chroma_db")
    chroma_collection: str = "documents"
    checkpoint_db_path: Path = Field(default=PROJECT_ROOT / "data" / "checkpoints.sqlite")

    # ---------------------------------------------------------------------
    # Pipeline tuning
    # ---------------------------------------------------------------------
    chunk_max_chars: int = 15_000
    chunk_overlap_chars: int = 500
    single_call_threshold: int = 30_000
    """If doc.full_text < this many chars, a single LLM call is enough (no chunking)."""

    # Loop guards
    chat_max_iterations: int = 10
    """Chat agent ↔ tools loop max iterations — infinite-loop guard."""

    validator_max_retries: int = 2
    """Chat validator → agent retry count when source citations are missing."""

    dd_supervisor_max_iterations: int = 4
    """DD supervisor max iterations before forced synthesizer fallback."""

    # ---------------------------------------------------------------------
    # Streamlit
    # ---------------------------------------------------------------------
    streamlit_port: int = 8501

    # ---------------------------------------------------------------------
    # LangSmith observability (optional)
    # ---------------------------------------------------------------------
    langchain_tracing_v2: bool = False
    langchain_api_key: str | None = None
    langchain_project: str = "document-intelligence-amd"

    # ---------------------------------------------------------------------
    # Computed fields
    # ---------------------------------------------------------------------
    @computed_field
    @property
    def project_root(self) -> Path:
        return PROJECT_ROOT

    @computed_field
    @property
    def langsmith_enabled(self) -> bool:
        return self.langchain_tracing_v2 and bool(self.langchain_api_key)

    @computed_field
    @property
    def is_dummy(self) -> bool:
        return self.llm_profile == "dummy"


# Singleton — every module imports this
settings = Settings()
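

# ---------------------------------------------------------------------------
# Usage sketch: how a consumer might build an OpenAI-compatible client from
# the vLLM profile. Assumes langchain-openai; the project's actual client
# wiring may differ.
#
#   from langchain_openai import ChatOpenAI
#
#   llm = ChatOpenAI(
#       base_url=settings.vllm_base_url,
#       api_key=settings.vllm_api_key or "EMPTY",
#       model=settings.vllm_model,
#       temperature=settings.vllm_temperature,
#       max_tokens=settings.vllm_max_tokens,
#   )

if __name__ == "__main__":  # pragma: no cover
    # Local debugging aid: print the resolved configuration (defaults merged
    # with .env / environment overrides) as JSON.
    print(settings.model_dump_json(indent=2))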