# paperhawk/config.py
"""Central configuration β€” Pydantic BaseSettings env-bound.
Single source of truth: the ``settings = Settings()`` singleton. Every module
imports this. The ``.env`` file is automatically loaded (python-dotenv) if it
exists in the project root.
Profiles:
* ``LLM_PROFILE=vllm`` β€” Qwen 2.5 on AMD MI300X via vLLM (OpenAI-compat). Production default.
* ``LLM_PROFILE=ollama`` β€” local Ollama (Qwen 2.5 7B Instruct). Dev / data-privacy.
* ``LLM_PROFILE=dummy`` β€” deterministic stub (CI / eval / load).
"""
from __future__ import annotations
from pathlib import Path
from typing import Literal
from pydantic import Field, computed_field
from pydantic_settings import BaseSettings, SettingsConfigDict
# Absolute path of the project root — independent of where the app is launched from
PROJECT_ROOT = Path(__file__).resolve().parent
class Settings(BaseSettings):
"""Full application runtime configuration.
Every field reads from ``.env`` or environment variables, with defaults; if
``.env`` does not exist, the defaults apply.
"""
model_config = SettingsConfigDict(
env_file=PROJECT_ROOT / ".env",
env_file_encoding="utf-8",
case_sensitive=False,
extra="ignore", # don't raise on unknown env vars (e.g. LANGCHAIN_*)
)
# ---------------------------------------------------------------------
# LLM provider selection
# ---------------------------------------------------------------------
llm_profile: Literal["vllm", "ollama", "dummy"] = "vllm"
"""Default LLM profile. Runtime override:
``graph.invoke(state, config={"configurable": {"llm_profile": "dummy"}})``."""
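# How a graph node might honour the per-invocation override (hypothetical
# consumer-side code, not part of this module; ``config`` is the LangGraph
# RunnableConfig dict passed into the node):
#   profile = config.get("configurable", {}).get("llm_profile", settings.llm_profile)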
# vLLM (AMD Developer Cloud MI300X) — production default
vllm_base_url: str = "http://localhost:8000/v1"
"""vLLM endpoint URL. In production: http://<mi300x-public-ip>:8000/v1"""
vllm_model: str = "Qwen/Qwen2.5-14B-Instruct"
"""Model id served by vLLM. Alternatives: Qwen/Qwen2.5-32B-Instruct, Qwen/Qwen2.5-7B-Instruct."""
vllm_api_key: str | None = None
"""Optional API key for vLLM. If unset, sent as 'EMPTY' (vLLM no-auth mode).
In production set a real key and start vLLM with --api-key <key>."""
vllm_temperature: float = 0.0
vllm_max_tokens: int = 4096
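# A hedged sketch of serving the production model on the MI300X box with the
# standard ``vllm serve`` entry point (flags shown are illustrative):
#
#   vllm serve Qwen/Qwen2.5-14B-Instruct --port 8000 --api-key "$VLLM_API_KEY"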
# Ollama — local fallback
ollama_base_url: str = "http://localhost:11434"
ollama_model: str = "qwen2.5:7b-instruct"
ollama_temperature: float = 0.0
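# To exercise this profile locally with the standard Ollama CLI (the daemon
# listens on :11434 by default, matching ``ollama_base_url``):
#
#   ollama pull qwen2.5:7b-instruct
#   ollama serve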
# ---------------------------------------------------------------------
# Embedding model — sentence-transformers, runs locally on CPU
# ---------------------------------------------------------------------
embedding_model: str = "BAAI/bge-m3"
"""Default: BAAI/bge-m3 (2.27 GB, 1024 dim, multilingual EN/HU/DE/FR/...).
Lighter alternative if memory-constrained: BAAI/bge-small-en-v1.5 (133 MB, 384 dim, en-only)."""
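# Illustrative load via the sentence-transformers API (weights are downloaded
# from the Hugging Face Hub on first use):
#
#   from sentence_transformers import SentenceTransformer
#   embedder = SentenceTransformer(settings.embedding_model)
#   vectors = embedder.encode(["some chunk of text"])  # (1, 1024) for bge-m3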
# ---------------------------------------------------------------------
# Storage
# ---------------------------------------------------------------------
chroma_path: Path = Field(default=PROJECT_ROOT / "chroma_db")
chroma_collection: str = "documents"
checkpoint_db_path: Path = Field(default=PROJECT_ROOT / "data" / "checkpoints.sqlite")
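# Illustrative wiring of the vector store with the chromadb client API (path
# and collection name come straight from the fields above):
#
#   import chromadb
#   client = chromadb.PersistentClient(path=str(settings.chroma_path))
#   collection = client.get_or_create_collection(settings.chroma_collection)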
# ---------------------------------------------------------------------
# Pipeline tuning
# ---------------------------------------------------------------------
chunk_max_chars: int = 15_000
chunk_overlap_chars: int = 500
single_call_threshold: int = 30_000
"""If doc.full_text < this many chars, a single LLM call is enough (no chunking)."""
# Loop guards
chat_max_iterations: int = 10
"""Chat agent ↔ tools loop max iterations β€” infinite-loop guard."""
validator_max_retries: int = 2
"""Chat validator β†’ agent retry count when source citations are missing."""
dd_supervisor_max_iterations: int = 4
"""DD supervisor max iterations before forced synthesizer fallback."""
# ---------------------------------------------------------------------
# Streamlit
# ---------------------------------------------------------------------
streamlit_port: int = 8501
# ---------------------------------------------------------------------
# LangSmith observability (optional)
# ---------------------------------------------------------------------
langchain_tracing_v2: bool = False
langchain_api_key: str | None = None
langchain_project: str = "document-intelligence-amd"
# ---------------------------------------------------------------------
# Computed fields
# ---------------------------------------------------------------------
@computed_field
@property
def project_root(self) -> Path:
return PROJECT_ROOT
@computed_field
@property
def langsmith_enabled(self) -> bool:
return self.langchain_tracing_v2 and bool(self.langchain_api_key)
@computed_field
@property
def is_dummy(self) -> bool:
return self.llm_profile == "dummy"
# Singleton — every module imports this
settings = Settings()
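# Typical consumer-side usage (assuming the module is importable as ``config``):
#
#   from config import settings
#   base_url = settings.vllm_base_url
#   if settings.is_dummy:
#       ...  # e.g. skip network calls in CI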