Spaces:

ub-aac-chatbot
/

aac-chatbot

Sleeping

File size: 4,253 Bytes

from pathlib import Path

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Typed application configuration with built-in defaults.

    Each attribute below is one setting and carries its default value.
    ``model_config`` points the loader at a UTF-8 ``.env`` file and uses
    ``extra="ignore"`` so keys that are not declared here are not an error.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # ── Paths ─────────────────────────────────────────────────────────────────
    data_dir: Path = Path("data")
    vector_store_dir: Path = Path("data/vector_store")
    memories_dir: Path = Path("data/memories")
    users_json: Path = Path("data/users.json")
    logs_dir: Path = Path("logs")

    # ── Retrieval ─────────────────────────────────────────────────────────────
    embed_model: str = "BAAI/bge-small-en-v1.5"
    retrieval_top_k: int = 5
    retrieval_rerank_k: int = 3
    # Reduced k, used when affect == FRUSTRATED.
    retrieval_fast_k: int = 2
    # Cosine-score floor for a chunk to be used in turnaround re-retrieval.
    # Anything scoring lower is dropped in favour of the original chunks, so
    # clearly off-topic memories are not served just to "look different."
    turnaround_min_score: float = 0.45

    # ── Reranking ─────────────────────────────────────────────────────────────
    rerank_enabled: bool = True
    # Wider pre-rerank fetch per personal sub-intent.
    rerank_pool_k: int = 12
    # Smaller pool on the FRUSTRATED fast path.
    rerank_fast_pool_k: int = 8
    # MMR trade-off: relevance vs diversity (1.0 = pure cosine).
    rerank_lambda: float = 0.7
    # Last-N user turns folded into the context vector.
    rerank_history_turns: int = 2
    # Weight of the current query vs the history mean.
    rerank_query_weight: float = 0.7

    # ── LLM tiers ─────────────────────────────────────────────────────────────
    # Both tiers hit Ollama Cloud via an OpenAI-compatible endpoint. They use
    # the same model for now; swap one when a larger cloud model is
    # provisioned and the latency fallback should branch.
    primary_model: str = "gemma4:31b-cloud"
    primary_base_url: str = "http://localhost:11434/v1"
    primary_api_key: str = "ollama"

    fallback_model: str = "gemma4:31b-cloud"
    fallback_base_url: str = "http://localhost:11434/v1"
    fallback_api_key: str = "ollama"

    # Active tier: "primary" | "fallback"
    active_llm_tier: str = "primary"

    # ── Vision ────────────────────────────────────────────────────────────────
    # Used only by /ink/recognize, which needs image_url support. Defaults to
    # Gemini flash via the OpenAI-compatible endpoint.
    ink_vision_model: str = "gemini-2.0-flash"
    ink_vision_base_url: str = "https://generativelanguage.googleapis.com/v1beta/openai/"
    ink_vision_api_key: str = ""

    # ── Thinking / fallback ───────────────────────────────────────────────────
    # One of: off | strip | full | suppress
    thinking_mode: str = "off"
    thinking_token_budget: int = 4096
    # Seconds before tier fallback.
    fallback_latency_threshold: float = 3.5

    # ── Generation ────────────────────────────────────────────────────────────
    max_tokens_happy: int = 150
    max_tokens_neutral: int = 100
    max_tokens_frustrated: int = 60
    max_tokens_surprised: int = 80

    # ── Sensing ───────────────────────────────────────────────────────────────
    # Exponential moving average smoothing factor.
    affect_ema_alpha: float = 0.3
    gaze_dwell_threshold_s: float = 1.5
    # px/frame — stroke begin threshold.
    air_write_velocity_start: int = 15
    # px/frame — stroke end threshold.
    air_write_velocity_end: int = 5
    # ms of stillness to end a stroke.
    air_write_end_gap_ms: int = 200
    # Audio + gesture co-occurrence window, in ms.
    conflict_overlap_ms: int = 500

    # ── Evaluation ────────────────────────────────────────────────────────────
    # Max acceptable response latency (seconds).
    slo_target_s: float = 6.0
    evals_enabled: bool = True
    nli_model: str = "cross-encoder/nli-deberta-v3-small"
    # Entailment probability for a sentence to count as grounded.
    faithfulness_threshold: float = 0.5


# Shared configuration instance, constructed once when this module is imported.
# Settings() is called with no arguments, so values come from whatever the
# BaseSettings loader resolves (see model_config) or the class defaults.
settings: Settings = Settings()