File size: 4,253 Bytes
e06dc15
375924d
e06dc15
 
 
 
375924d
 
 
e06dc15
 
 
084a2f9
e06dc15
 
5187368
e06dc15
5187368
e06dc15
 
 
375924d
ce51e88
 
 
 
e06dc15
c511e09
 
 
 
 
 
 
5187368
 
 
 
 
 
e06dc15
5187368
 
 
e06dc15
5187368
 
e06dc15
8539a00
 
 
 
 
 
375924d
d60da4f
 
375924d
e06dc15
 
 
 
 
 
 
 
375924d
e06dc15
 
375924d
 
 
e06dc15
9ad188a
 
ed5dd6f
 
 
 
 
9ad188a
e06dc15
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from pathlib import Path

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Typed application settings with in-code defaults.

    ``model_config`` names a UTF-8 ``.env`` file as a settings source and
    sets ``extra="ignore"``. Every attribute below is a declared field with
    the default shown.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # ── Paths ────────────────────────────────────────────────────────────────
    data_dir: Path = Path("data")
    vector_store_dir: Path = Path("data/vector_store")
    memories_dir: Path = Path("data/memories")
    users_json: Path = Path("data/users.json")
    logs_dir: Path = Path("logs")

    # ── Retrieval ────────────────────────────────────────────────────────────
    embed_model: str = "BAAI/bge-small-en-v1.5"
    retrieval_top_k: int = 5
    retrieval_rerank_k: int = 3
    # Used when affect == FRUSTRATED.
    retrieval_fast_k: int = 2
    # Minimum cosine score for a chunk to be used in turnaround re-retrieval.
    # Below this, we'd rather fall back to original chunks than serve clearly
    # off-topic memories just to "look different."
    turnaround_min_score: float = 0.45

    # ── Reranking ────────────────────────────────────────────────────────────
    rerank_enabled: bool = True
    # Wider pre-rerank fetch per personal sub-intent.
    rerank_pool_k: int = 12
    # Smaller pool on the FRUSTRATED fast path.
    rerank_fast_pool_k: int = 8
    # MMR: relevance vs diversity (1.0 = pure cosine).
    rerank_lambda: float = 0.7
    # Last-N user turns folded into the context vector.
    rerank_history_turns: int = 2
    # Current query weight vs history mean.
    rerank_query_weight: float = 0.7

    # ── LLM tiers ────────────────────────────────────────────────────────────
    # LLM tiers — both hit Ollama Cloud via OpenAI-compatible endpoint.
    # Same model on both tiers for now; swap one when a larger cloud model
    # is provisioned and the latency-fallback should branch.
    primary_model: str = "gemma4:31b-cloud"
    primary_base_url: str = "http://localhost:11434/v1"
    primary_api_key: str = "ollama"

    fallback_model: str = "gemma4:31b-cloud"
    fallback_base_url: str = "http://localhost:11434/v1"
    fallback_api_key: str = "ollama"

    # Active tier: "primary" | "fallback"
    active_llm_tier: str = "primary"

    # ── Ink vision ───────────────────────────────────────────────────────────
    # Vision model used only by /ink/recognize (needs image_url support).
    # Defaults to Gemini flash via the OpenAI-compatible endpoint.
    ink_vision_model: str = "gemini-2.0-flash"
    ink_vision_base_url: str = "https://generativelanguage.googleapis.com/v1beta/openai/"
    ink_vision_api_key: str = ""

    # ── Thinking / tier fallback ─────────────────────────────────────────────
    # off | strip | full | suppress
    thinking_mode: str = "off"
    thinking_token_budget: int = 4096
    # Seconds before tier fallback.
    fallback_latency_threshold: float = 3.5

    # ── Generation ───────────────────────────────────────────────────────────
    max_tokens_happy: int = 150
    max_tokens_neutral: int = 100
    max_tokens_frustrated: int = 60
    max_tokens_surprised: int = 80

    # ── Sensing ──────────────────────────────────────────────────────────────
    # Exponential moving average smoothing.
    affect_ema_alpha: float = 0.3
    gaze_dwell_threshold_s: float = 1.5
    # px/frame — stroke begin threshold.
    air_write_velocity_start: int = 15
    # px/frame — stroke end threshold.
    air_write_velocity_end: int = 5
    # ms of stillness to end a stroke.
    air_write_end_gap_ms: int = 200
    # Audio + gesture co-occurrence window (ms).
    conflict_overlap_ms: int = 500

    # ── Evaluation ───────────────────────────────────────────────────────────
    # Max acceptable response latency (seconds).
    slo_target_s: float = 6.0
    evals_enabled: bool = True
    nli_model: str = "cross-encoder/nli-deberta-v3-small"
    # Entailment prob for a sentence to count as grounded.
    faithfulness_threshold: float = 0.5


# Module-level Settings instance, constructed when this module is imported.
settings = Settings()