Spaces:
Sleeping
Sleeping
File size: 4,253 Bytes
e06dc15 375924d e06dc15 375924d e06dc15 084a2f9 e06dc15 5187368 e06dc15 5187368 e06dc15 375924d ce51e88 e06dc15 c511e09 5187368 e06dc15 5187368 e06dc15 5187368 e06dc15 8539a00 375924d d60da4f 375924d e06dc15 375924d e06dc15 375924d e06dc15 9ad188a ed5dd6f 9ad188a e06dc15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | from pathlib import Path
from typing import Literal

from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Central application configuration.

    Each field's default is declared below; pydantic-settings lets a value
    be overridden from the process environment or from the ``.env`` file
    named in ``model_config``. Keys that do not match a declared field are
    ignored (``extra="ignore"``).

    ``active_llm_tier`` and ``thinking_mode`` accept only the values listed
    in their ``Literal`` annotations, so a misspelled override fails when the
    settings are loaded instead of being carried around as an arbitrary
    string.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # ── Paths ────────────────────────────────────────────────────────────
    data_dir: Path = Path("data")
    vector_store_dir: Path = Path("data/vector_store")
    memories_dir: Path = Path("data/memories")
    users_json: Path = Path("data/users.json")
    logs_dir: Path = Path("logs")

    # ── Retrieval ────────────────────────────────────────────────────────
    embed_model: str = "BAAI/bge-small-en-v1.5"
    retrieval_top_k: int = 5
    retrieval_rerank_k: int = 3
    retrieval_fast_k: int = 2  # used when affect == FRUSTRATED
    # Minimum cosine score for a chunk to be used in turnaround re-retrieval.
    # Below this, we'd rather fall back to original chunks than serve clearly
    # off-topic memories just to "look different."
    turnaround_min_score: float = 0.45
    rerank_enabled: bool = True
    rerank_pool_k: int = 12  # wider pre-rerank fetch per personal sub-intent
    rerank_fast_pool_k: int = 8  # smaller pool on the FRUSTRATED fast path
    rerank_lambda: float = 0.7  # MMR: relevance vs diversity (1.0 = pure cosine)
    rerank_history_turns: int = 2  # last-N user turns folded into context vector
    rerank_query_weight: float = 0.7  # current query weight vs history mean

    # ── LLM ──────────────────────────────────────────────────────────────
    # LLM tiers — both hit Ollama Cloud via OpenAI-compatible endpoint.
    # Same model on both tiers for now; swap one when a larger cloud model
    # is provisioned and the latency-fallback should branch.
    # NOTE(review): both base URLs default to a localhost endpoint —
    # presumably a local Ollama instance fronts the cloud model; confirm.
    primary_model: str = "gemma4:31b-cloud"
    primary_base_url: str = "http://localhost:11434/v1"
    primary_api_key: str = "ollama"
    fallback_model: str = "gemma4:31b-cloud"
    fallback_base_url: str = "http://localhost:11434/v1"
    fallback_api_key: str = "ollama"
    # Active tier: "primary" | "fallback"
    active_llm_tier: Literal["primary", "fallback"] = "primary"
    # Vision model used only by /ink/recognize (needs image_url support).
    # Defaults to Gemini flash via the OpenAI-compatible endpoint.
    ink_vision_model: str = "gemini-2.0-flash"
    ink_vision_base_url: str = (
        "https://generativelanguage.googleapis.com/v1beta/openai/"
    )
    ink_vision_api_key: str = ""
    # off | strip | full | suppress
    thinking_mode: Literal["off", "strip", "full", "suppress"] = "off"
    thinking_token_budget: int = 4096
    fallback_latency_threshold: float = 3.5  # seconds before tier fallback

    # ── Generation ───────────────────────────────────────────────────────
    max_tokens_happy: int = 150
    max_tokens_neutral: int = 100
    max_tokens_frustrated: int = 60
    max_tokens_surprised: int = 80

    # ── Sensing ──────────────────────────────────────────────────────────
    affect_ema_alpha: float = 0.3  # exponential moving average smoothing
    gaze_dwell_threshold_s: float = 1.5
    air_write_velocity_start: int = 15  # px/frame — stroke begin threshold
    air_write_velocity_end: int = 5  # px/frame — stroke end threshold
    air_write_end_gap_ms: int = 200  # ms of stillness to end a stroke
    conflict_overlap_ms: int = 500  # audio + gesture co-occurrence window

    # ── Evaluation ───────────────────────────────────────────────────────
    slo_target_s: float = 6.0  # max acceptable response latency (seconds)
    evals_enabled: bool = True
    nli_model: str = "cross-encoder/nli-deberta-v3-small"
    # entailment prob for a sentence to count as grounded
    faithfulness_threshold: float = 0.5
# Shared module-level instance, built once when this module is first imported;
# the environment and the configured .env file are read at that point.
settings = Settings()
|