Spaces:
Sleeping
Sleeping
| from pathlib import Path | |
| from pydantic_settings import BaseSettings, SettingsConfigDict | |
| class Settings(BaseSettings): | |
| model_config = SettingsConfigDict( | |
| env_file=".env", env_file_encoding="utf-8", extra="ignore" | |
| ) | |
| # ββ Paths ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| data_dir: Path = Path("data") | |
| vector_store_dir: Path = Path("data/vector_store") | |
| memories_dir: Path = Path("data/memories") | |
| users_json: Path = Path("data/users.json") | |
| logs_dir: Path = Path("logs") | |
| # ββ Retrieval ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| embed_model: str = "BAAI/bge-small-en-v1.5" | |
| retrieval_top_k: int = 5 | |
| retrieval_rerank_k: int = 3 | |
| retrieval_fast_k: int = 2 # used when affect == FRUSTRATED | |
| # Minimum cosine score for a chunk to be used in turnaround re-retrieval. | |
| # Below this, we'd rather fall back to original chunks than serve clearly | |
| # off-topic memories just to "look different." | |
| turnaround_min_score: float = 0.45 | |
| rerank_enabled: bool = True | |
| rerank_pool_k: int = 12 # wider pre-rerank fetch per personal sub-intent | |
| rerank_fast_pool_k: int = 8 # smaller pool on the FRUSTRATED fast path | |
| rerank_lambda: float = 0.7 # MMR: relevance vs diversity (1.0 = pure cosine) | |
| rerank_history_turns: int = 2 # last-N user turns folded into context vector | |
| rerank_query_weight: float = 0.7 # current query weight vs history mean | |
| # LLM tiers β both hit Ollama Cloud via OpenAI-compatible endpoint. | |
| # Same model on both tiers for now; swap one when a larger cloud model | |
| # is provisioned and the latency-fallback should branch. | |
| primary_model: str = "gemma4:31b-cloud" | |
| primary_base_url: str = "http://localhost:11434/v1" | |
| primary_api_key: str = "ollama" | |
| fallback_model: str = "gemma4:31b-cloud" | |
| fallback_base_url: str = "http://localhost:11434/v1" | |
| fallback_api_key: str = "ollama" | |
| # Active tier: "primary" | "fallback" | |
| active_llm_tier: str = "primary" | |
| # Vision model used only by /ink/recognize (needs image_url support). | |
| # Defaults to Gemini flash via the OpenAI-compatible endpoint. | |
| ink_vision_model: str = "gemini-2.0-flash" | |
| ink_vision_base_url: str = "https://generativelanguage.googleapis.com/v1beta/openai/" | |
| ink_vision_api_key: str = "" | |
| # off | strip | full | suppress | |
| thinking_mode: str = "off" | |
| thinking_token_budget: int = 4096 | |
| fallback_latency_threshold: float = 3.5 # seconds before tier fallback | |
| # ββ Generation ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| max_tokens_happy: int = 150 | |
| max_tokens_neutral: int = 100 | |
| max_tokens_frustrated: int = 60 | |
| max_tokens_surprised: int = 80 | |
| # ββ Sensing βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| affect_ema_alpha: float = 0.3 # exponential moving average smoothing | |
| gaze_dwell_threshold_s: float = 1.5 | |
| air_write_velocity_start: int = 15 # px/frame β stroke begin threshold | |
| air_write_velocity_end: int = 5 # px/frame β stroke end threshold | |
| air_write_end_gap_ms: int = 200 # ms of stillness to end a stroke | |
| conflict_overlap_ms: int = 500 # audio + gesture co-occurrence window | |
| # ββ Evaluation ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| slo_target_s: float = 6.0 # max acceptable response latency (seconds) | |
| evals_enabled: bool = True | |
| nli_model: str = "cross-encoder/nli-deberta-v3-small" | |
| faithfulness_threshold: float = ( | |
| 0.5 # entailment prob for a sentence to count as grounded | |
| ) | |
| settings = Settings() | |