# aac-chatbot/backend/config/settings.py
# (page-scrape residue kept for reference: "akashkolte's picture", "air-writing", "8539a00")
from pathlib import Path
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Central application configuration.

    Every field below is a default that can be overridden through the
    process environment or a UTF-8 ``.env`` file. Keys that do not match a
    declared field are ignored rather than rejected (``extra="ignore"``).
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # ── Paths ──────────────────────────────────────────────────────────────────
    # All defaults are relative paths.
    data_dir: Path = Path("data")
    vector_store_dir: Path = Path("data/vector_store")
    memories_dir: Path = Path("data/memories")
    users_json: Path = Path("data/users.json")
    logs_dir: Path = Path("logs")

    # ── Retrieval ────────────────────────────────────────────────────────────
    embed_model: str = "BAAI/bge-small-en-v1.5"
    retrieval_top_k: int = 5
    retrieval_rerank_k: int = 3
    retrieval_fast_k: int = 2  # used when affect == FRUSTRATED
    # Minimum cosine score for a chunk to be used in turnaround re-retrieval.
    # Below this, we'd rather fall back to original chunks than serve clearly
    # off-topic memories just to "look different."
    turnaround_min_score: float = 0.45
    rerank_enabled: bool = True
    rerank_pool_k: int = 12  # wider pre-rerank fetch per personal sub-intent
    rerank_fast_pool_k: int = 8  # smaller pool on the FRUSTRATED fast path
    rerank_lambda: float = 0.7  # MMR: relevance vs diversity (1.0 = pure cosine)
    rerank_history_turns: int = 2  # last-N user turns folded into context vector
    rerank_query_weight: float = 0.7  # current query weight vs history mean

    # ── LLM ──────────────────────────────────────────────────────────────────
    # LLM tiers — both hit Ollama Cloud via OpenAI-compatible endpoint.
    # Same model on both tiers for now; swap one when a larger cloud model
    # is provisioned and the latency-fallback should branch.
    primary_model: str = "gemma4:31b-cloud"
    primary_base_url: str = "http://localhost:11434/v1"
    primary_api_key: str = "ollama"
    fallback_model: str = "gemma4:31b-cloud"
    fallback_base_url: str = "http://localhost:11434/v1"
    fallback_api_key: str = "ollama"
    # Active tier: "primary" | "fallback"
    # NOTE(review): typed as plain str, so other values are not rejected here.
    active_llm_tier: str = "primary"
    # Vision model used only by /ink/recognize (needs image_url support).
    # Defaults to Gemini flash via the OpenAI-compatible endpoint.
    ink_vision_model: str = "gemini-2.0-flash"
    ink_vision_base_url: str = "https://generativelanguage.googleapis.com/v1beta/openai/"
    # Empty by default; presumably expected to be supplied via env/.env — confirm.
    ink_vision_api_key: str = ""
    # off | strip | full | suppress
    thinking_mode: str = "off"
    thinking_token_budget: int = 4096
    fallback_latency_threshold: float = 3.5  # seconds before tier fallback

    # ── Generation ────────────────────────────────────────────────────────────
    # Per-affect max-token settings, as the field names indicate.
    max_tokens_happy: int = 150
    max_tokens_neutral: int = 100
    max_tokens_frustrated: int = 60
    max_tokens_surprised: int = 80

    # ── Sensing ───────────────────────────────────────────────────────────────
    affect_ema_alpha: float = 0.3  # exponential moving average smoothing
    gaze_dwell_threshold_s: float = 1.5
    air_write_velocity_start: int = 15  # px/frame — stroke begin threshold
    air_write_velocity_end: int = 5  # px/frame — stroke end threshold
    air_write_end_gap_ms: int = 200  # ms of stillness to end a stroke
    conflict_overlap_ms: int = 500  # audio + gesture co-occurrence window

    # ── Evaluation ────────────────────────────────────────────────────────────
    slo_target_s: float = 6.0  # max acceptable response latency (seconds)
    evals_enabled: bool = True
    nli_model: str = "cross-encoder/nli-deberta-v3-small"
    # Entailment probability for a sentence to count as grounded.
    faithfulness_threshold: float = 0.5
# Module-level Settings instance, built once when this module is imported.
# NOTE(review): presumably the shared object other modules import — confirm
# against callers.
settings: Settings = Settings()