Spaces:

ub-aac-chatbot
/

aac-chatbot

Sleeping

App Files Files Community

aac-chatbot / backend / config / settings.py

akashkolte

air-writing

8539a00 19 days ago

raw

history blame contribute delete

4.25 kB

	from pathlib import Path

	from pydantic_settings import BaseSettings, SettingsConfigDict


	class Settings(BaseSettings):
	    """Central configuration for the backend.

	    Every field declares its default here. Because the class derives from
	    pydantic-settings' ``BaseSettings`` and ``model_config`` names ``.env``
	    as the dotenv file, values can be overridden through environment
	    variables or that file. ``extra="ignore"`` means keys that do not
	    match a declared field are skipped instead of raising.
	    """

	    model_config = SettingsConfigDict(
	        env_file=".env", env_file_encoding="utf-8", extra="ignore"
	    )

	    # ── Paths ──────────────────────────────────────────────────────────────────
	    # All defaults are relative paths (no leading slash or anchor).
	    data_dir: Path = Path("data")
	    vector_store_dir: Path = Path("data/vector_store")
	    memories_dir: Path = Path("data/memories")
	    users_json: Path = Path("data/users.json")
	    logs_dir: Path = Path("logs")

	    # ── Retrieval ────────────────────────────────────────────────────────────
	    embed_model: str = "BAAI/bge-small-en-v1.5"
	    retrieval_top_k: int = 5
	    retrieval_rerank_k: int = 3
	    retrieval_fast_k: int = 2  # used when affect == FRUSTRATED
	    # Minimum cosine score for a chunk to be used in turnaround re-retrieval.
	    # Below this, we'd rather fall back to the original chunks than serve
	    # clearly off-topic memories just to "look different."
	    turnaround_min_score: float = 0.45

	    rerank_enabled: bool = True
	    rerank_pool_k: int = 12  # wider pre-rerank fetch per personal sub-intent
	    rerank_fast_pool_k: int = 8  # smaller pool on the FRUSTRATED fast path
	    rerank_lambda: float = 0.7  # MMR: relevance vs diversity (1.0 = pure cosine)
	    rerank_history_turns: int = 2  # last-N user turns folded into context vector
	    rerank_query_weight: float = 0.7  # current query weight vs history mean

	    # LLM tiers — both hit Ollama Cloud via an OpenAI-compatible endpoint.
	    # Same model on both tiers for now; swap one when a larger cloud model
	    # is provisioned and the latency fallback should branch.
	    primary_model: str = "gemma4:31b-cloud"
	    primary_base_url: str = "http://localhost:11434/v1"
	    primary_api_key: str = "ollama"

	    fallback_model: str = "gemma4:31b-cloud"
	    fallback_base_url: str = "http://localhost:11434/v1"
	    fallback_api_key: str = "ollama"

	    # Active tier: "primary" | "fallback"
	    # NOTE(review): annotated as plain ``str``, so this class does not reject
	    # a value outside the two listed above.
	    active_llm_tier: str = "primary"

	    # Vision model used only by /ink/recognize (needs image_url support).
	    # Defaults to Gemini flash via the OpenAI-compatible endpoint.
	    ink_vision_model: str = "gemini-2.0-flash"
	    ink_vision_base_url: str = "https://generativelanguage.googleapis.com/v1beta/openai/"
	    # Empty by default — presumably has to be supplied through the
	    # environment before /ink/recognize can authenticate; TODO confirm.
	    ink_vision_api_key: str = ""

	    # off | strip | full | suppress
	    # NOTE(review): also a plain ``str``; the four modes are not enforced here.
	    thinking_mode: str = "off"
	    thinking_token_budget: int = 4096
	    fallback_latency_threshold: float = 3.5  # seconds before tier fallback

	    # ── Generation ────────────────────────────────────────────────────────────
	    max_tokens_happy: int = 150
	    max_tokens_neutral: int = 100
	    max_tokens_frustrated: int = 60
	    max_tokens_surprised: int = 80

	    # ── Sensing ───────────────────────────────────────────────────────────────
	    affect_ema_alpha: float = 0.3  # exponential moving average smoothing
	    gaze_dwell_threshold_s: float = 1.5
	    air_write_velocity_start: int = 15  # px/frame — stroke begin threshold
	    air_write_velocity_end: int = 5  # px/frame — stroke end threshold
	    air_write_end_gap_ms: int = 200  # ms of stillness to end a stroke
	    conflict_overlap_ms: int = 500  # audio + gesture co-occurrence window

	    # ── Evaluation ────────────────────────────────────────────────────────────
	    slo_target_s: float = 6.0  # max acceptable response latency (seconds)
	    evals_enabled: bool = True
	    nli_model: str = "cross-encoder/nli-deberta-v3-small"
	    # Entailment probability for a sentence to count as grounded.
	    faithfulness_threshold: float = 0.5


	# Module-level Settings instance, constructed once when this module is
	# imported; presumably the rest of the backend imports this object rather
	# than building its own — verify against callers.
	settings = Settings()