Spaces:
Sleeping
Sleeping
Commit Β·
5187368
1
Parent(s): 2d47b97
Remove library bloat
Browse files- .env.example +15 -26
- .gitignore +3 -7
- CLAUDE.md +51 -24
- README.md +124 -22
- backend/api/main.py +4 -6
- backend/config/settings.py +14 -23
- backend/generation/llm_client.py +7 -32
- backend/main.py +5 -6
- backend/pipeline/graph.py +16 -45
- backend/pipeline/nodes/feedback.py +42 -52
- backend/pipeline/nodes/intent.py +1 -2
- backend/pipeline/nodes/planner.py +3 -7
- backend/pipeline/nodes/retrieval.py +1 -3
- backend/pipeline/state.py +6 -7
- backend/retrieval/clustering.py +0 -84
- backend/retrieval/vector_store.py +64 -70
- backend/sensing/air_writing.py +0 -155
- backend/sensing/face_mesh.py +0 -145
- backend/sensing/gaze.py +0 -92
- backend/sensing/gesture.py +0 -102
- backend/sensing/labels.py +6 -0
- backend/ui/app.py +0 -163
- requirements.txt +5 -24
- setup.sh +5 -17
.env.example
CHANGED
|
@@ -1,36 +1,25 @@
|
|
| 1 |
-
# Copy
|
| 2 |
# Settings here override the defaults in config/settings.py.
|
| 3 |
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
# "primary" β Qwen3-30B-A3B on GCP A100/T4 via vLLM
|
| 7 |
-
# "fallback" β Qwen3-8B on same vLLM server
|
| 8 |
-
ACTIVE_LLM_TIER=local
|
| 9 |
|
| 10 |
-
#
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
| 14 |
|
| 15 |
-
|
| 16 |
-
FALLBACK_MODEL=
|
| 17 |
-
|
| 18 |
|
| 19 |
-
#
|
| 20 |
-
|
| 21 |
-
LOCAL_MODEL=gemma4:31b-cloud
|
| 22 |
|
| 23 |
-
#
|
| 24 |
-
MLFLOW_TRACKING_URI=sqlite:///mlflow.db
|
| 25 |
-
MLFLOW_EXPERIMENT=aac-chatbot
|
| 26 |
-
|
| 27 |
-
# ββ Thinking mode βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 28 |
-
# "off" β suppress thinking (fastest, best for latency-sensitive AAC)
|
| 29 |
-
# "strip" β let model think, but strip <think> tags from output
|
| 30 |
-
# "full" β return raw response including <think> blocks
|
| 31 |
THINKING_MODE=off
|
| 32 |
-
# Extra tokens added when thinking is enabled (strip/full). Ignored when off.
|
| 33 |
THINKING_TOKEN_BUDGET=4096
|
| 34 |
|
| 35 |
-
# ββ Latency fallback threshold (seconds) ββββββββββββββββββββββββββββββββββββββ
|
| 36 |
FALLBACK_LATENCY_THRESHOLD=3.5
|
|
|
|
| 1 |
+
# Copy to .env and fill in your values.
|
| 2 |
# Settings here override the defaults in config/settings.py.
|
| 3 |
|
| 4 |
+
# Active tier: "primary" | "fallback"
|
| 5 |
+
ACTIVE_LLM_TIER=primary
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
# Both tiers hit Ollama Cloud over the OpenAI-compatible endpoint.
|
| 8 |
+
# Use a larger model on primary; the fallback fires when cumulative
|
| 9 |
+
# latency exceeds FALLBACK_LATENCY_THRESHOLD seconds.
|
| 10 |
+
PRIMARY_BASE_URL=http://localhost:11434/v1
|
| 11 |
+
PRIMARY_MODEL=gemma4:31b-cloud
|
| 12 |
+
PRIMARY_API_KEY=ollama
|
| 13 |
|
| 14 |
+
FALLBACK_BASE_URL=http://localhost:11434/v1
|
| 15 |
+
FALLBACK_MODEL=gemma4:31b-cloud
|
| 16 |
+
FALLBACK_API_KEY=ollama
|
| 17 |
|
| 18 |
+
# Per-turn logs are written as JSONL to <LOGS_DIR>/turns.jsonl
|
| 19 |
+
LOGS_DIR=logs
|
|
|
|
| 20 |
|
| 21 |
+
# off | strip | full | suppress
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
THINKING_MODE=off
|
|
|
|
| 23 |
THINKING_TOKEN_BUDGET=4096
|
| 24 |
|
|
|
|
| 25 |
FALLBACK_LATENCY_THRESHOLD=3.5
|
.gitignore
CHANGED
|
@@ -19,14 +19,10 @@ env/
|
|
| 19 |
# Data β indexes are rebuilt from source; do NOT commit binaries
|
| 20 |
data/faiss_store/
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
mlruns/
|
| 27 |
-
mlflow.db
|
| 28 |
-
|
| 29 |
-
# Latency logs
|
| 30 |
timings.csv
|
| 31 |
*.csv
|
| 32 |
|
|
|
|
| 19 |
# Data β indexes are rebuilt from source; do NOT commit binaries
|
| 20 |
data/faiss_store/
|
| 21 |
|
| 22 |
+
# Per-turn JSONL logs (contain user conversation content)
|
| 23 |
+
logs/
|
| 24 |
|
| 25 |
+
# Latency CSVs (legacy)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
timings.csv
|
| 27 |
*.csv
|
| 28 |
|
CLAUDE.md
CHANGED
|
@@ -5,7 +5,8 @@
|
|
| 5 |
An AI chatbot that **speaks as an AAC user**, not to them. Given a user persona
|
| 6 |
(Mia, Gerald, or Arjun), it fuses real-time multimodal non-verbal signals with
|
| 7 |
personal memory retrieval to generate responses in that person's authentic voice.
|
| 8 |
-
Orchestrated as a **
|
|
|
|
| 9 |
|
| 10 |
---
|
| 11 |
|
|
@@ -19,33 +20,45 @@ frontend/ React + Vite + TypeScript
|
|
| 19 |
backend/ Python (conda env: aac-chatbot)
|
| 20 |
main.py CLI entry point
|
| 21 |
api/main.py FastAPI REST API
|
| 22 |
-
pipeline/graph.py
|
| 23 |
pipeline/nodes/intent.py L2 β LLM + Pydantic intent routing
|
| 24 |
-
pipeline/nodes/retrieval.py L3 β
|
| 25 |
pipeline/nodes/planner.py L4 β expression-conditioned generation
|
| 26 |
-
pipeline/nodes/feedback.py L5 β
|
| 27 |
-
sensing/
|
| 28 |
-
retrieval/
|
| 29 |
-
generation/
|
| 30 |
guardrails/ Input + output safety checks
|
| 31 |
config/ Pydantic BaseSettings β all config in one place
|
| 32 |
|
| 33 |
-
data/ Shared data (personas,
|
|
|
|
| 34 |
```
|
| 35 |
|
| 36 |
## Key Design Decisions
|
| 37 |
|
| 38 |
-
- **
|
| 39 |
-
|
|
|
|
|
|
|
| 40 |
- **BGE-small-en-v1.5** for embeddings (beats MiniLM on MTEB at same speed)
|
| 41 |
-
- **
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
- **
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
- **Expression-conditioned response shaping** β affect steers tone, retrieval depth,
|
| 47 |
and candidate ranking (not just metadata annotation)
|
| 48 |
- **Bayesian bucket priors** β session-level P(bucket) updated after each accepted turn
|
|
|
|
|
|
|
| 49 |
- **Browser-side sensing** β MediaPipe JS runs in React frontend, only classified
|
| 50 |
labels (affect, gesture, gaze bucket) are sent to the backend API
|
| 51 |
|
|
@@ -69,7 +82,7 @@ data/ Shared data (personas, FAISS indexes)
|
|
| 69 |
# One-time setup
|
| 70 |
bash setup.sh
|
| 71 |
|
| 72 |
-
# CLI
|
| 73 |
python -m backend.main --debug
|
| 74 |
|
| 75 |
# Full stack
|
|
@@ -84,9 +97,10 @@ pnpm --dir frontend dev # React on :7550
|
|
| 84 |
All config lives in [backend/config/settings.py](backend/config/settings.py) as Pydantic `BaseSettings`.
|
| 85 |
Copy `.env.example` β `.env` and set:
|
| 86 |
|
| 87 |
-
- `ACTIVE_LLM_TIER` β `
|
| 88 |
-
- `
|
| 89 |
-
|
|
|
|
| 90 |
|
| 91 |
---
|
| 92 |
|
|
@@ -96,23 +110,36 @@ Copy `.env.example` β `.env` and set:
|
|
| 96 |
|------|---------|
|
| 97 |
| `data/users.json` | Flat user index (id, name, condition, style) |
|
| 98 |
| `data/memories/<uid>.json` | Full persona JSON with bucketed memories |
|
| 99 |
-
| `data/faiss_store/<uid>/` |
|
| 100 |
| `data/generate_users.py` | Regenerates memories + users.json |
|
| 101 |
|
| 102 |
---
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
## Development Notes
|
| 105 |
|
| 106 |
- **NEVER use local Ollama models** (e.g. `qwen3:8b`, `gemma3:1b`) β this machine
|
| 107 |
is not powerful enough and will break. Always use cloud-backed models like
|
| 108 |
-
`
|
| 109 |
- **Adding a persona**: add to `PERSONAS` in `data/generate_users.py`, re-run it,
|
| 110 |
then `python -m backend.retrieval.vector_store` to rebuild indexes
|
| 111 |
- **Changing LLM**: set `ACTIVE_LLM_TIER` in `.env` β no code changes needed
|
| 112 |
-
- **Extending sensing**:
|
| 113 |
-
`
|
|
|
|
|
|
|
|
|
|
| 114 |
- **Guardrail tuning**: edit signal lists in `backend/guardrails/checks.py`
|
| 115 |
- **Affect β generation mapping**: `_AFFECT_CONFIG` in `backend/pipeline/nodes/intent.py`
|
| 116 |
and `_PERSONA_TONE_OVERRIDES` in `backend/pipeline/nodes/planner.py`
|
| 117 |
-
-
|
|
|
|
| 118 |
- Frontend uses pnpm, Node 22+
|
|
|
|
| 5 |
An AI chatbot that **speaks as an AAC user**, not to them. Given a user persona
|
| 6 |
(Mia, Gerald, or Arjun), it fuses real-time multimodal non-verbal signals with
|
| 7 |
personal memory retrieval to generate responses in that person's authentic voice.
|
| 8 |
+
Orchestrated as a **plain Python function chain** across five layers, with two
|
| 9 |
+
conditional branches.
|
| 10 |
|
| 11 |
---
|
| 12 |
|
|
|
|
| 20 |
backend/ Python (conda env: aac-chatbot)
|
| 21 |
main.py CLI entry point
|
| 22 |
api/main.py FastAPI REST API
|
| 23 |
+
pipeline/graph.py run_pipeline() β plain function chain with 2 conditional branches
|
| 24 |
pipeline/nodes/intent.py L2 β LLM + Pydantic intent routing
|
| 25 |
+
pipeline/nodes/retrieval.py L3 β BGE embeddings + torch tensor cosine search (fast / full)
|
| 26 |
pipeline/nodes/planner.py L4 β expression-conditioned generation
|
| 27 |
+
pipeline/nodes/feedback.py L5 β JSONL turn logging + Bayesian bucket priors
|
| 28 |
+
sensing/labels.py GESTURE_TO_TAG label map (sensing itself runs in browser)
|
| 29 |
+
retrieval/ BGE embeddings (torch), Bayesian bucket priors
|
| 30 |
+
generation/ Two-tier LLM client (primary / fallback, both Ollama Cloud)
|
| 31 |
guardrails/ Input + output safety checks
|
| 32 |
config/ Pydantic BaseSettings β all config in one place
|
| 33 |
|
| 34 |
+
data/ Shared data (personas, vector indexes)
|
| 35 |
+
logs/ Per-turn JSONL logs (gitignored)
|
| 36 |
```
|
| 37 |
|
| 38 |
## Key Design Decisions
|
| 39 |
|
| 40 |
+
- **Plain function chain** orchestrates the pipeline (`run_pipeline` in
|
| 41 |
+
`backend/pipeline/graph.py`): intent β retrieval β planner β feedback,
|
| 42 |
+
with two conditional branches (affect picks fast/full retrieval; cumulative
|
| 43 |
+
latency picks primary/fallback LLM). No LangGraph / LangChain dependency.
|
| 44 |
- **BGE-small-en-v1.5** for embeddings (beats MiniLM on MTEB at same speed)
|
| 45 |
+
- **Torch tensor matmul** for vector search on the embedder's device
|
| 46 |
+
(mps β cuda β cpu). No FAISS, no separate index format. Stored as
|
| 47 |
+
`vectors.pt` per user. Headroom is ~100k vectors before approximate
|
| 48 |
+
search (`hnswlib`) becomes worthwhile.
|
| 49 |
+
- **No reranker** β cosine score from BGE-small carries the ranking signal
|
| 50 |
+
at current scales. Revisit when per-query `top_k` grows past ~30.
|
| 51 |
+
- **Two-tier Ollama Cloud LLM**: `primary` β `fallback` (when cumulative
|
| 52 |
+
latency exceeds `FALLBACK_LATENCY_THRESHOLD`). Both tiers hit Ollama
|
| 53 |
+
Cloud over the OpenAI-compatible endpoint. Models default to
|
| 54 |
+
`gemma4:31b-cloud`; swap one when a larger cloud model is provisioned.
|
| 55 |
+
- **Pydantic-validated** LLM routing output β `intent.py` retries on schema
|
| 56 |
+
failures (3 attempts) before falling back to a default route
|
| 57 |
- **Expression-conditioned response shaping** β affect steers tone, retrieval depth,
|
| 58 |
and candidate ranking (not just metadata annotation)
|
| 59 |
- **Bayesian bucket priors** β session-level P(bucket) updated after each accepted turn
|
| 60 |
+
- **Per-turn JSONL logging** β one line per turn appended to
|
| 61 |
+
`logs/turns.jsonl` (no MLflow). Query ad-hoc with DuckDB if needed.
|
| 62 |
- **Browser-side sensing** β MediaPipe JS runs in React frontend, only classified
|
| 63 |
labels (affect, gesture, gaze bucket) are sent to the backend API
|
| 64 |
|
|
|
|
| 82 |
# One-time setup
|
| 83 |
bash setup.sh
|
| 84 |
|
| 85 |
+
# CLI
|
| 86 |
python -m backend.main --debug
|
| 87 |
|
| 88 |
# Full stack
|
|
|
|
| 97 |
All config lives in [backend/config/settings.py](backend/config/settings.py) as Pydantic `BaseSettings`.
|
| 98 |
Copy `.env.example` β `.env` and set:
|
| 99 |
|
| 100 |
+
- `ACTIVE_LLM_TIER` β `primary` | `fallback`
|
| 101 |
+
- `PRIMARY_MODEL` / `FALLBACK_MODEL` β Ollama Cloud model identifiers
|
| 102 |
+
(e.g. `gemma4:31b-cloud`)
|
| 103 |
+
- `LOGS_DIR` β where per-turn JSONL logs are written (default: `logs/`)
|
| 104 |
|
| 105 |
---
|
| 106 |
|
|
|
|
| 110 |
|------|---------|
|
| 111 |
| `data/users.json` | Flat user index (id, name, condition, style) |
|
| 112 |
| `data/memories/<uid>.json` | Full persona JSON with bucketed memories |
|
| 113 |
+
| `data/faiss_store/<uid>/` | `vectors.pt` + `meta.json` β **rebuild after any persona edit** |
|
| 114 |
| `data/generate_users.py` | Regenerates memories + users.json |
|
| 115 |
|
| 116 |
---
|
| 117 |
|
| 118 |
+
## Code Style
|
| 119 |
+
|
| 120 |
+
- **Keep comments to a minimum.** Only comment what isn't obvious from the
|
| 121 |
+
code. No file headers explaining what a module does (the name and code
|
| 122 |
+
show that). No section divider banners (`# ββ Foo ββ`). No restating
|
| 123 |
+
what the next line does. Prefer one-line comments when needed.
|
| 124 |
+
- **Skip `from __future__ import annotations`.** The project is Python 3.10+
|
| 125 |
+
and uses native `X | None` / `list[dict]` syntax β the import adds nothing.
|
| 126 |
+
|
| 127 |
## Development Notes
|
| 128 |
|
| 129 |
- **NEVER use local Ollama models** (e.g. `qwen3:8b`, `gemma3:1b`) β this machine
|
| 130 |
is not powerful enough and will break. Always use cloud-backed models like
|
| 131 |
+
`gemma4:31b-cloud` via Ollama Cloud.
|
| 132 |
- **Adding a persona**: add to `PERSONAS` in `data/generate_users.py`, re-run it,
|
| 133 |
then `python -m backend.retrieval.vector_store` to rebuild indexes
|
| 134 |
- **Changing LLM**: set `ACTIVE_LLM_TIER` in `.env` β no code changes needed
|
| 135 |
+
- **Extending sensing**: sensing runs in the React frontend
|
| 136 |
+
(`frontend/src/hooks/useSensing.ts`); to add a new signal, classify it
|
| 137 |
+
there and add a label field to `PipelineState` in
|
| 138 |
+
`backend/pipeline/state.py`. Keep purely-data label maps in
|
| 139 |
+
`backend/sensing/labels.py`.
|
| 140 |
- **Guardrail tuning**: edit signal lists in `backend/guardrails/checks.py`
|
| 141 |
- **Affect β generation mapping**: `_AFFECT_CONFIG` in `backend/pipeline/nodes/intent.py`
|
| 142 |
and `_PERSONA_TONE_OVERRIDES` in `backend/pipeline/nodes/planner.py`
|
| 143 |
+
- Vector indexes in `data/faiss_store/` are gitignored β rebuilt from source JSONs
|
| 144 |
+
via `python -m backend.retrieval.vector_store`
|
| 145 |
- Frontend uses pnpm, Node 22+
|
README.md
CHANGED
|
@@ -4,7 +4,9 @@ An AI chatbot that **speaks as an AAC user**, not to them. Given a persona (Mia,
|
|
| 4 |
it fuses real-time multimodal non-verbal signals β facial expressions, hand gestures, gaze, and
|
| 5 |
air writing β with personal memory retrieval to generate responses in that person's authentic voice.
|
| 6 |
|
| 7 |
-
Built as a training-free, agentic RAG pipeline
|
|
|
|
|
|
|
| 8 |
|
| 9 |
---
|
| 10 |
|
|
@@ -36,7 +38,7 @@ a personalized digital twin that communicates on their behalf.
|
|
| 36 |
```
|
| 37 |
React Frontend (browser) Backend (Python)
|
| 38 |
MediaPipe JS sensing βββ
|
| 39 |
-
Chat UI ββββββββββββββββΌββ POST /chat βββΊ FastAPI βββΊ
|
| 40 |
Webcam feed ββββββββββββ β
|
| 41 |
L2 Intent βββΊ L3 Retrieval βββΊ L4 Generation βββΊ L5 Feedback
|
| 42 |
```
|
|
@@ -44,13 +46,13 @@ React Frontend (browser) Backend (Python)
|
|
| 44 |
| Layer | Module | What it does |
|
| 45 |
|-------|--------|-------------|
|
| 46 |
| L1 | `frontend/src/hooks/useSensing.ts` | MediaPipe JS β affect, gesture, gaze, air writing (browser-side) |
|
| 47 |
-
| L2 | `backend/pipeline/nodes/intent.py` |
|
| 48 |
-
| L3 | `backend/pipeline/nodes/retrieval.py` |
|
| 49 |
| L4 | `backend/pipeline/nodes/planner.py` | Expression-conditioned response generation (Qwen3) |
|
| 50 |
-
| L5 | `backend/pipeline/nodes/feedback.py` |
|
| 51 |
|
| 52 |
-
The pipeline
|
| 53 |
-
- FRUSTRATED affect β fast retrieval path (k=2
|
| 54 |
- Latency > 3.5s β fallback to smaller Qwen3-8B model
|
| 55 |
|
| 56 |
---
|
|
@@ -59,7 +61,8 @@ The pipeline runs as a **LangGraph stateful directed graph** with conditional ed
|
|
| 59 |
|
| 60 |
- Python **3.10+** (via conda)
|
| 61 |
- Node.js **22+** and **pnpm**
|
| 62 |
-
- [Ollama](https://ollama.com)
|
|
|
|
| 63 |
- A webcam (for live sensing; optional for CLI mode)
|
| 64 |
|
| 65 |
---
|
|
@@ -76,8 +79,8 @@ The setup script handles:
|
|
| 76 |
- Conda environment creation (`aac-chatbot`, Python 3.12)
|
| 77 |
- Python dependency installation
|
| 78 |
- `.env` file creation from template
|
| 79 |
-
-
|
| 80 |
-
-
|
| 81 |
- Frontend dependency installation (pnpm)
|
| 82 |
|
| 83 |
---
|
|
@@ -88,13 +91,12 @@ All settings live in [backend/config/settings.py](backend/config/settings.py) an
|
|
| 88 |
|
| 89 |
| Variable | Default | Description |
|
| 90 |
|----------|---------|-------------|
|
| 91 |
-
| `ACTIVE_LLM_TIER` | `
|
| 92 |
-
| `
|
| 93 |
-
| `
|
| 94 |
-
| `PRIMARY_BASE_URL` |
|
| 95 |
-
| `PRIMARY_MODEL` | `Qwen/Qwen3-30B-A3B` | Primary MoE model served via vLLM |
|
| 96 |
| `FALLBACK_LATENCY_THRESHOLD` | `3.5` | Seconds before falling back to smaller model |
|
| 97 |
-
| `
|
| 98 |
|
| 99 |
---
|
| 100 |
|
|
@@ -106,7 +108,7 @@ All settings live in [backend/config/settings.py](backend/config/settings.py) an
|
|
| 106 |
bash run.sh
|
| 107 |
```
|
| 108 |
|
| 109 |
-
This starts
|
| 110 |
Open [http://localhost:7550](http://localhost:7550) in your browser.
|
| 111 |
|
| 112 |
### CLI only
|
|
@@ -147,18 +149,19 @@ multimodal_aac_chatbot/
|
|
| 147 |
β βββ api/main.py FastAPI REST API
|
| 148 |
β βββ config/settings.py Pydantic BaseSettings
|
| 149 |
β βββ pipeline/
|
| 150 |
-
β β βββ graph.py
|
| 151 |
β β βββ state.py PipelineState TypedDict
|
| 152 |
β β βββ nodes/ intent, retrieval, planner, feedback
|
| 153 |
-
β βββ sensing/
|
| 154 |
-
β βββ retrieval/
|
| 155 |
-
β βββ generation/llm_client.py
|
| 156 |
β βββ guardrails/checks.py Input + output safety checks
|
| 157 |
β
|
| 158 |
βββ data/
|
| 159 |
β βββ users.json Persona index
|
| 160 |
β βββ memories/ Per-persona memory JSONs
|
| 161 |
-
β βββ faiss_store/
|
|
|
|
| 162 |
β
|
| 163 |
βββ setup.sh One-time setup script
|
| 164 |
βββ run.sh Start backend + frontend
|
|
@@ -184,7 +187,106 @@ To add a new persona, edit `data/generate_users.py` and re-run `python -m backen
|
|
| 184 |
|
| 185 |
## TODO
|
| 186 |
|
|
|
|
| 187 |
From the spec (pages 10β11). Tags: **[Core]** = must do, **[Bonus]** = nice to have, **[Eval]** = for the grade.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
Heads up: all camera/sensing stuff is in the frontend (MediaPipe JS). Backend just gets the labels (`affect`, `gesture_tag`, `gaze_bucket`). The `backend/sensing/` python modules are dead code.
|
| 190 |
|
|
|
|
| 4 |
it fuses real-time multimodal non-verbal signals β facial expressions, hand gestures, gaze, and
|
| 5 |
air writing β with personal memory retrieval to generate responses in that person's authentic voice.
|
| 6 |
|
| 7 |
+
Built as a training-free, agentic RAG pipeline β a plain-Python function chain
|
| 8 |
+
with two conditional branches (no LangGraph / LangChain), torch-tensor
|
| 9 |
+
retrieval (no FAISS), and JSONL turn logging (no MLflow).
|
| 10 |
|
| 11 |
---
|
| 12 |
|
|
|
|
| 38 |
```
|
| 39 |
React Frontend (browser) Backend (Python)
|
| 40 |
MediaPipe JS sensing βββ
|
| 41 |
+
Chat UI ββββββββββββββββΌββ POST /chat βββΊ FastAPI βββΊ run_pipeline()
|
| 42 |
Webcam feed ββββββββββββ β
|
| 43 |
L2 Intent βββΊ L3 Retrieval βββΊ L4 Generation βββΊ L5 Feedback
|
| 44 |
```
|
|
|
|
| 46 |
| Layer | Module | What it does |
|
| 47 |
|-------|--------|-------------|
|
| 48 |
| L1 | `frontend/src/hooks/useSensing.ts` | MediaPipe JS β affect, gesture, gaze, air writing (browser-side) |
|
| 49 |
+
| L2 | `backend/pipeline/nodes/intent.py` | Keyword-based intent routing (no LLM) |
|
| 50 |
+
| L3 | `backend/pipeline/nodes/retrieval.py` | BGE-small embeddings + torch tensor cosine search (mps/cuda/cpu) |
|
| 51 |
| L4 | `backend/pipeline/nodes/planner.py` | Expression-conditioned response generation (Qwen3) |
|
| 52 |
+
| L5 | `backend/pipeline/nodes/feedback.py` | JSONL turn logging + Bayesian bucket prior update |
|
| 53 |
|
| 54 |
+
The pipeline is a plain Python function chain with two conditional branches:
|
| 55 |
+
- FRUSTRATED affect β fast retrieval path (k=2)
|
| 56 |
- Latency > 3.5s β fallback to smaller Qwen3-8B model
|
| 57 |
|
| 58 |
---
|
|
|
|
| 61 |
|
| 62 |
- Python **3.10+** (via conda)
|
| 63 |
- Node.js **22+** and **pnpm**
|
| 64 |
+
- An [Ollama Cloud](https://ollama.com) account β both LLM tiers hit
|
| 65 |
+
cloud-hosted models; no local Ollama daemon required
|
| 66 |
- A webcam (for live sensing; optional for CLI mode)
|
| 67 |
|
| 68 |
---
|
|
|
|
| 79 |
- Conda environment creation (`aac-chatbot`, Python 3.12)
|
| 80 |
- Python dependency installation
|
| 81 |
- `.env` file creation from template
|
| 82 |
+
- Vector index building (downloads BGE-small embedder on first run, saves
|
| 83 |
+
per-user `vectors.pt` under `data/faiss_store/`)
|
| 84 |
- Frontend dependency installation (pnpm)
|
| 85 |
|
| 86 |
---
|
|
|
|
| 91 |
|
| 92 |
| Variable | Default | Description |
|
| 93 |
|----------|---------|-------------|
|
| 94 |
+
| `ACTIVE_LLM_TIER` | `primary` | `primary` \| `fallback` |
|
| 95 |
+
| `PRIMARY_MODEL` | `gemma4:31b-cloud` | Ollama Cloud model for primary tier |
|
| 96 |
+
| `FALLBACK_MODEL` | `gemma4:31b-cloud` | Ollama Cloud model for fallback tier (smaller/faster) |
|
| 97 |
+
| `PRIMARY_BASE_URL` | `http://localhost:11434/v1` | Ollama-compatible endpoint |
|
|
|
|
| 98 |
| `FALLBACK_LATENCY_THRESHOLD` | `3.5` | Seconds before falling back to smaller model |
|
| 99 |
+
| `LOGS_DIR` | `logs` | Where per-turn JSONL logs are written |
|
| 100 |
|
| 101 |
---
|
| 102 |
|
|
|
|
| 108 |
bash run.sh
|
| 109 |
```
|
| 110 |
|
| 111 |
+
This starts FastAPI on `:8000` and React on `:7550`.
|
| 112 |
Open [http://localhost:7550](http://localhost:7550) in your browser.
|
| 113 |
|
| 114 |
### CLI only
|
|
|
|
| 149 |
β βββ api/main.py FastAPI REST API
|
| 150 |
β βββ config/settings.py Pydantic BaseSettings
|
| 151 |
β βββ pipeline/
|
| 152 |
+
β β βββ graph.py run_pipeline() β plain function chain
|
| 153 |
β β βββ state.py PipelineState TypedDict
|
| 154 |
β β βββ nodes/ intent, retrieval, planner, feedback
|
| 155 |
+
β βββ sensing/labels.py GESTURE_TO_TAG (sensing runs in browser)
|
| 156 |
+
β βββ retrieval/ BGE embeddings (torch tensor) + bucket priors
|
| 157 |
+
β βββ generation/llm_client.py 2-tier Ollama Cloud LLM client (primary/fallback)
|
| 158 |
β βββ guardrails/checks.py Input + output safety checks
|
| 159 |
β
|
| 160 |
βββ data/
|
| 161 |
β βββ users.json Persona index
|
| 162 |
β βββ memories/ Per-persona memory JSONs
|
| 163 |
+
β βββ faiss_store/ vectors.pt + meta.json (gitignored, rebuilt)
|
| 164 |
+
βββ logs/ Per-turn JSONL logs (gitignored)
|
| 165 |
β
|
| 166 |
βββ setup.sh One-time setup script
|
| 167 |
βββ run.sh Start backend + frontend
|
|
|
|
| 187 |
|
| 188 |
## TODO
|
| 189 |
|
| 190 |
+
<<<<<<< Updated upstream
|
| 191 |
From the spec (pages 10β11). Tags: **[Core]** = must do, **[Bonus]** = nice to have, **[Eval]** = for the grade.
|
| 192 |
+
=======
|
| 193 |
+
Roadmap derived from the project spec (pages 10β11). Items are grouped by spec
|
| 194 |
+
area and marked with priority. Bracketed tags map back to the spec:
|
| 195 |
+
**[Core]** = required deliverable, **[Bonus]** = stretch goal, **[Eval]** = validation.
|
| 196 |
+
|
| 197 |
+
> **Note on sensing:** all camera capture and signal classification happens in
|
| 198 |
+
> the **frontend** (MediaPipe JS). The backend only consumes pre-classified
|
| 199 |
+
> labels (`affect`, `gesture_tag`, `gaze_bucket`).
|
| 200 |
+
|
| 201 |
+
### Dataset
|
| 202 |
+
|
| 203 |
+
- [ ] **[Core]** Add **heterogeneous** memory types per persona β currently only
|
| 204 |
+
autobiographical narratives exist.
|
| 205 |
+
- [ ] Add a set of synthetic social-media posts per persona (voice-matched)
|
| 206 |
+
- [ ] Add a set of synthetic past communication logs per persona
|
| 207 |
+
- [ ] Regenerate the synthesis script to produce both, then rebuild embeddings
|
| 208 |
+
- [ ] Make ingestion type-aware so the retriever knows which chunk-type a hit came from
|
| 209 |
+
- [ ] **[Core]** Document the dataset schema so it is reusable by the evaluation harness.
|
| 210 |
+
|
| 211 |
+
### Multimodal Sensing (frontend)
|
| 212 |
+
|
| 213 |
+
- [ ] **[Core]** Detect **head-nod / sharp tilt as dissatisfaction**, distinct
|
| 214 |
+
from a generic frustrated affect read.
|
| 215 |
+
- [ ] Send a `dissatisfaction_signal` to the backend alongside the existing labels
|
| 216 |
+
- [ ] When the signal fires, branch the planner to a **"Turnaround Option"** β
|
| 217 |
+
a clarification candidate ("Did you mean X or Y?") instead of a plain answer
|
| 218 |
+
- [ ] **[Bonus]** Add **vocalisation capture** (Web Speech API) and a
|
| 219 |
+
**conflict-resolution** step that compares the spoken intent against the
|
| 220 |
+
air-written intent, sending a single `resolved_intent` to the backend.
|
| 221 |
+
- [ ] **[Polish]** Tighten the **thumbs-up boost** β today it only annotates the
|
| 222 |
+
prompt. The retriever should also bias affirmative-leaning candidates when
|
| 223 |
+
a thumbs-up is present.
|
| 224 |
+
|
| 225 |
+
### Agentic Intent Decomposition
|
| 226 |
+
|
| 227 |
+
> **Current state:** intent routing is **keyword-based**, not LLM-based.
|
| 228 |
+
> The original LLM-driven router (Pydantic-validated JSON output) was
|
| 229 |
+
> dropped because `gemma4:31b-cloud` consistently emitted the wrong JSON
|
| 230 |
+
> shape and got truncated by `max_tokens`, triggering 3 retries + a
|
| 231 |
+
> hard-fallback on every turn β adding ~30s of dead latency before the
|
| 232 |
+
> generation call. The keyword router (~5 buckets matched against
|
| 233 |
+
> hardcoded word lists in `intent.py`) handles the demo personas
|
| 234 |
+
> reliably and adds ~0ms per turn.
|
| 235 |
+
>
|
| 236 |
+
> **Trade-off:** the router is limited to the 5 hardcoded buckets
|
| 237 |
+
> (`family`, `medical`, `hobbies`, `daily_routine`, `social`) and can't
|
| 238 |
+
> distinguish `OPEN_DOMAIN` from `PERSONAL` queries. Acceptable today
|
| 239 |
+
> because all current personas only have personal memories.
|
| 240 |
+
|
| 241 |
+
- [ ] **[Core]** Make Personal / Contextual / Open-domain routing actually hit
|
| 242 |
+
**different retrieval pools** β today all sub-queries fall back to the same
|
| 243 |
+
vector index. Requires re-introducing some form of intent classification
|
| 244 |
+
(likely a constrained-output LLM call once `response_format=json_schema`
|
| 245 |
+
is supported on Ollama Cloud, or a tiny local classifier).
|
| 246 |
+
- [ ] **[Perf]** When/if we re-add LLM intent: cache the schema prompt,
|
| 247 |
+
use a smaller routing model, and parallelise sub-query retrieval.
|
| 248 |
+
|
| 249 |
+
### Retrieval
|
| 250 |
+
|
| 251 |
+
- [ ] **[Bonus]** Persist **bucket priors** per user across conversations
|
| 252 |
+
(currently per-session only).
|
| 253 |
+
- [ ] **[Bonus]** Extend the **latency-optimised fallback** beyond a single
|
| 254 |
+
LLM-tier switch:
|
| 255 |
+
- [ ] Return a cached canned response when end-to-end latency blows the budget
|
| 256 |
+
- [ ] Use the spec's **< 6s end-to-end** target instead of the current 3.5s threshold
|
| 257 |
+
- [ ] **[Scale]** When per-user memory grows past ~100k chunks, swap the
|
| 258 |
+
torch-tensor matmul search for `hnswlib` (a ~2 MB approximate-NN library);
|
| 259 |
+
reintroduce a cross-encoder reranker once `top_k > ~30`.
|
| 260 |
+
|
| 261 |
+
### Training-Free Response Generation
|
| 262 |
+
|
| 263 |
+
- [ ] **[Core]** Return **multiple candidate responses** from the API so the
|
| 264 |
+
user can pick one (today the endpoint returns a single string).
|
| 265 |
+
- [ ] **[Bonus]** On user selection, upsert the `(query, selected_response)` pair
|
| 266 |
+
into a small "accepted-pairs" index and consult it as a high-prior shortcut
|
| 267 |
+
on the next turn β the spec's lightweight retrieval-index update.
|
| 268 |
+
|
| 269 |
+
### Evaluation & Validation
|
| 270 |
+
|
| 271 |
+
- [ ] **[Eval]** **Factual Faithfulness** β NLI-based groundedness metric over
|
| 272 |
+
(retrieved evidence, generated response) pairs, reported as a hallucination
|
| 273 |
+
rate on a held-out set of partner-style queries per persona.
|
| 274 |
+
- [ ] **[Eval]** **Communication Efficiency** β p50 / p95 end-to-end latency
|
| 275 |
+
across all three LLM tiers, with a pass/fail gate at the spec target of
|
| 276 |
+
**< 6s p95**.
|
| 277 |
+
- [ ] **[Eval]** **Perceived Authenticity** β generate paired (persona, query,
|
| 278 |
+
response) samples and a 5-point Likert rating sheet for the live in-class eval.
|
| 279 |
+
- [ ] **[Eval]** **Multimodal Alignment** β synthetic (gesture, query) scenarios
|
| 280 |
+
checked against expected response traits (e.g. thumbs-up β affirmative
|
| 281 |
+
lexicon present), reported as alignment accuracy.
|
| 282 |
+
|
| 283 |
+
### Polish
|
| 284 |
+
|
| 285 |
+
- [ ] **[Polish]** Move the hard-coded affectβtone and persona-override dicts
|
| 286 |
+
into a single YAML so tone-shaping can be tuned without touching code.
|
| 287 |
+
- [x] **[Polish]** Delete the unused `backend/sensing/` Python modules now that
|
| 288 |
+
sensing lives entirely in the frontend. *(Done β only `labels.py` remains.)*
|
| 289 |
+
>>>>>>> Stashed changes
|
| 290 |
|
| 291 |
Heads up: all camera/sensing stuff is in the frontend (MediaPipe JS). Backend just gets the labels (`affect`, `gesture_tag`, `gaze_bucket`). The `backend/sensing/` python modules are dead code.
|
| 292 |
|
backend/api/main.py
CHANGED
|
@@ -14,10 +14,10 @@ from backend.generation.llm_client import ( # active_model used by /debug/confi
|
|
| 14 |
get_client,
|
| 15 |
)
|
| 16 |
from backend.guardrails.checks import check_input
|
| 17 |
-
from backend.pipeline.graph import
|
| 18 |
from backend.pipeline.state import PipelineState
|
| 19 |
from backend.retrieval.bucket_priors import uniform_priors
|
| 20 |
-
from backend.retrieval.vector_store import _get_embedder
|
| 21 |
|
| 22 |
app = FastAPI(
|
| 23 |
title="Multimodal AAC Chatbot API",
|
|
@@ -45,7 +45,6 @@ def _warmup():
|
|
| 45 |
logging.getLogger("sentence_transformers").setLevel(logging.WARNING)
|
| 46 |
print("Loading models...", end=" ", flush=True)
|
| 47 |
_get_embedder()
|
| 48 |
-
_get_reranker()
|
| 49 |
get_client()
|
| 50 |
_models_ready = True
|
| 51 |
print("ready.")
|
|
@@ -151,7 +150,7 @@ def _build_initial_state(req: ChatRequest, session: dict) -> PipelineState:
|
|
| 151 |
"t_generation": 0.0,
|
| 152 |
"t_total": 0.0,
|
| 153 |
},
|
| 154 |
-
|
| 155 |
guardrail_passed=True,
|
| 156 |
)
|
| 157 |
|
|
@@ -172,7 +171,6 @@ def debug_config():
|
|
| 172 |
"active_model": active_model(),
|
| 173 |
"thinking_mode": settings.thinking_mode,
|
| 174 |
"embed_model": settings.embed_model,
|
| 175 |
-
"rerank_model": settings.rerank_model,
|
| 176 |
"retrieval_top_k": settings.retrieval_top_k,
|
| 177 |
"retrieval_rerank_k": settings.retrieval_rerank_k,
|
| 178 |
"fallback_latency_threshold": settings.fallback_latency_threshold,
|
|
@@ -217,7 +215,7 @@ def chat(req: ChatRequest):
|
|
| 217 |
session = _get_or_init_session(req.user_id)
|
| 218 |
initial_state = _build_initial_state(req, session)
|
| 219 |
|
| 220 |
-
result: PipelineState =
|
| 221 |
|
| 222 |
# Persist updated session state
|
| 223 |
session["session_history"] = result["session_history"]
|
|
|
|
| 14 |
get_client,
|
| 15 |
)
|
| 16 |
from backend.guardrails.checks import check_input
|
| 17 |
+
from backend.pipeline.graph import run_pipeline
|
| 18 |
from backend.pipeline.state import PipelineState
|
| 19 |
from backend.retrieval.bucket_priors import uniform_priors
|
| 20 |
+
from backend.retrieval.vector_store import _get_embedder
|
| 21 |
|
| 22 |
app = FastAPI(
|
| 23 |
title="Multimodal AAC Chatbot API",
|
|
|
|
| 45 |
logging.getLogger("sentence_transformers").setLevel(logging.WARNING)
|
| 46 |
print("Loading models...", end=" ", flush=True)
|
| 47 |
_get_embedder()
|
|
|
|
| 48 |
get_client()
|
| 49 |
_models_ready = True
|
| 50 |
print("ready.")
|
|
|
|
| 150 |
"t_generation": 0.0,
|
| 151 |
"t_total": 0.0,
|
| 152 |
},
|
| 153 |
+
run_id=None,
|
| 154 |
guardrail_passed=True,
|
| 155 |
)
|
| 156 |
|
|
|
|
| 171 |
"active_model": active_model(),
|
| 172 |
"thinking_mode": settings.thinking_mode,
|
| 173 |
"embed_model": settings.embed_model,
|
|
|
|
| 174 |
"retrieval_top_k": settings.retrieval_top_k,
|
| 175 |
"retrieval_rerank_k": settings.retrieval_rerank_k,
|
| 176 |
"fallback_latency_threshold": settings.fallback_latency_threshold,
|
|
|
|
| 215 |
session = _get_or_init_session(req.user_id)
|
| 216 |
initial_state = _build_initial_state(req, session)
|
| 217 |
|
| 218 |
+
result: PipelineState = run_pipeline(initial_state)
|
| 219 |
|
| 220 |
# Persist updated session state
|
| 221 |
session["session_history"] = result["session_history"]
|
backend/config/settings.py
CHANGED
|
@@ -10,35 +10,30 @@ class Settings(BaseSettings):
|
|
| 10 |
|
| 11 |
# ββ Paths ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 12 |
data_dir: Path = Path("data")
|
| 13 |
-
faiss_store_dir: Path = Path("data/faiss_store")
|
| 14 |
memories_dir: Path = Path("data/memories")
|
| 15 |
users_json: Path = Path("data/users.json")
|
|
|
|
| 16 |
|
| 17 |
-
# ββ Retrieval
|
| 18 |
embed_model: str = "BAAI/bge-small-en-v1.5"
|
| 19 |
-
rerank_model: str = "BAAI/bge-reranker-v2-m3"
|
| 20 |
retrieval_top_k: int = 5
|
| 21 |
retrieval_rerank_k: int = 3
|
| 22 |
retrieval_fast_k: int = 2 # used when affect == FRUSTRATED
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
#
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
fallback_api_key: str = "token-abc"
|
| 34 |
|
| 35 |
-
#
|
| 36 |
-
|
| 37 |
-
local_base_url: str = "http://localhost:11434/v1"
|
| 38 |
-
local_api_key: str = "ollama"
|
| 39 |
-
|
| 40 |
-
# Active tier: "primary" | "fallback" | "local"
|
| 41 |
-
active_llm_tier: str = "local"
|
| 42 |
|
| 43 |
# off | strip | full | suppress
|
| 44 |
thinking_mode: str = "off"
|
|
@@ -60,10 +55,6 @@ class Settings(BaseSettings):
|
|
| 60 |
air_write_end_gap_ms: int = 200 # ms of stillness to end a stroke
|
| 61 |
conflict_overlap_ms: int = 500 # audio + gesture co-occurrence window
|
| 62 |
|
| 63 |
-
# ββ MLflow ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 64 |
-
mlflow_tracking_uri: str = "sqlite:///mlflow.db"
|
| 65 |
-
mlflow_experiment: str = "aac-chatbot"
|
| 66 |
-
|
| 67 |
# ββ Candidate ranking weights βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 68 |
rank_alpha: float = 0.4 # faithfulness weight
|
| 69 |
rank_beta: float = 0.3 # style similarity weight
|
|
|
|
| 10 |
|
| 11 |
# ββ Paths ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 12 |
data_dir: Path = Path("data")
|
| 13 |
+
faiss_store_dir: Path = Path("data/faiss_store") # name kept for back-compat
|
| 14 |
memories_dir: Path = Path("data/memories")
|
| 15 |
users_json: Path = Path("data/users.json")
|
| 16 |
+
logs_dir: Path = Path("logs")
|
| 17 |
|
| 18 |
+
# ββ Retrieval ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 19 |
embed_model: str = "BAAI/bge-small-en-v1.5"
|
|
|
|
| 20 |
retrieval_top_k: int = 5
|
| 21 |
retrieval_rerank_k: int = 3
|
| 22 |
retrieval_fast_k: int = 2 # used when affect == FRUSTRATED
|
| 23 |
|
| 24 |
+
# LLM tiers β both hit Ollama Cloud via OpenAI-compatible endpoint.
|
| 25 |
+
# Same model on both tiers for now; swap one when a larger cloud model
|
| 26 |
+
# is provisioned and the latency-fallback should branch.
|
| 27 |
+
primary_model: str = "gemma4:31b-cloud"
|
| 28 |
+
primary_base_url: str = "http://localhost:11434/v1"
|
| 29 |
+
primary_api_key: str = "ollama"
|
| 30 |
|
| 31 |
+
fallback_model: str = "gemma4:31b-cloud"
|
| 32 |
+
fallback_base_url: str = "http://localhost:11434/v1"
|
| 33 |
+
fallback_api_key: str = "ollama"
|
|
|
|
| 34 |
|
| 35 |
+
# Active tier: "primary" | "fallback"
|
| 36 |
+
active_llm_tier: str = "primary"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
# off | strip | full | suppress
|
| 39 |
thinking_mode: str = "off"
|
|
|
|
| 55 |
air_write_end_gap_ms: int = 200 # ms of stillness to end a stroke
|
| 56 |
conflict_overlap_ms: int = 500 # audio + gesture co-occurrence window
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
# ββ Candidate ranking weights βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 59 |
rank_alpha: float = 0.4 # faithfulness weight
|
| 60 |
rank_beta: float = 0.3 # style similarity weight
|
backend/generation/llm_client.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
| 1 |
-
#
|
| 2 |
-
from __future__ import annotations
|
| 3 |
-
|
| 4 |
import re
|
| 5 |
from functools import lru_cache
|
| 6 |
from typing import Any
|
|
@@ -10,33 +8,23 @@ from openai import OpenAI
|
|
| 10 |
from backend.config.settings import settings
|
| 11 |
|
| 12 |
|
| 13 |
-
@lru_cache(maxsize=
|
| 14 |
def _build_client(base_url: str, api_key: str) -> OpenAI:
|
| 15 |
return OpenAI(base_url=base_url, api_key=api_key)
|
| 16 |
|
| 17 |
|
| 18 |
def get_client(tier: str | None = None) -> OpenAI:
|
| 19 |
resolved = tier or settings.active_llm_tier
|
| 20 |
-
|
| 21 |
-
if resolved == "primary":
|
| 22 |
-
return _build_client(settings.primary_base_url, settings.primary_api_key)
|
| 23 |
if resolved == "fallback":
|
| 24 |
return _build_client(settings.fallback_base_url, settings.fallback_api_key)
|
| 25 |
-
|
| 26 |
-
return _build_client(settings.local_base_url, settings.local_api_key)
|
| 27 |
|
| 28 |
|
| 29 |
def active_model(tier: str | None = None) -> str:
|
| 30 |
resolved = tier or settings.active_llm_tier
|
| 31 |
-
models = {
|
| 32 |
-
"primary": settings.primary_model,
|
| 33 |
-
"fallback": settings.fallback_model,
|
| 34 |
-
"local": settings.local_model,
|
| 35 |
-
}
|
| 36 |
if resolved not in models:
|
| 37 |
-
raise ValueError(
|
| 38 |
-
f"Unknown LLM tier: '{resolved}'. Must be primary/fallback/local."
|
| 39 |
-
)
|
| 40 |
return models[resolved]
|
| 41 |
|
| 42 |
|
|
@@ -61,29 +49,16 @@ def chat_complete(
|
|
| 61 |
temperature: float = 0.7,
|
| 62 |
**kwargs: Any,
|
| 63 |
) -> str:
|
| 64 |
-
# Returns response text. Handles thinking mode and local-tier collapsing.
|
| 65 |
resolved_tier = tier or settings.active_llm_tier
|
| 66 |
-
|
| 67 |
-
# Local dev: no GCP server available β collapse all tiers to Ollama
|
| 68 |
-
if settings.active_llm_tier == "local":
|
| 69 |
-
resolved_tier = "local"
|
| 70 |
model = active_model(resolved_tier)
|
| 71 |
client = get_client(resolved_tier)
|
| 72 |
|
| 73 |
patched_messages = messages
|
| 74 |
extra_body: dict[str, Any] = kwargs.pop("extra_body", {})
|
| 75 |
|
| 76 |
-
# Suppress thinking for models that think by default.
|
| 77 |
if settings.thinking_mode == "suppress":
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
else:
|
| 81 |
-
extra_body = {
|
| 82 |
-
**extra_body,
|
| 83 |
-
"chat_template_kwargs": {"enable_thinking": False},
|
| 84 |
-
}
|
| 85 |
-
|
| 86 |
-
# Add thinking budget when enabled.
|
| 87 |
effective_max_tokens = max_tokens
|
| 88 |
if settings.thinking_mode in ("strip", "full"):
|
| 89 |
effective_max_tokens = max_tokens + settings.thinking_token_budget
|
|
|
|
| 1 |
+
# Two-tier LLM client β primary / fallback, both Ollama Cloud over OpenAI-compatible HTTP.
|
|
|
|
|
|
|
| 2 |
import re
|
| 3 |
from functools import lru_cache
|
| 4 |
from typing import Any
|
|
|
|
| 8 |
from backend.config.settings import settings
|
| 9 |
|
| 10 |
|
| 11 |
+
@lru_cache(maxsize=2)
|
| 12 |
def _build_client(base_url: str, api_key: str) -> OpenAI:
|
| 13 |
return OpenAI(base_url=base_url, api_key=api_key)
|
| 14 |
|
| 15 |
|
| 16 |
def get_client(tier: str | None = None) -> OpenAI:
|
| 17 |
resolved = tier or settings.active_llm_tier
|
|
|
|
|
|
|
|
|
|
| 18 |
if resolved == "fallback":
|
| 19 |
return _build_client(settings.fallback_base_url, settings.fallback_api_key)
|
| 20 |
+
return _build_client(settings.primary_base_url, settings.primary_api_key)
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
def active_model(tier: str | None = None) -> str:
|
| 24 |
resolved = tier or settings.active_llm_tier
|
| 25 |
+
models = {"primary": settings.primary_model, "fallback": settings.fallback_model}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
if resolved not in models:
|
| 27 |
+
raise ValueError(f"Unknown LLM tier: '{resolved}'. Must be primary/fallback.")
|
|
|
|
|
|
|
| 28 |
return models[resolved]
|
| 29 |
|
| 30 |
|
|
|
|
| 49 |
temperature: float = 0.7,
|
| 50 |
**kwargs: Any,
|
| 51 |
) -> str:
|
|
|
|
| 52 |
resolved_tier = tier or settings.active_llm_tier
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
model = active_model(resolved_tier)
|
| 54 |
client = get_client(resolved_tier)
|
| 55 |
|
| 56 |
patched_messages = messages
|
| 57 |
extra_body: dict[str, Any] = kwargs.pop("extra_body", {})
|
| 58 |
|
|
|
|
| 59 |
if settings.thinking_mode == "suppress":
|
| 60 |
+
patched_messages = _apply_no_think(messages)
|
| 61 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
effective_max_tokens = max_tokens
|
| 63 |
if settings.thinking_mode in ("strip", "full"):
|
| 64 |
effective_max_tokens = max_tokens + settings.thinking_token_budget
|
backend/main.py
CHANGED
|
@@ -9,10 +9,10 @@ import time
|
|
| 9 |
|
| 10 |
from backend.config.settings import settings
|
| 11 |
from backend.guardrails.checks import check_input
|
| 12 |
-
from backend.pipeline.graph import
|
| 13 |
from backend.pipeline.state import GenerationConfig, PipelineState
|
| 14 |
from backend.retrieval.bucket_priors import uniform_priors
|
| 15 |
-
from backend.retrieval.vector_store import _get_embedder
|
| 16 |
|
| 17 |
|
| 18 |
def parse_args() -> argparse.Namespace:
|
|
@@ -28,7 +28,7 @@ def parse_args() -> argparse.Namespace:
|
|
| 28 |
"--tier",
|
| 29 |
type=str,
|
| 30 |
default=None,
|
| 31 |
-
choices=["primary", "fallback"
|
| 32 |
help="Override LLM tier (default: settings.active_llm_tier)",
|
| 33 |
)
|
| 34 |
return p.parse_args()
|
|
@@ -134,7 +134,6 @@ def main() -> None:
|
|
| 134 |
# Warm up models
|
| 135 |
print(f"\nLoading models for {profile['name']} β¦", end=" ", flush=True)
|
| 136 |
_get_embedder()
|
| 137 |
-
_get_reranker()
|
| 138 |
print("ready.\n")
|
| 139 |
|
| 140 |
session_history: list[dict] = []
|
|
@@ -197,11 +196,11 @@ def main() -> None:
|
|
| 197 |
"t_generation": 0.0,
|
| 198 |
"t_total": 0.0,
|
| 199 |
},
|
| 200 |
-
|
| 201 |
guardrail_passed=True,
|
| 202 |
)
|
| 203 |
|
| 204 |
-
result: PipelineState =
|
| 205 |
|
| 206 |
print(f"AAC Bot: {result['selected_response']}\n")
|
| 207 |
|
|
|
|
| 9 |
|
| 10 |
from backend.config.settings import settings
|
| 11 |
from backend.guardrails.checks import check_input
|
| 12 |
+
from backend.pipeline.graph import run_pipeline
|
| 13 |
from backend.pipeline.state import GenerationConfig, PipelineState
|
| 14 |
from backend.retrieval.bucket_priors import uniform_priors
|
| 15 |
+
from backend.retrieval.vector_store import _get_embedder
|
| 16 |
|
| 17 |
|
| 18 |
def parse_args() -> argparse.Namespace:
|
|
|
|
| 28 |
"--tier",
|
| 29 |
type=str,
|
| 30 |
default=None,
|
| 31 |
+
choices=["primary", "fallback"],
|
| 32 |
help="Override LLM tier (default: settings.active_llm_tier)",
|
| 33 |
)
|
| 34 |
return p.parse_args()
|
|
|
|
| 134 |
# Warm up models
|
| 135 |
print(f"\nLoading models for {profile['name']} β¦", end=" ", flush=True)
|
| 136 |
_get_embedder()
|
|
|
|
| 137 |
print("ready.\n")
|
| 138 |
|
| 139 |
session_history: list[dict] = []
|
|
|
|
| 196 |
"t_generation": 0.0,
|
| 197 |
"t_total": 0.0,
|
| 198 |
},
|
| 199 |
+
run_id=None,
|
| 200 |
guardrail_passed=True,
|
| 201 |
)
|
| 202 |
|
| 203 |
+
result: PipelineState = run_pipeline(state)
|
| 204 |
|
| 205 |
print(f"AAC Bot: {result['selected_response']}\n")
|
| 206 |
|
backend/pipeline/graph.py
CHANGED
|
@@ -1,65 +1,36 @@
|
|
| 1 |
-
#
|
| 2 |
-
from
|
| 3 |
-
|
| 4 |
from backend.pipeline.nodes import feedback, intent, planner, retrieval
|
| 5 |
from backend.pipeline.state import PipelineState
|
| 6 |
|
| 7 |
|
| 8 |
def _route_by_affect(state: PipelineState) -> str:
|
| 9 |
-
"""Conditional edge: FRUSTRATED β fast path, otherwise full retrieval."""
|
| 10 |
emotion = (state.get("affect") or {}).get("emotion", "NEUTRAL")
|
| 11 |
return "fast" if emotion == "FRUSTRATED" else "full"
|
| 12 |
|
| 13 |
|
| 14 |
def _route_by_latency(state: PipelineState) -> str:
|
| 15 |
-
"""Conditional edge: if cumulative latency > threshold, use fallback LLM."""
|
| 16 |
-
from backend.config.settings import settings
|
| 17 |
-
|
| 18 |
log = state.get("latency_log") or {}
|
| 19 |
elapsed = log.get("t_intent", 0.0) + log.get("t_retrieval", 0.0)
|
| 20 |
return "fallback" if elapsed > settings.fallback_latency_threshold else "primary"
|
| 21 |
|
| 22 |
|
| 23 |
-
def
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
# ββ Nodes ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 27 |
-
graph.add_node("intent", intent.run)
|
| 28 |
-
graph.add_node("fast_retrieval", retrieval.run_fast)
|
| 29 |
-
graph.add_node("full_retrieval", retrieval.run_full)
|
| 30 |
-
graph.add_node("primary_gen", planner.run_primary)
|
| 31 |
-
graph.add_node("fallback_gen", planner.run_fallback)
|
| 32 |
-
graph.add_node("feedback", feedback.run)
|
| 33 |
-
|
| 34 |
-
# ββ Entry ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
-
graph.set_entry_point("intent")
|
| 36 |
-
|
| 37 |
-
# ββ Affect-aware routing after intent βββββββββββββββββββββββββββββββββββββ
|
| 38 |
-
graph.add_conditional_edges(
|
| 39 |
-
"intent",
|
| 40 |
-
_route_by_affect,
|
| 41 |
-
{"fast": "fast_retrieval", "full": "full_retrieval"},
|
| 42 |
-
)
|
| 43 |
|
| 44 |
-
# ββ Latency-aware routing after retrieval βββββββββββββββββββββββββββββββββ
|
| 45 |
-
graph.add_conditional_edges(
|
| 46 |
-
"fast_retrieval",
|
| 47 |
-
_route_by_latency,
|
| 48 |
-
{"primary": "primary_gen", "fallback": "fallback_gen"},
|
| 49 |
-
)
|
| 50 |
-
graph.add_conditional_edges(
|
| 51 |
-
"full_retrieval",
|
| 52 |
-
_route_by_latency,
|
| 53 |
-
{"primary": "primary_gen", "fallback": "fallback_gen"},
|
| 54 |
-
)
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
graph.add_edge("fallback_gen", "feedback")
|
| 59 |
-
graph.add_edge("feedback", END)
|
| 60 |
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
|
|
|
| 1 |
+
# Pipeline orchestrator: intent β retrieval β generation β feedback.
|
| 2 |
+
from backend.config.settings import settings
|
|
|
|
| 3 |
from backend.pipeline.nodes import feedback, intent, planner, retrieval
|
| 4 |
from backend.pipeline.state import PipelineState
|
| 5 |
|
| 6 |
|
| 7 |
def _route_by_affect(state: PipelineState) -> str:
|
|
|
|
| 8 |
emotion = (state.get("affect") or {}).get("emotion", "NEUTRAL")
|
| 9 |
return "fast" if emotion == "FRUSTRATED" else "full"
|
| 10 |
|
| 11 |
|
| 12 |
def _route_by_latency(state: PipelineState) -> str:
|
|
|
|
|
|
|
|
|
|
| 13 |
log = state.get("latency_log") or {}
|
| 14 |
elapsed = log.get("t_intent", 0.0) + log.get("t_retrieval", 0.0)
|
| 15 |
return "fallback" if elapsed > settings.fallback_latency_threshold else "primary"
|
| 16 |
|
| 17 |
|
| 18 |
+
def _merge(state: PipelineState, update: dict) -> None:
|
| 19 |
+
state.update(update) # type: ignore[typeddict-item]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
+
def run_pipeline(state: PipelineState) -> PipelineState:
|
| 23 |
+
_merge(state, intent.run(state))
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
if _route_by_affect(state) == "fast":
|
| 26 |
+
_merge(state, retrieval.run_fast(state))
|
| 27 |
+
else:
|
| 28 |
+
_merge(state, retrieval.run_full(state))
|
| 29 |
|
| 30 |
+
if _route_by_latency(state) == "fallback":
|
| 31 |
+
_merge(state, planner.run_fallback(state))
|
| 32 |
+
else:
|
| 33 |
+
_merge(state, planner.run_primary(state))
|
| 34 |
|
| 35 |
+
_merge(state, feedback.run(state))
|
| 36 |
+
return state
|
backend/pipeline/nodes/feedback.py
CHANGED
|
@@ -1,5 +1,8 @@
|
|
| 1 |
-
# Feedback node β
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
from backend.config.settings import settings
|
| 5 |
from backend.pipeline.state import PipelineState
|
|
@@ -7,64 +10,52 @@ from backend.retrieval.bucket_priors import update_priors
|
|
| 7 |
|
| 8 |
|
| 9 |
def run(state: PipelineState) -> dict:
|
|
|
|
| 10 |
try:
|
| 11 |
-
|
| 12 |
-
except Exception:
|
| 13 |
-
|
|
|
|
| 14 |
updated_priors = _update_bucket_priors(state)
|
| 15 |
updated_history = _append_turn_to_history(state)
|
| 16 |
|
| 17 |
return {
|
| 18 |
"bucket_priors": updated_priors,
|
| 19 |
"session_history": updated_history,
|
| 20 |
-
"
|
| 21 |
}
|
| 22 |
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
import mlflow
|
| 29 |
-
|
| 30 |
-
mlflow.set_tracking_uri(settings.mlflow_tracking_uri)
|
| 31 |
-
mlflow.set_experiment(settings.mlflow_experiment)
|
| 32 |
|
| 33 |
latency = state.get("latency_log") or {}
|
| 34 |
affect = (state.get("affect") or {}).get("emotion", "UNKNOWN")
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
}
|
| 56 |
-
)
|
| 57 |
-
|
| 58 |
-
# Log the selected response as artifact text for qualitative review
|
| 59 |
-
mlflow.log_text(
|
| 60 |
-
state.get("selected_response") or "",
|
| 61 |
-
f"responses/turn_{state['turn_id']}.txt",
|
| 62 |
-
)
|
| 63 |
-
|
| 64 |
-
return run.info.run_id
|
| 65 |
-
|
| 66 |
|
| 67 |
-
|
|
|
|
| 68 |
|
| 69 |
|
| 70 |
def _update_bucket_priors(state: PipelineState) -> dict[str, float]:
|
|
@@ -83,11 +74,10 @@ def _update_bucket_priors(state: PipelineState) -> dict[str, float]:
|
|
| 83 |
)
|
| 84 |
|
| 85 |
|
| 86 |
-
# ββ Session history append βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 87 |
-
|
| 88 |
-
|
| 89 |
def _append_turn_to_history(state: PipelineState) -> list[dict]:
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
# Feedback node β JSONL turn logging, bucket prior update, history append.
|
| 2 |
+
import json
|
| 3 |
+
import time
|
| 4 |
+
import uuid
|
| 5 |
+
from pathlib import Path
|
| 6 |
|
| 7 |
from backend.config.settings import settings
|
| 8 |
from backend.pipeline.state import PipelineState
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
def run(state: PipelineState) -> dict:
|
| 13 |
+
run_id = uuid.uuid4().hex
|
| 14 |
try:
|
| 15 |
+
_log_to_jsonl(state, run_id)
|
| 16 |
+
except Exception as exc:
|
| 17 |
+
# logging never blocks the response path, but make the failure visible
|
| 18 |
+
print(f"[feedback] JSONL log failed: {exc!r}")
|
| 19 |
updated_priors = _update_bucket_priors(state)
|
| 20 |
updated_history = _append_turn_to_history(state)
|
| 21 |
|
| 22 |
return {
|
| 23 |
"bucket_priors": updated_priors,
|
| 24 |
"session_history": updated_history,
|
| 25 |
+
"run_id": run_id,
|
| 26 |
}
|
| 27 |
|
| 28 |
|
| 29 |
+
def _log_to_jsonl(state: PipelineState, run_id: str) -> None:
|
| 30 |
+
logs_dir = Path(settings.logs_dir)
|
| 31 |
+
logs_dir.mkdir(parents=True, exist_ok=True)
|
| 32 |
+
log_path = logs_dir / "turns.jsonl"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
latency = state.get("latency_log") or {}
|
| 35 |
affect = (state.get("affect") or {}).get("emotion", "UNKNOWN")
|
| 36 |
|
| 37 |
+
entry = {
|
| 38 |
+
"run_id": run_id,
|
| 39 |
+
"ts": time.time(),
|
| 40 |
+
"user_id": state["user_id"],
|
| 41 |
+
"turn_id": state["turn_id"],
|
| 42 |
+
"llm_tier": state.get("llm_tier_used", "unknown"),
|
| 43 |
+
"retrieval_mode": state.get("retrieval_mode_used", "unknown"),
|
| 44 |
+
"affect": affect,
|
| 45 |
+
"guardrail_passed": state.get("guardrail_passed", True),
|
| 46 |
+
"num_chunks": len(state.get("retrieved_chunks") or []),
|
| 47 |
+
"latency": {
|
| 48 |
+
"t_sensing": latency.get("t_sensing", 0.0),
|
| 49 |
+
"t_intent": latency.get("t_intent", 0.0),
|
| 50 |
+
"t_retrieval": latency.get("t_retrieval", 0.0),
|
| 51 |
+
"t_generation": latency.get("t_generation", 0.0),
|
| 52 |
+
"t_total": latency.get("t_total", 0.0),
|
| 53 |
+
},
|
| 54 |
+
"response": state.get("selected_response") or "",
|
| 55 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
+
with open(log_path, "a", encoding="utf-8") as f:
|
| 58 |
+
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
| 59 |
|
| 60 |
|
| 61 |
def _update_bucket_priors(state: PipelineState) -> dict[str, float]:
|
|
|
|
| 74 |
)
|
| 75 |
|
| 76 |
|
|
|
|
|
|
|
|
|
|
| 77 |
def _append_turn_to_history(state: PipelineState) -> list[dict]:
|
| 78 |
+
history = list(state.get("session_history") or [])
|
| 79 |
+
history.append({"role": "partner", "content": state["raw_query"]})
|
| 80 |
+
history.append(
|
| 81 |
+
{"role": "aac_user", "content": state.get("selected_response") or ""}
|
| 82 |
+
)
|
| 83 |
+
return history
|
backend/pipeline/nodes/intent.py
CHANGED
|
@@ -106,7 +106,6 @@ def _build_user_prompt(
|
|
| 106 |
|
| 107 |
|
| 108 |
def run(state: PipelineState) -> dict:
|
| 109 |
-
"""LangGraph node: intent decomposition."""
|
| 110 |
t0 = time.perf_counter()
|
| 111 |
|
| 112 |
# --fast mode: intent_route already resolved by keyword routing in main.py
|
|
@@ -123,7 +122,7 @@ def run(state: PipelineState) -> dict:
|
|
| 123 |
route: IntentRoute | None = None
|
| 124 |
last_error: str = ""
|
| 125 |
|
| 126 |
-
for attempt in range(3): #
|
| 127 |
messages = [
|
| 128 |
{"role": "system", "content": _SYSTEM_PROMPT},
|
| 129 |
{
|
|
|
|
| 106 |
|
| 107 |
|
| 108 |
def run(state: PipelineState) -> dict:
|
|
|
|
| 109 |
t0 = time.perf_counter()
|
| 110 |
|
| 111 |
# --fast mode: intent_route already resolved by keyword routing in main.py
|
|
|
|
| 122 |
route: IntentRoute | None = None
|
| 123 |
last_error: str = ""
|
| 124 |
|
| 125 |
+
for attempt in range(3): # up to 2 retries on schema validation failure
|
| 126 |
messages = [
|
| 127 |
{"role": "system", "content": _SYSTEM_PROMPT},
|
| 128 |
{
|
backend/pipeline/nodes/planner.py
CHANGED
|
@@ -7,7 +7,7 @@ from backend.config.settings import settings
|
|
| 7 |
from backend.generation.llm_client import active_model, chat_complete
|
| 8 |
from backend.guardrails.checks import check_output
|
| 9 |
from backend.pipeline.state import PipelineState
|
| 10 |
-
from backend.sensing.
|
| 11 |
|
| 12 |
# ββ Persona-specific tone tags (applied on top of affect base tag) βββββββββββββ
|
| 13 |
|
|
@@ -94,16 +94,12 @@ def _run(state: PipelineState, tier: str) -> dict:
|
|
| 94 |
4,
|
| 95 |
)
|
| 96 |
|
| 97 |
-
# Mirror chat_complete's tier collapsing so the reported model matches what ran.
|
| 98 |
-
actual_tier = "local" if settings.active_llm_tier == "local" else tier
|
| 99 |
-
actual_model = active_model(actual_tier)
|
| 100 |
-
|
| 101 |
return {
|
| 102 |
"augmented_prompt": prompt,
|
| 103 |
"candidates": candidates,
|
| 104 |
"selected_response": selected,
|
| 105 |
-
"llm_tier_used":
|
| 106 |
-
"llm_model_used":
|
| 107 |
"latency_log": latency_log,
|
| 108 |
"guardrail_passed": guard["passed"],
|
| 109 |
}
|
|
|
|
| 7 |
from backend.generation.llm_client import active_model, chat_complete
|
| 8 |
from backend.guardrails.checks import check_output
|
| 9 |
from backend.pipeline.state import PipelineState
|
| 10 |
+
from backend.sensing.labels import GESTURE_TO_TAG
|
| 11 |
|
| 12 |
# ββ Persona-specific tone tags (applied on top of affect base tag) βββββββββββββ
|
| 13 |
|
|
|
|
| 94 |
4,
|
| 95 |
)
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
return {
|
| 98 |
"augmented_prompt": prompt,
|
| 99 |
"candidates": candidates,
|
| 100 |
"selected_response": selected,
|
| 101 |
+
"llm_tier_used": tier,
|
| 102 |
+
"llm_model_used": active_model(tier),
|
| 103 |
"latency_log": latency_log,
|
| 104 |
"guardrail_passed": guard["passed"],
|
| 105 |
}
|
backend/pipeline/nodes/retrieval.py
CHANGED
|
@@ -26,14 +26,13 @@ def run_fast(state: PipelineState) -> dict:
|
|
| 26 |
top_k=settings.retrieval_fast_k,
|
| 27 |
rerank_k=settings.retrieval_fast_k,
|
| 28 |
bucket_filter=bucket_hint,
|
| 29 |
-
use_reranker=False,
|
| 30 |
)
|
| 31 |
|
| 32 |
return _build_return(state, chunks, "fast", t0)
|
| 33 |
|
| 34 |
|
| 35 |
def run_full(state: PipelineState) -> dict:
|
| 36 |
-
"""Full retrieval path
|
| 37 |
t0 = time.perf_counter()
|
| 38 |
|
| 39 |
# Prefer gaze hint > intent bucket hint > None
|
|
@@ -49,7 +48,6 @@ def run_full(state: PipelineState) -> dict:
|
|
| 49 |
top_k=settings.retrieval_top_k,
|
| 50 |
rerank_k=settings.retrieval_rerank_k,
|
| 51 |
bucket_filter=bucket_hint,
|
| 52 |
-
use_reranker=True,
|
| 53 |
)
|
| 54 |
|
| 55 |
return _build_return(state, chunks, "full", t0)
|
|
|
|
| 26 |
top_k=settings.retrieval_fast_k,
|
| 27 |
rerank_k=settings.retrieval_fast_k,
|
| 28 |
bucket_filter=bucket_hint,
|
|
|
|
| 29 |
)
|
| 30 |
|
| 31 |
return _build_return(state, chunks, "fast", t0)
|
| 32 |
|
| 33 |
|
| 34 |
def run_full(state: PipelineState) -> dict:
|
| 35 |
+
"""Full retrieval path: top_k cosine matches narrowed to rerank_k."""
|
| 36 |
t0 = time.perf_counter()
|
| 37 |
|
| 38 |
# Prefer gaze hint > intent bucket hint > None
|
|
|
|
| 48 |
top_k=settings.retrieval_top_k,
|
| 49 |
rerank_k=settings.retrieval_rerank_k,
|
| 50 |
bucket_filter=bucket_hint,
|
|
|
|
| 51 |
)
|
| 52 |
|
| 53 |
return _build_return(state, chunks, "full", t0)
|
backend/pipeline/state.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
| 1 |
-
# Typed state flowing through every
|
| 2 |
from __future__ import annotations
|
| 3 |
|
| 4 |
-
import
|
| 5 |
-
from typing import Annotated, Any
|
| 6 |
|
| 7 |
from typing_extensions import TypedDict
|
| 8 |
|
|
@@ -26,7 +25,7 @@ class RetrievedChunk(TypedDict):
|
|
| 26 |
text: str
|
| 27 |
bucket: str # family | medical | hobbies | daily_routine | social
|
| 28 |
user: str
|
| 29 |
-
score: float #
|
| 30 |
|
| 31 |
|
| 32 |
class SubIntent(TypedDict):
|
|
@@ -66,7 +65,7 @@ class PipelineState(TypedDict):
|
|
| 66 |
# ββ Session context (set at turn start, stable across nodes) ββββββββββββββ
|
| 67 |
user_id: str
|
| 68 |
persona_profile: dict[str, Any] # full profile from users.json
|
| 69 |
-
session_history:
|
| 70 |
turn_id: int
|
| 71 |
|
| 72 |
# ββ L1: Sensing outputs βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -89,10 +88,10 @@ class PipelineState(TypedDict):
|
|
| 89 |
augmented_prompt: str | None
|
| 90 |
candidates: list[str] # 2-3 candidate responses
|
| 91 |
selected_response: str | None
|
| 92 |
-
llm_tier_used: str # "primary" | "fallback"
|
| 93 |
llm_model_used: str # actual model name (e.g. "gemma4:31b-cloud")
|
| 94 |
|
| 95 |
# ββ L5: Feedback / tracking βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 96 |
latency_log: LatencyLog | None
|
| 97 |
-
|
| 98 |
guardrail_passed: bool
|
|
|
|
| 1 |
+
# Typed state flowing through every pipeline node.
|
| 2 |
from __future__ import annotations
|
| 3 |
|
| 4 |
+
from typing import Any
|
|
|
|
| 5 |
|
| 6 |
from typing_extensions import TypedDict
|
| 7 |
|
|
|
|
| 25 |
text: str
|
| 26 |
bucket: str # family | medical | hobbies | daily_routine | social
|
| 27 |
user: str
|
| 28 |
+
score: float # cosine similarity from the embedder
|
| 29 |
|
| 30 |
|
| 31 |
class SubIntent(TypedDict):
|
|
|
|
| 65 |
# ββ Session context (set at turn start, stable across nodes) ββββββββββββββ
|
| 66 |
user_id: str
|
| 67 |
persona_profile: dict[str, Any] # full profile from users.json
|
| 68 |
+
session_history: list[dict]
|
| 69 |
turn_id: int
|
| 70 |
|
| 71 |
# ββ L1: Sensing outputs βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 88 |
augmented_prompt: str | None
|
| 89 |
candidates: list[str] # 2-3 candidate responses
|
| 90 |
selected_response: str | None
|
| 91 |
+
llm_tier_used: str # "primary" | "fallback"
|
| 92 |
llm_model_used: str # actual model name (e.g. "gemma4:31b-cloud")
|
| 93 |
|
| 94 |
# ββ L5: Feedback / tracking βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 95 |
latency_log: LatencyLog | None
|
| 96 |
+
run_id: str | None # UUID assigned per turn; logged to logs/turns.jsonl
|
| 97 |
guardrail_passed: bool
|
backend/retrieval/clustering.py
DELETED
|
@@ -1,84 +0,0 @@
|
|
| 1 |
-
# HDBSCAN-based semantic bucketing over BGE embeddings.
|
| 2 |
-
from __future__ import annotations
|
| 3 |
-
|
| 4 |
-
import json
|
| 5 |
-
|
| 6 |
-
import numpy as np
|
| 7 |
-
|
| 8 |
-
from backend.config.settings import settings
|
| 9 |
-
from backend.retrieval.vector_store import _get_embedder
|
| 10 |
-
|
| 11 |
-
BUCKET_LABELS = ["family", "medical", "hobbies", "daily_routine", "social"]
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
def cluster_persona_memories(user_id: str) -> dict[str, list[str]]:
|
| 15 |
-
# Embed all memory chunks for a persona and cluster with HDBSCAN.
|
| 16 |
-
import hdbscan
|
| 17 |
-
|
| 18 |
-
memory_path = settings.memories_dir / f"{user_id}.json"
|
| 19 |
-
with open(memory_path) as f:
|
| 20 |
-
persona = json.load(f)
|
| 21 |
-
|
| 22 |
-
texts, true_buckets = [], []
|
| 23 |
-
for bucket, memories in persona["memory_buckets"].items():
|
| 24 |
-
for mem in memories:
|
| 25 |
-
texts.append(mem)
|
| 26 |
-
true_buckets.append(bucket)
|
| 27 |
-
|
| 28 |
-
embedder = _get_embedder()
|
| 29 |
-
vecs = embedder.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
|
| 30 |
-
|
| 31 |
-
clusterer = hdbscan.HDBSCAN(
|
| 32 |
-
min_cluster_size=3,
|
| 33 |
-
min_samples=2,
|
| 34 |
-
metric="euclidean",
|
| 35 |
-
)
|
| 36 |
-
labels = clusterer.fit_predict(vecs)
|
| 37 |
-
|
| 38 |
-
clusters: dict[str, list[str]] = {}
|
| 39 |
-
for text, label, _true_bucket in zip(texts, labels, true_buckets):
|
| 40 |
-
key = f"cluster_{label}" if label >= 0 else "noise"
|
| 41 |
-
clusters.setdefault(key, []).append(text)
|
| 42 |
-
|
| 43 |
-
return clusters
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
def evaluate_bucket_alignment(user_id: str) -> dict:
|
| 47 |
-
# Compare HDBSCAN clusters against hand-authored bucket labels, return purity scores.
|
| 48 |
-
import hdbscan
|
| 49 |
-
|
| 50 |
-
memory_path = settings.memories_dir / f"{user_id}.json"
|
| 51 |
-
with open(memory_path) as f:
|
| 52 |
-
persona = json.load(f)
|
| 53 |
-
|
| 54 |
-
texts, true_buckets = [], []
|
| 55 |
-
for bucket, memories in persona["memory_buckets"].items():
|
| 56 |
-
for mem in memories:
|
| 57 |
-
texts.append(mem)
|
| 58 |
-
true_buckets.append(bucket)
|
| 59 |
-
|
| 60 |
-
embedder = _get_embedder()
|
| 61 |
-
vecs = embedder.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
|
| 62 |
-
|
| 63 |
-
clusterer = hdbscan.HDBSCAN(min_cluster_size=3, min_samples=2, metric="euclidean")
|
| 64 |
-
pred_labels = clusterer.fit_predict(vecs)
|
| 65 |
-
|
| 66 |
-
cluster_bucket_counts: dict[int, dict[str, int]] = {}
|
| 67 |
-
for pred, true in zip(pred_labels, true_buckets):
|
| 68 |
-
cluster_bucket_counts.setdefault(pred, {})
|
| 69 |
-
cluster_bucket_counts[pred][true] = cluster_bucket_counts[pred].get(true, 0) + 1
|
| 70 |
-
|
| 71 |
-
purity_scores = {}
|
| 72 |
-
for cluster_id, bucket_counts in cluster_bucket_counts.items():
|
| 73 |
-
total = sum(bucket_counts.values())
|
| 74 |
-
dominant = max(bucket_counts.values())
|
| 75 |
-
purity_scores[cluster_id] = round(dominant / total, 3)
|
| 76 |
-
|
| 77 |
-
return {
|
| 78 |
-
"n_clusters": len([k for k in purity_scores if k >= 0]),
|
| 79 |
-
"n_noise": cluster_bucket_counts.get(-1, {}),
|
| 80 |
-
"cluster_purity": purity_scores,
|
| 81 |
-
"mean_purity": round(
|
| 82 |
-
np.mean([v for k, v in purity_scores.items() if k >= 0] or [0.0]), 3
|
| 83 |
-
),
|
| 84 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/retrieval/vector_store.py
CHANGED
|
@@ -1,102 +1,96 @@
|
|
| 1 |
-
#
|
| 2 |
-
from __future__ import annotations
|
| 3 |
-
|
| 4 |
import json
|
| 5 |
from functools import lru_cache
|
| 6 |
from pathlib import Path
|
| 7 |
|
| 8 |
-
import
|
| 9 |
|
| 10 |
from backend.config.settings import settings
|
| 11 |
from backend.pipeline.state import RetrievedChunk
|
| 12 |
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
return SentenceTransformer(settings.embed_model)
|
| 19 |
|
|
|
|
| 20 |
|
| 21 |
-
@lru_cache(maxsize=1)
|
| 22 |
-
def _get_reranker():
|
| 23 |
-
from sentence_transformers import CrossEncoder
|
| 24 |
|
| 25 |
-
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
@lru_cache(maxsize=1)
|
| 29 |
-
def
|
| 30 |
-
import
|
| 31 |
-
|
| 32 |
-
return faiss
|
| 33 |
|
|
|
|
| 34 |
|
| 35 |
-
# ββ Index cache (one FAISS index per user_id) βββββββββββββββββββββββββββββββββ
|
| 36 |
|
| 37 |
-
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
-
def load_index(user_id: str):
|
| 41 |
if user_id not in _index_cache:
|
| 42 |
-
faiss = _get_faiss()
|
| 43 |
store_path = settings.faiss_store_dir / user_id
|
| 44 |
-
|
|
|
|
|
|
|
| 45 |
with open(store_path / "meta.json") as f:
|
| 46 |
meta = json.load(f)
|
| 47 |
-
_index_cache[user_id] = (
|
| 48 |
return _index_cache[user_id]
|
| 49 |
|
| 50 |
|
| 51 |
-
#
|
| 52 |
-
|
| 53 |
-
|
| 54 |
def retrieve(
|
| 55 |
query: str,
|
| 56 |
user_id: str,
|
| 57 |
top_k: int = 5,
|
| 58 |
rerank_k: int = 3,
|
| 59 |
bucket_filter: str | None = None,
|
| 60 |
-
use_reranker: bool = True,
|
| 61 |
) -> list[RetrievedChunk]:
|
| 62 |
embedder = _get_embedder()
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
q_vec = embedder.encode(
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
if bucket_filter:
|
| 71 |
-
filtered = [c for c in candidates if c["bucket"] == bucket_filter]
|
| 72 |
candidates = filtered if filtered else candidates # fallback: all buckets
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
RetrievedChunk(
|
| 81 |
-
text=c["text"], bucket=c["bucket"], user=c["user"], score=float(s)
|
| 82 |
-
)
|
| 83 |
-
for s, c in ranked[:rerank_k]
|
| 84 |
-
]
|
| 85 |
-
else:
|
| 86 |
-
top = [
|
| 87 |
-
RetrievedChunk(
|
| 88 |
-
text=c["text"], bucket=c["bucket"], user=c["user"], score=1.0
|
| 89 |
-
)
|
| 90 |
-
for c in candidates[:rerank_k]
|
| 91 |
-
]
|
| 92 |
-
|
| 93 |
-
return top
|
| 94 |
|
| 95 |
|
| 96 |
-
#
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
def build_index(persona_path: str | Path):
|
| 100 |
with open(persona_path) as f:
|
| 101 |
persona = json.load(f)
|
| 102 |
|
|
@@ -109,19 +103,20 @@ def build_index(persona_path: str | Path):
|
|
| 109 |
meta.append({"text": mem, "bucket": bucket, "user": user_name})
|
| 110 |
|
| 111 |
embedder = _get_embedder()
|
| 112 |
-
vecs = embedder.encode(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
-
dim = vecs.shape[1]
|
| 115 |
-
faiss = _get_faiss()
|
| 116 |
-
index = faiss.IndexFlatIP(dim)
|
| 117 |
-
index.add(vecs.astype(np.float32))
|
| 118 |
-
return index, meta
|
| 119 |
|
| 120 |
-
|
| 121 |
-
def save_index(index, meta: list[dict], save_dir: str | Path) -> None:
|
| 122 |
p = Path(save_dir)
|
| 123 |
p.mkdir(parents=True, exist_ok=True)
|
| 124 |
-
|
|
|
|
| 125 |
with open(p / "meta.json", "w") as f:
|
| 126 |
json.dump(meta, f, indent=2)
|
| 127 |
|
|
@@ -133,16 +128,15 @@ def build_all(
|
|
| 133 |
memories_dir = Path(memories_dir or settings.memories_dir)
|
| 134 |
store_dir = Path(store_dir or settings.faiss_store_dir)
|
| 135 |
|
|
|
|
| 136 |
for persona_file in sorted(memories_dir.glob("*.json")):
|
| 137 |
uid = persona_file.stem
|
| 138 |
print(f" Building index for {uid} β¦")
|
| 139 |
-
|
| 140 |
-
save_index(
|
| 141 |
print(f" Saved {len(meta)} chunks β {store_dir / uid}/")
|
| 142 |
print("\nAll indexes built.")
|
| 143 |
|
| 144 |
|
| 145 |
-
# ββ Entrypoint ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 146 |
-
|
| 147 |
if __name__ == "__main__":
|
| 148 |
build_all()
|
|
|
|
| 1 |
+
# BGE embeddings + torch-tensor cosine search (mps β cuda β cpu).
|
|
|
|
|
|
|
| 2 |
import json
|
| 3 |
from functools import lru_cache
|
| 4 |
from pathlib import Path
|
| 5 |
|
| 6 |
+
import torch
|
| 7 |
|
| 8 |
from backend.config.settings import settings
|
| 9 |
from backend.pipeline.state import RetrievedChunk
|
| 10 |
|
| 11 |
|
| 12 |
+
def _select_device() -> str:
|
| 13 |
+
if torch.backends.mps.is_available():
|
| 14 |
+
return "mps"
|
| 15 |
+
if torch.cuda.is_available():
|
| 16 |
+
return "cuda"
|
| 17 |
+
return "cpu"
|
| 18 |
|
|
|
|
| 19 |
|
| 20 |
+
_DEVICE = _select_device()
|
| 21 |
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
+
def get_device() -> str:
|
| 24 |
+
return _DEVICE
|
| 25 |
|
| 26 |
|
| 27 |
@lru_cache(maxsize=1)
|
| 28 |
+
def _get_embedder():
|
| 29 |
+
from sentence_transformers import SentenceTransformer
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
return SentenceTransformer(settings.embed_model, device=_DEVICE)
|
| 32 |
|
|
|
|
| 33 |
|
| 34 |
+
# Index cache: one (vectors_tensor, meta) per user_id.
|
| 35 |
+
_index_cache: dict[str, tuple[torch.Tensor, list[dict]]] = {}
|
| 36 |
|
| 37 |
|
| 38 |
+
def load_index(user_id: str) -> tuple[torch.Tensor, list[dict]]:
|
| 39 |
if user_id not in _index_cache:
|
|
|
|
| 40 |
store_path = settings.faiss_store_dir / user_id
|
| 41 |
+
vecs = torch.load(
|
| 42 |
+
store_path / "vectors.pt", map_location=_DEVICE, weights_only=True
|
| 43 |
+
)
|
| 44 |
with open(store_path / "meta.json") as f:
|
| 45 |
meta = json.load(f)
|
| 46 |
+
_index_cache[user_id] = (vecs, meta)
|
| 47 |
return _index_cache[user_id]
|
| 48 |
|
| 49 |
|
| 50 |
+
# Retrieve.
|
|
|
|
|
|
|
| 51 |
def retrieve(
|
| 52 |
query: str,
|
| 53 |
user_id: str,
|
| 54 |
top_k: int = 5,
|
| 55 |
rerank_k: int = 3,
|
| 56 |
bucket_filter: str | None = None,
|
|
|
|
| 57 |
) -> list[RetrievedChunk]:
|
| 58 |
embedder = _get_embedder()
|
| 59 |
+
vecs, meta = load_index(user_id)
|
| 60 |
+
|
| 61 |
+
q_vec = embedder.encode(
|
| 62 |
+
[query],
|
| 63 |
+
convert_to_tensor=True,
|
| 64 |
+
normalize_embeddings=True,
|
| 65 |
+
device=_DEVICE,
|
| 66 |
+
)[0]
|
| 67 |
+
|
| 68 |
+
scores = vecs @ q_vec # cosine sim, vectors are L2-normalised
|
| 69 |
+
k = min(top_k, scores.shape[0])
|
| 70 |
+
top_scores, top_idxs = torch.topk(scores, k)
|
| 71 |
+
top_scores_list = top_scores.tolist()
|
| 72 |
+
top_idxs_list = top_idxs.tolist()
|
| 73 |
+
|
| 74 |
+
candidates = [
|
| 75 |
+
(top_scores_list[i], meta[idx])
|
| 76 |
+
for i, idx in enumerate(top_idxs_list)
|
| 77 |
+
if 0 <= idx < len(meta)
|
| 78 |
+
]
|
| 79 |
|
| 80 |
if bucket_filter:
|
| 81 |
+
filtered = [(s, c) for s, c in candidates if c["bucket"] == bucket_filter]
|
| 82 |
candidates = filtered if filtered else candidates # fallback: all buckets
|
| 83 |
|
| 84 |
+
return [
|
| 85 |
+
RetrievedChunk(
|
| 86 |
+
text=c["text"], bucket=c["bucket"], user=c["user"], score=float(s)
|
| 87 |
+
)
|
| 88 |
+
for s, c in candidates[:rerank_k]
|
| 89 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
|
| 92 |
+
# Index builder.
|
| 93 |
+
def build_index(persona_path: str | Path) -> tuple[torch.Tensor, list[dict]]:
|
|
|
|
|
|
|
| 94 |
with open(persona_path) as f:
|
| 95 |
persona = json.load(f)
|
| 96 |
|
|
|
|
| 103 |
meta.append({"text": mem, "bucket": bucket, "user": user_name})
|
| 104 |
|
| 105 |
embedder = _get_embedder()
|
| 106 |
+
vecs = embedder.encode(
|
| 107 |
+
chunks,
|
| 108 |
+
convert_to_tensor=True,
|
| 109 |
+
normalize_embeddings=True,
|
| 110 |
+
device=_DEVICE,
|
| 111 |
+
)
|
| 112 |
+
return vecs, meta
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
+
def save_index(vecs: torch.Tensor, meta: list[dict], save_dir: str | Path) -> None:
|
|
|
|
| 116 |
p = Path(save_dir)
|
| 117 |
p.mkdir(parents=True, exist_ok=True)
|
| 118 |
+
# Move to CPU before saving so the file is portable across devices.
|
| 119 |
+
torch.save(vecs.detach().cpu(), p / "vectors.pt")
|
| 120 |
with open(p / "meta.json", "w") as f:
|
| 121 |
json.dump(meta, f, indent=2)
|
| 122 |
|
|
|
|
| 128 |
memories_dir = Path(memories_dir or settings.memories_dir)
|
| 129 |
store_dir = Path(store_dir or settings.faiss_store_dir)
|
| 130 |
|
| 131 |
+
print(f"Embedder device: {_DEVICE}")
|
| 132 |
for persona_file in sorted(memories_dir.glob("*.json")):
|
| 133 |
uid = persona_file.stem
|
| 134 |
print(f" Building index for {uid} β¦")
|
| 135 |
+
vecs, meta = build_index(persona_file)
|
| 136 |
+
save_index(vecs, meta, store_dir / uid)
|
| 137 |
print(f" Saved {len(meta)} chunks β {store_dir / uid}/")
|
| 138 |
print("\nAll indexes built.")
|
| 139 |
|
| 140 |
|
|
|
|
|
|
|
| 141 |
if __name__ == "__main__":
|
| 142 |
build_all()
|
backend/sensing/air_writing.py
DELETED
|
@@ -1,155 +0,0 @@
|
|
| 1 |
-
# Air writing recognition β fingertip trajectory β DTW character matching.
|
| 2 |
-
from __future__ import annotations
|
| 3 |
-
|
| 4 |
-
import time
|
| 5 |
-
from dataclasses import dataclass, field
|
| 6 |
-
|
| 7 |
-
import numpy as np
|
| 8 |
-
|
| 9 |
-
from backend.config.settings import settings
|
| 10 |
-
|
| 11 |
-
mp = None
|
| 12 |
-
|
| 13 |
-
# ββ Landmark index βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 14 |
-
_INDEX_TIP = 8
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
@dataclass
|
| 18 |
-
class AirWriter:
|
| 19 |
-
"""
|
| 20 |
-
Stateful air-writing recogniser. Feed frames from a webcam loop.
|
| 21 |
-
Call `get_text()` to retrieve and clear the current buffer.
|
| 22 |
-
"""
|
| 23 |
-
|
| 24 |
-
_trajectory: list[tuple[float, float]] = field(default_factory=list)
|
| 25 |
-
_in_stroke: bool = False
|
| 26 |
-
_stroke_end_time: float = field(default=0.0)
|
| 27 |
-
_text_buffer: list[str] = field(default_factory=list)
|
| 28 |
-
_templates: dict[str, np.ndarray] = field(default_factory=dict)
|
| 29 |
-
|
| 30 |
-
def __post_init__(self):
|
| 31 |
-
global mp
|
| 32 |
-
import mediapipe as mp
|
| 33 |
-
|
| 34 |
-
self._hands = mp.solutions.hands.Hands(
|
| 35 |
-
static_image_mode=False,
|
| 36 |
-
max_num_hands=1,
|
| 37 |
-
min_detection_confidence=0.6,
|
| 38 |
-
min_tracking_confidence=0.5,
|
| 39 |
-
)
|
| 40 |
-
self._prev_pt: tuple[float, float] | None = None
|
| 41 |
-
self._templates = _load_templates()
|
| 42 |
-
|
| 43 |
-
def process_frame(self, bgr_frame) -> str | None:
|
| 44 |
-
"""
|
| 45 |
-
Process one frame. Returns a recognised character when a stroke
|
| 46 |
-
completes, or None otherwise.
|
| 47 |
-
"""
|
| 48 |
-
import cv2
|
| 49 |
-
|
| 50 |
-
rgb = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
|
| 51 |
-
result = self._hands.process(rgb)
|
| 52 |
-
|
| 53 |
-
if not result.multi_hand_landmarks:
|
| 54 |
-
self._prev_pt = None
|
| 55 |
-
return self._check_stroke_end()
|
| 56 |
-
|
| 57 |
-
h, w = bgr_frame.shape[:2]
|
| 58 |
-
lm = result.multi_hand_landmarks[0].landmark
|
| 59 |
-
tip = (lm[_INDEX_TIP].x * w, lm[_INDEX_TIP].y * h)
|
| 60 |
-
|
| 61 |
-
velocity = 0.0
|
| 62 |
-
if self._prev_pt is not None:
|
| 63 |
-
velocity = np.linalg.norm(np.array(tip) - np.array(self._prev_pt))
|
| 64 |
-
self._prev_pt = tip
|
| 65 |
-
|
| 66 |
-
start_v = settings.air_write_velocity_start
|
| 67 |
-
end_v = settings.air_write_velocity_end
|
| 68 |
-
|
| 69 |
-
if velocity > start_v:
|
| 70 |
-
self._in_stroke = True
|
| 71 |
-
self._trajectory.append(tip)
|
| 72 |
-
self._stroke_end_time = 0.0
|
| 73 |
-
elif self._in_stroke and velocity < end_v:
|
| 74 |
-
if self._stroke_end_time == 0.0:
|
| 75 |
-
self._stroke_end_time = time.time()
|
| 76 |
-
return self._check_stroke_end()
|
| 77 |
-
|
| 78 |
-
return None
|
| 79 |
-
|
| 80 |
-
def _check_stroke_end(self) -> str | None:
|
| 81 |
-
if not self._in_stroke or self._stroke_end_time == 0.0:
|
| 82 |
-
return None
|
| 83 |
-
gap_s = settings.air_write_end_gap_ms / 1000.0
|
| 84 |
-
if time.time() - self._stroke_end_time >= gap_s:
|
| 85 |
-
char = self._recognise(self._trajectory)
|
| 86 |
-
self._trajectory = []
|
| 87 |
-
self._in_stroke = False
|
| 88 |
-
self._stroke_end_time = 0.0
|
| 89 |
-
if char:
|
| 90 |
-
self._text_buffer.append(char)
|
| 91 |
-
return char
|
| 92 |
-
return None
|
| 93 |
-
|
| 94 |
-
def _recognise(self, trajectory: list[tuple[float, float]]) -> str | None:
|
| 95 |
-
if len(trajectory) < 5 or not self._templates:
|
| 96 |
-
return None
|
| 97 |
-
query = _normalise_trajectory(np.array(trajectory))
|
| 98 |
-
best_char, best_dist = None, float("inf")
|
| 99 |
-
for char, template in self._templates.items():
|
| 100 |
-
dist = _dtw_distance(query, template)
|
| 101 |
-
if dist < best_dist:
|
| 102 |
-
best_dist = dist
|
| 103 |
-
best_char = char
|
| 104 |
-
return best_char
|
| 105 |
-
|
| 106 |
-
def get_text(self) -> str:
|
| 107 |
-
"""Return and clear the accumulated air-written text."""
|
| 108 |
-
text = "".join(self._text_buffer)
|
| 109 |
-
self._text_buffer.clear()
|
| 110 |
-
return text
|
| 111 |
-
|
| 112 |
-
def release(self):
|
| 113 |
-
self._hands.close()
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
# ββ DTW helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
def _normalise_trajectory(pts: np.ndarray) -> np.ndarray:
|
| 120 |
-
"""Scale trajectory to unit bounding box, resample to 32 points."""
|
| 121 |
-
pts = pts - pts.min(axis=0)
|
| 122 |
-
scale = pts.max(axis=0) + 1e-6
|
| 123 |
-
pts = pts / scale
|
| 124 |
-
# Resample to fixed length via linear interpolation
|
| 125 |
-
t_old = np.linspace(0, 1, len(pts))
|
| 126 |
-
t_new = np.linspace(0, 1, 32)
|
| 127 |
-
return np.column_stack(
|
| 128 |
-
[
|
| 129 |
-
np.interp(t_new, t_old, pts[:, 0]),
|
| 130 |
-
np.interp(t_new, t_old, pts[:, 1]),
|
| 131 |
-
]
|
| 132 |
-
)
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
def _dtw_distance(a: np.ndarray, b: np.ndarray) -> float:
|
| 136 |
-
"""Simple O(nΒ²) DTW β trajectories are short (32 pts), so this is fine."""
|
| 137 |
-
n, m = len(a), len(b)
|
| 138 |
-
dtw = np.full((n + 1, m + 1), np.inf)
|
| 139 |
-
dtw[0, 0] = 0.0
|
| 140 |
-
for i in range(1, n + 1):
|
| 141 |
-
for j in range(1, m + 1):
|
| 142 |
-
cost = np.linalg.norm(a[i - 1] - b[j - 1])
|
| 143 |
-
dtw[i, j] = cost + min(dtw[i - 1, j], dtw[i, j - 1], dtw[i - 1, j - 1])
|
| 144 |
-
return float(dtw[n, m])
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
def _load_templates() -> dict[str, np.ndarray]:
|
| 148 |
-
template_dir = settings.data_dir / "air_write_templates"
|
| 149 |
-
if not template_dir.exists():
|
| 150 |
-
return {}
|
| 151 |
-
templates = {}
|
| 152 |
-
for f in template_dir.glob("*.npy"):
|
| 153 |
-
char = f.stem # filename = character label
|
| 154 |
-
templates[char] = np.load(f)
|
| 155 |
-
return templates
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/sensing/face_mesh.py
DELETED
|
@@ -1,145 +0,0 @@
|
|
| 1 |
-
# Facial affect detection via MediaPipe Face Mesh (MAR/EAR/BRI/LCP β emotion).
|
| 2 |
-
from __future__ import annotations
|
| 3 |
-
|
| 4 |
-
from dataclasses import dataclass, field
|
| 5 |
-
|
| 6 |
-
import numpy as np
|
| 7 |
-
|
| 8 |
-
from backend.config.settings import settings
|
| 9 |
-
from backend.pipeline.state import AffectState, AffectVector
|
| 10 |
-
|
| 11 |
-
mp = None
|
| 12 |
-
cv2 = None
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
# ββ MediaPipe landmark indices ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 16 |
-
|
| 17 |
-
# MAR β mouth vertical / horizontal ratio
|
| 18 |
-
_MOUTH_TOP = 13
|
| 19 |
-
_MOUTH_BOTTOM = 14
|
| 20 |
-
_MOUTH_LEFT = 61
|
| 21 |
-
_MOUTH_RIGHT = 291
|
| 22 |
-
|
| 23 |
-
# EAR β eye vertical / horizontal ratio (right eye)
|
| 24 |
-
_EYE_TOP = 159
|
| 25 |
-
_EYE_BOTTOM = 145
|
| 26 |
-
_EYE_LEFT = 33
|
| 27 |
-
_EYE_RIGHT = 133
|
| 28 |
-
|
| 29 |
-
# BRI β brow vertical displacement relative to eye centre
|
| 30 |
-
_BROW_LEFT = 70
|
| 31 |
-
_BROW_RIGHT = 300
|
| 32 |
-
|
| 33 |
-
# LCP β mouth corner horizontal displacement from neutral baseline
|
| 34 |
-
_CORNER_LEFT = 61
|
| 35 |
-
_CORNER_RIGHT = 291
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
# ββ Affect classes ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 39 |
-
|
| 40 |
-
AFFECT_CLASSES = ["HAPPY", "FRUSTRATED", "NEUTRAL", "SURPRISED"]
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
@dataclass
|
| 44 |
-
class AffectDetector:
|
| 45 |
-
"""
|
| 46 |
-
Stateful detector that maintains EMA-smoothed affect across frames.
|
| 47 |
-
Create one instance per session and call `process_frame` each frame.
|
| 48 |
-
"""
|
| 49 |
-
|
| 50 |
-
_smoothed: AffectVector = field(
|
| 51 |
-
default_factory=lambda: AffectVector(MAR=0.0, EAR=0.3, BRI=0.0, LCP=0.0)
|
| 52 |
-
)
|
| 53 |
-
_neutral_lcp: float = 0.0 # calibrated at session start
|
| 54 |
-
_calibrated: bool = False
|
| 55 |
-
|
| 56 |
-
def __post_init__(self):
|
| 57 |
-
global mp, cv2
|
| 58 |
-
import cv2
|
| 59 |
-
import mediapipe as mp
|
| 60 |
-
|
| 61 |
-
self._face_mesh = mp.solutions.face_mesh.FaceMesh(
|
| 62 |
-
static_image_mode=False,
|
| 63 |
-
max_num_faces=1,
|
| 64 |
-
refine_landmarks=True, # enables iris landmarks (468-477)
|
| 65 |
-
min_detection_confidence=0.5,
|
| 66 |
-
min_tracking_confidence=0.5,
|
| 67 |
-
)
|
| 68 |
-
|
| 69 |
-
def process_frame(self, bgr_frame: np.ndarray) -> AffectState | None:
|
| 70 |
-
"""
|
| 71 |
-
Process one BGR frame from OpenCV and return the current AffectState,
|
| 72 |
-
or None if no face is detected.
|
| 73 |
-
"""
|
| 74 |
-
rgb = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
|
| 75 |
-
result = self._face_mesh.process(rgb)
|
| 76 |
-
|
| 77 |
-
if not result.multi_face_landmarks:
|
| 78 |
-
return None
|
| 79 |
-
|
| 80 |
-
lm = result.multi_face_landmarks[0].landmark
|
| 81 |
-
h, w = bgr_frame.shape[:2]
|
| 82 |
-
|
| 83 |
-
def pt(idx):
|
| 84 |
-
l = lm[idx]
|
| 85 |
-
return np.array([l.x * w, l.y * h])
|
| 86 |
-
|
| 87 |
-
raw = self._compute_features(pt)
|
| 88 |
-
|
| 89 |
-
if not self._calibrated:
|
| 90 |
-
self._neutral_lcp = raw["LCP"]
|
| 91 |
-
self._calibrated = True
|
| 92 |
-
|
| 93 |
-
raw["LCP"] = raw["LCP"] - self._neutral_lcp # relative to neutral baseline
|
| 94 |
-
|
| 95 |
-
alpha = settings.affect_ema_alpha
|
| 96 |
-
smoothed = AffectVector(
|
| 97 |
-
MAR=alpha * raw["MAR"] + (1 - alpha) * self._smoothed["MAR"],
|
| 98 |
-
EAR=alpha * raw["EAR"] + (1 - alpha) * self._smoothed["EAR"],
|
| 99 |
-
BRI=alpha * raw["BRI"] + (1 - alpha) * self._smoothed["BRI"],
|
| 100 |
-
LCP=alpha * raw["LCP"] + (1 - alpha) * self._smoothed["LCP"],
|
| 101 |
-
)
|
| 102 |
-
self._smoothed = smoothed
|
| 103 |
-
|
| 104 |
-
emotion = self._classify(smoothed)
|
| 105 |
-
return AffectState(emotion=emotion, vector=raw, smoothed=smoothed)
|
| 106 |
-
|
| 107 |
-
def _compute_features(self, pt) -> dict:
|
| 108 |
-
# MAR
|
| 109 |
-
mouth_v = np.linalg.norm(pt(_MOUTH_TOP) - pt(_MOUTH_BOTTOM))
|
| 110 |
-
mouth_h = np.linalg.norm(pt(_MOUTH_LEFT) - pt(_MOUTH_RIGHT))
|
| 111 |
-
MAR = mouth_v / (mouth_h + 1e-6)
|
| 112 |
-
|
| 113 |
-
# EAR
|
| 114 |
-
eye_v = np.linalg.norm(pt(_EYE_TOP) - pt(_EYE_BOTTOM))
|
| 115 |
-
eye_h = np.linalg.norm(pt(_EYE_LEFT) - pt(_EYE_RIGHT))
|
| 116 |
-
EAR = eye_v / (eye_h + 1e-6)
|
| 117 |
-
|
| 118 |
-
# BRI β average brow displacement relative to eye centre
|
| 119 |
-
eye_center = (pt(_EYE_LEFT) + pt(_EYE_RIGHT)) / 2
|
| 120 |
-
inter_ocular = eye_h
|
| 121 |
-
brow_mid = (pt(_BROW_LEFT) + pt(_BROW_RIGHT)) / 2
|
| 122 |
-
BRI = (eye_center[1] - brow_mid[1]) / (inter_ocular + 1e-6)
|
| 123 |
-
|
| 124 |
-
# LCP β average horizontal mouth corner displacement
|
| 125 |
-
LCP = float((pt(_CORNER_LEFT)[0] + pt(_CORNER_RIGHT)[0]) / 2)
|
| 126 |
-
|
| 127 |
-
return {
|
| 128 |
-
"MAR": float(MAR),
|
| 129 |
-
"EAR": float(EAR),
|
| 130 |
-
"BRI": float(BRI),
|
| 131 |
-
"LCP": float(LCP),
|
| 132 |
-
}
|
| 133 |
-
|
| 134 |
-
@staticmethod
|
| 135 |
-
def _classify(v: AffectVector) -> str:
|
| 136 |
-
if v["BRI"] > 0.25 and v["MAR"] > 0.3:
|
| 137 |
-
return "SURPRISED"
|
| 138 |
-
if v["EAR"] < 0.15 and v["LCP"] < -5:
|
| 139 |
-
return "FRUSTRATED"
|
| 140 |
-
if v["LCP"] > 5:
|
| 141 |
-
return "HAPPY"
|
| 142 |
-
return "NEUTRAL"
|
| 143 |
-
|
| 144 |
-
def release(self):
|
| 145 |
-
self._face_mesh.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/sensing/gaze.py
DELETED
|
@@ -1,92 +0,0 @@
|
|
| 1 |
-
# Gaze-based retrieval bucket hinting via MediaPipe iris landmarks.
|
| 2 |
-
from __future__ import annotations
|
| 3 |
-
|
| 4 |
-
import time
|
| 5 |
-
from dataclasses import dataclass, field
|
| 6 |
-
|
| 7 |
-
from backend.config.settings import settings
|
| 8 |
-
|
| 9 |
-
mp = None
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
# ββ Iris landmark indices ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 13 |
-
# MediaPipe refine_landmarks=True adds iris landmarks 468-477
|
| 14 |
-
_LEFT_IRIS_CENTER = 468
|
| 15 |
-
_RIGHT_IRIS_CENTER = 473
|
| 16 |
-
|
| 17 |
-
# ββ Screen region β bucket map βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 18 |
-
# Defined as (x_min, y_min, x_max, y_max) in normalised [0,1] coords
|
| 19 |
-
_REGION_BUCKET: list[tuple[tuple[float, float, float, float], str]] = [
|
| 20 |
-
((0.3, 0.3, 0.7, 0.7), "social"), # centre checked first (most specific)
|
| 21 |
-
((0.0, 0.0, 0.5, 0.5), "family"),
|
| 22 |
-
((0.5, 0.0, 1.0, 0.5), "medical"),
|
| 23 |
-
((0.0, 0.5, 0.5, 1.0), "hobbies"),
|
| 24 |
-
((0.5, 0.5, 1.0, 1.0), "daily_routine"),
|
| 25 |
-
]
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
@dataclass
|
| 29 |
-
class GazeTracker:
|
| 30 |
-
"""
|
| 31 |
-
Stateful gaze tracker. Call `process_frame` each frame.
|
| 32 |
-
Returns the bucket name when dwell threshold is exceeded, else None.
|
| 33 |
-
"""
|
| 34 |
-
|
| 35 |
-
_dwell_start: float = field(default=0.0)
|
| 36 |
-
_current_region: str | None = field(default=None)
|
| 37 |
-
|
| 38 |
-
def __post_init__(self):
|
| 39 |
-
global mp
|
| 40 |
-
import mediapipe as mp
|
| 41 |
-
|
| 42 |
-
self._face_mesh = mp.solutions.face_mesh.FaceMesh(
|
| 43 |
-
static_image_mode=False,
|
| 44 |
-
max_num_faces=1,
|
| 45 |
-
refine_landmarks=True,
|
| 46 |
-
min_detection_confidence=0.5,
|
| 47 |
-
min_tracking_confidence=0.5,
|
| 48 |
-
)
|
| 49 |
-
|
| 50 |
-
def process_frame(self, bgr_frame) -> str | None:
|
| 51 |
-
import cv2
|
| 52 |
-
|
| 53 |
-
rgb = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
|
| 54 |
-
result = self._face_mesh.process(rgb)
|
| 55 |
-
|
| 56 |
-
if not result.multi_face_landmarks:
|
| 57 |
-
self._reset()
|
| 58 |
-
return None
|
| 59 |
-
|
| 60 |
-
lm = result.multi_face_landmarks[0].landmark
|
| 61 |
-
|
| 62 |
-
# Average left + right iris centres for gaze estimate
|
| 63 |
-
gaze_x = (lm[_LEFT_IRIS_CENTER].x + lm[_RIGHT_IRIS_CENTER].x) / 2
|
| 64 |
-
gaze_y = (lm[_LEFT_IRIS_CENTER].y + lm[_RIGHT_IRIS_CENTER].y) / 2
|
| 65 |
-
|
| 66 |
-
bucket = self._region_for(gaze_x, gaze_y)
|
| 67 |
-
|
| 68 |
-
if bucket != self._current_region:
|
| 69 |
-
self._current_region = bucket
|
| 70 |
-
self._dwell_start = time.time()
|
| 71 |
-
return None
|
| 72 |
-
|
| 73 |
-
dwell = time.time() - self._dwell_start
|
| 74 |
-
if dwell >= settings.gaze_dwell_threshold_s and bucket is not None:
|
| 75 |
-
self._reset()
|
| 76 |
-
return bucket
|
| 77 |
-
|
| 78 |
-
return None
|
| 79 |
-
|
| 80 |
-
@staticmethod
|
| 81 |
-
def _region_for(x: float, y: float) -> str | None:
|
| 82 |
-
for (x0, y0, x1, y1), bucket in _REGION_BUCKET:
|
| 83 |
-
if x0 <= x <= x1 and y0 <= y <= y1:
|
| 84 |
-
return bucket
|
| 85 |
-
return None
|
| 86 |
-
|
| 87 |
-
def _reset(self):
|
| 88 |
-
self._dwell_start = 0.0
|
| 89 |
-
self._current_region = None
|
| 90 |
-
|
| 91 |
-
def release(self):
|
| 92 |
-
self._face_mesh.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/sensing/gesture.py
DELETED
|
@@ -1,102 +0,0 @@
|
|
| 1 |
-
# Hand gesture recognition via MediaPipe Hands.
|
| 2 |
-
from __future__ import annotations
|
| 3 |
-
|
| 4 |
-
import numpy as np
|
| 5 |
-
|
| 6 |
-
mp = None
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
# Gesture β prompt constraint tag mapping
|
| 10 |
-
GESTURE_TO_TAG: dict[str, str] = {
|
| 11 |
-
"THUMBS_UP": "[GESTURE:THUMBS_UP][TONE:AFFIRMATIVE]",
|
| 12 |
-
"THUMBS_DOWN": "[GESTURE:THUMBS_DOWN][TONE:NEGATIVE]",
|
| 13 |
-
"POINTING": "[GESTURE:POINTING][INTENT:REFERENTIAL]",
|
| 14 |
-
"WAVING": "[GESTURE:WAVING][INTENT:GREETING]",
|
| 15 |
-
}
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
class GestureClassifier:
|
| 19 |
-
"""
|
| 20 |
-
Stateful classifier β create one instance per session.
|
| 21 |
-
Feed MediaPipe hand landmark results each frame.
|
| 22 |
-
"""
|
| 23 |
-
|
| 24 |
-
def __init__(self):
|
| 25 |
-
global mp
|
| 26 |
-
import mediapipe as mp
|
| 27 |
-
|
| 28 |
-
self._hands = mp.solutions.hands.Hands(
|
| 29 |
-
static_image_mode=False,
|
| 30 |
-
max_num_hands=1,
|
| 31 |
-
min_detection_confidence=0.6,
|
| 32 |
-
min_tracking_confidence=0.5,
|
| 33 |
-
)
|
| 34 |
-
|
| 35 |
-
def process_frame(self, bgr_frame) -> str | None:
|
| 36 |
-
"""
|
| 37 |
-
Returns a gesture label string or None if no clear gesture is detected.
|
| 38 |
-
"""
|
| 39 |
-
import cv2
|
| 40 |
-
|
| 41 |
-
rgb = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
|
| 42 |
-
result = self._hands.process(rgb)
|
| 43 |
-
|
| 44 |
-
if not result.multi_hand_landmarks:
|
| 45 |
-
return None
|
| 46 |
-
|
| 47 |
-
lm = result.multi_hand_landmarks[0].landmark
|
| 48 |
-
pts = np.array([[l.x, l.y, l.z] for l in lm])
|
| 49 |
-
|
| 50 |
-
return self._classify(pts)
|
| 51 |
-
|
| 52 |
-
def gesture_tag(self, bgr_frame) -> str | None:
|
| 53 |
-
"""Convenience: returns the prompt tag directly, or None."""
|
| 54 |
-
gesture = self.process_frame(bgr_frame)
|
| 55 |
-
return GESTURE_TO_TAG.get(gesture) if gesture else None
|
| 56 |
-
|
| 57 |
-
@staticmethod
|
| 58 |
-
def _classify(pts: np.ndarray) -> str | None:
|
| 59 |
-
# Normalise: wrist at origin, scale by palm width
|
| 60 |
-
wrist = pts[0]
|
| 61 |
-
palm_width = np.linalg.norm(pts[5] - pts[17]) + 1e-6
|
| 62 |
-
p = (pts - wrist) / palm_width
|
| 63 |
-
|
| 64 |
-
thumb_tip = p[4]
|
| 65 |
-
index_tip = p[8]
|
| 66 |
-
middle_tip = p[12]
|
| 67 |
-
ring_tip = p[16]
|
| 68 |
-
pinky_tip = p[20]
|
| 69 |
-
index_mcp = p[5] # knuckle
|
| 70 |
-
|
| 71 |
-
# THUMBS_UP: thumb tip above wrist, other fingers curled
|
| 72 |
-
fingers_curled = all(
|
| 73 |
-
np.linalg.norm(tip) < np.linalg.norm(mcp)
|
| 74 |
-
for tip, mcp in [(index_tip, p[5]), (middle_tip, p[9]), (ring_tip, p[13])]
|
| 75 |
-
)
|
| 76 |
-
if thumb_tip[1] < -0.3 and fingers_curled:
|
| 77 |
-
return "THUMBS_UP"
|
| 78 |
-
|
| 79 |
-
# THUMBS_DOWN: thumb tip below wrist, other fingers curled
|
| 80 |
-
if thumb_tip[1] > 0.3 and fingers_curled:
|
| 81 |
-
return "THUMBS_DOWN"
|
| 82 |
-
|
| 83 |
-
# POINTING: index extended, others curled
|
| 84 |
-
index_extended = np.linalg.norm(index_tip) > np.linalg.norm(index_mcp) * 1.3
|
| 85 |
-
others_curled = all(
|
| 86 |
-
np.linalg.norm(tip) < 0.5 for tip in [middle_tip, ring_tip, pinky_tip]
|
| 87 |
-
)
|
| 88 |
-
if index_extended and others_curled:
|
| 89 |
-
return "POINTING"
|
| 90 |
-
|
| 91 |
-
# WAVING: all fingers extended, hand roughly vertical
|
| 92 |
-
all_extended = all(
|
| 93 |
-
np.linalg.norm(tip) > 0.5
|
| 94 |
-
for tip in [index_tip, middle_tip, ring_tip, pinky_tip]
|
| 95 |
-
)
|
| 96 |
-
if all_extended:
|
| 97 |
-
return "WAVING"
|
| 98 |
-
|
| 99 |
-
return None
|
| 100 |
-
|
| 101 |
-
def release(self):
|
| 102 |
-
self._hands.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/sensing/labels.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GESTURE_TO_TAG: dict[str, str] = {
|
| 2 |
+
"THUMBS_UP": "[GESTURE:THUMBS_UP][TONE:AFFIRMATIVE]",
|
| 3 |
+
"THUMBS_DOWN": "[GESTURE:THUMBS_DOWN][TONE:NEGATIVE]",
|
| 4 |
+
"POINTING": "[GESTURE:POINTING][INTENT:REFERENTIAL]",
|
| 5 |
+
"WAVING": "[GESTURE:WAVING][INTENT:GREETING]",
|
| 6 |
+
}
|
backend/ui/app.py
DELETED
|
@@ -1,163 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Streamlit frontend β webcam + chat + live metrics dashboard.
|
| 3 |
-
|
| 4 |
-
Panels:
|
| 5 |
-
Left sidebar β persona selector, session controls, live affect display
|
| 6 |
-
Centre β chat interface with streaming response
|
| 7 |
-
Right sidebar β latency breakdown, bucket priors bar chart
|
| 8 |
-
|
| 9 |
-
Run: streamlit run ui/app.py
|
| 10 |
-
"""
|
| 11 |
-
|
| 12 |
-
from __future__ import annotations
|
| 13 |
-
|
| 14 |
-
import requests
|
| 15 |
-
import streamlit as st
|
| 16 |
-
|
| 17 |
-
# ββ Config βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 18 |
-
API_BASE = "http://localhost:8000"
|
| 19 |
-
|
| 20 |
-
st.set_page_config(
|
| 21 |
-
page_title="AAC Chatbot",
|
| 22 |
-
layout="wide",
|
| 23 |
-
initial_sidebar_state="expanded",
|
| 24 |
-
)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
# ββ Session state init βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 28 |
-
if "user_id" not in st.session_state:
|
| 29 |
-
st.session_state.user_id = None
|
| 30 |
-
if "messages" not in st.session_state:
|
| 31 |
-
st.session_state.messages = []
|
| 32 |
-
if "last_latency" not in st.session_state:
|
| 33 |
-
st.session_state.last_latency = {}
|
| 34 |
-
if "last_affect" not in st.session_state:
|
| 35 |
-
st.session_state.last_affect = "NEUTRAL"
|
| 36 |
-
if "affect_override" not in st.session_state:
|
| 37 |
-
st.session_state.affect_override = None
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
# ββ Sidebar ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 41 |
-
with st.sidebar:
|
| 42 |
-
st.title("AAC Chatbot")
|
| 43 |
-
|
| 44 |
-
# Persona selection
|
| 45 |
-
try:
|
| 46 |
-
users_resp = requests.get(f"{API_BASE}/users", timeout=3)
|
| 47 |
-
users = users_resp.json().get("users", [])
|
| 48 |
-
except Exception:
|
| 49 |
-
users = []
|
| 50 |
-
st.error("API not reachable β start the FastAPI server first.")
|
| 51 |
-
|
| 52 |
-
user_options = {u["id"]: f"{u['name']} ({u['condition']})" for u in users}
|
| 53 |
-
selected = st.selectbox(
|
| 54 |
-
"Select persona",
|
| 55 |
-
options=list(user_options.keys()),
|
| 56 |
-
format_func=lambda k: user_options.get(k, k),
|
| 57 |
-
)
|
| 58 |
-
|
| 59 |
-
if selected != st.session_state.user_id:
|
| 60 |
-
st.session_state.user_id = selected
|
| 61 |
-
st.session_state.messages = []
|
| 62 |
-
try:
|
| 63 |
-
requests.post(f"{API_BASE}/session/reset", params={"user_id": selected})
|
| 64 |
-
except Exception:
|
| 65 |
-
pass
|
| 66 |
-
|
| 67 |
-
st.divider()
|
| 68 |
-
|
| 69 |
-
# Affect override (for demo / testing without webcam)
|
| 70 |
-
st.subheader("Affect Override")
|
| 71 |
-
st.caption("Simulates webcam affect detection")
|
| 72 |
-
affect_choice = st.radio(
|
| 73 |
-
"Current affect",
|
| 74 |
-
["Auto (webcam)", "HAPPY", "FRUSTRATED", "NEUTRAL", "SURPRISED"],
|
| 75 |
-
index=0,
|
| 76 |
-
)
|
| 77 |
-
st.session_state.affect_override = (
|
| 78 |
-
None if affect_choice == "Auto (webcam)" else affect_choice
|
| 79 |
-
)
|
| 80 |
-
|
| 81 |
-
st.divider()
|
| 82 |
-
|
| 83 |
-
# Live affect indicator
|
| 84 |
-
st.subheader("Detected Affect")
|
| 85 |
-
affect_emoji = {
|
| 86 |
-
"HAPPY": "π",
|
| 87 |
-
"FRUSTRATED": "π€",
|
| 88 |
-
"NEUTRAL": "π",
|
| 89 |
-
"SURPRISED": "π²",
|
| 90 |
-
}
|
| 91 |
-
af = st.session_state.last_affect
|
| 92 |
-
st.markdown(f"### {affect_emoji.get(af, 'β')} {af}")
|
| 93 |
-
|
| 94 |
-
# Webcam placeholder
|
| 95 |
-
st.divider()
|
| 96 |
-
st.subheader("Webcam Feed")
|
| 97 |
-
st.info(
|
| 98 |
-
"Live webcam sensing runs in the sensing client.\nAffect is sent to the API automatically."
|
| 99 |
-
)
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
# ββ Main chat area βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 103 |
-
st.header(f"Talking as: {user_options.get(st.session_state.user_id, 'β')}")
|
| 104 |
-
|
| 105 |
-
chat_col, metrics_col = st.columns([3, 1])
|
| 106 |
-
|
| 107 |
-
with chat_col:
|
| 108 |
-
for msg in st.session_state.messages:
|
| 109 |
-
role_label = "Partner" if msg["role"] == "partner" else "AAC User"
|
| 110 |
-
with st.chat_message("user" if msg["role"] == "partner" else "assistant"):
|
| 111 |
-
st.markdown(f"**{role_label}:** {msg['content']}")
|
| 112 |
-
|
| 113 |
-
query = st.chat_input("Type as the communication partnerβ¦")
|
| 114 |
-
|
| 115 |
-
if query and st.session_state.user_id:
|
| 116 |
-
st.session_state.messages.append({"role": "partner", "content": query})
|
| 117 |
-
with st.chat_message("user"):
|
| 118 |
-
st.markdown(f"**Partner:** {query}")
|
| 119 |
-
|
| 120 |
-
with st.chat_message("assistant"):
|
| 121 |
-
with st.spinner("Generating responseβ¦"):
|
| 122 |
-
try:
|
| 123 |
-
payload = {
|
| 124 |
-
"user_id": st.session_state.user_id,
|
| 125 |
-
"query": query,
|
| 126 |
-
"affect_override": st.session_state.affect_override,
|
| 127 |
-
}
|
| 128 |
-
resp = requests.post(f"{API_BASE}/chat", json=payload, timeout=15)
|
| 129 |
-
resp.raise_for_status()
|
| 130 |
-
data = resp.json()
|
| 131 |
-
|
| 132 |
-
response_text = data.get("response", "I don't know.")
|
| 133 |
-
st.markdown(f"**AAC User:** {response_text}")
|
| 134 |
-
|
| 135 |
-
st.session_state.messages.append(
|
| 136 |
-
{"role": "aac_user", "content": response_text}
|
| 137 |
-
)
|
| 138 |
-
st.session_state.last_affect = data.get("affect", "NEUTRAL")
|
| 139 |
-
st.session_state.last_latency = data.get("latency", {})
|
| 140 |
-
|
| 141 |
-
if not data.get("guardrail_passed", True):
|
| 142 |
-
st.warning("β Guardrail triggered β response was sanitised.")
|
| 143 |
-
|
| 144 |
-
except requests.exceptions.Timeout:
|
| 145 |
-
st.error("Request timed out. Is the server running?")
|
| 146 |
-
except Exception as e:
|
| 147 |
-
st.error(f"Error: {e}")
|
| 148 |
-
|
| 149 |
-
with metrics_col:
|
| 150 |
-
st.subheader("Turn Latency (s)")
|
| 151 |
-
lat = st.session_state.last_latency
|
| 152 |
-
if lat:
|
| 153 |
-
for key, label in [
|
| 154 |
-
("t_sensing", "Sensing"),
|
| 155 |
-
("t_intent", "Intent"),
|
| 156 |
-
("t_retrieval", "Retrieval"),
|
| 157 |
-
("t_generation", "Generation"),
|
| 158 |
-
("t_total", "**Total**"),
|
| 159 |
-
]:
|
| 160 |
-
val = lat.get(key, 0.0)
|
| 161 |
-
st.metric(label=label, value=f"{val:.3f}s")
|
| 162 |
-
else:
|
| 163 |
-
st.caption("No turn yet.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,38 +1,19 @@
|
|
| 1 |
-
# ββ
|
| 2 |
-
|
| 3 |
-
langchain-core>=0.2
|
| 4 |
-
pydantic>=2.0
|
| 5 |
-
pydantic-settings>=2.0
|
| 6 |
-
|
| 7 |
-
# ββ LLM clients ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 8 |
-
openai>=1.0 # OpenAI-compatible client for vLLM + Ollama
|
| 9 |
-
ollama>=0.2 # local dev fallback (direct Ollama SDK)
|
| 10 |
|
| 11 |
# ββ Retrieval ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 12 |
-
faiss-cpu>=1.7
|
| 13 |
sentence-transformers>=3.0
|
| 14 |
torch>=2.0
|
| 15 |
transformers>=4.40
|
| 16 |
numpy>=1.24
|
| 17 |
|
| 18 |
-
# ββ Clustering βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 19 |
-
hdbscan>=0.8.29
|
| 20 |
-
scikit-learn>=1.3
|
| 21 |
-
|
| 22 |
-
# ββ Sensing ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 23 |
-
mediapipe>=0.10
|
| 24 |
-
opencv-python>=4.8
|
| 25 |
-
|
| 26 |
# ββ API backend ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 27 |
fastapi>=0.111
|
| 28 |
uvicorn[standard]>=0.29
|
| 29 |
|
| 30 |
-
# ββ
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
# ββ Experiment tracking ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
-
mlflow>=2.13
|
| 36 |
|
| 37 |
# ββ Utilities ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 38 |
python-dotenv>=1.0
|
|
|
|
| 1 |
+
# ββ LLM client ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2 |
+
openai>=1.0 # talks to Ollama Cloud over OpenAI-compatible HTTP
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
# ββ Retrieval ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 5 |
sentence-transformers>=3.0
|
| 6 |
torch>=2.0
|
| 7 |
transformers>=4.40
|
| 8 |
numpy>=1.24
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# ββ API backend ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 11 |
fastapi>=0.111
|
| 12 |
uvicorn[standard]>=0.29
|
| 13 |
|
| 14 |
+
# ββ Config / validation βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 15 |
+
pydantic>=2.0
|
| 16 |
+
pydantic-settings>=2.0
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
# ββ Utilities ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 19 |
python-dotenv>=1.0
|
setup.sh
CHANGED
|
@@ -10,10 +10,8 @@ ok() { printf "\033[1;32m==> %s\033[0m\n" "$1"; }
|
|
| 10 |
warn() { printf "\033[1;33m==> %s\033[0m\n" "$1"; }
|
| 11 |
fail() { printf "\033[1;31mERROR: %s\033[0m\n" "$1"; exit 1; }
|
| 12 |
|
| 13 |
-
# ββ Pre-flight: conda ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 14 |
command -v conda >/dev/null 2>&1 || fail "conda not found. Install Miniconda/Anaconda first."
|
| 15 |
|
| 16 |
-
# ββ Conda environment ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 17 |
if conda info --envs | grep -q "^${CONDA_ENV} "; then
|
| 18 |
info "Conda env '$CONDA_ENV' already exists β reusing it"
|
| 19 |
else
|
|
@@ -26,38 +24,29 @@ fi
|
|
| 26 |
eval "$(conda shell.bash hook)"
|
| 27 |
conda activate "$CONDA_ENV"
|
| 28 |
|
| 29 |
-
# ββ Install dependencies βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 30 |
info "Installing Python dependencies..."
|
| 31 |
pip install --upgrade pip --quiet
|
| 32 |
pip install -r requirements.txt --quiet
|
| 33 |
ok "Dependencies installed"
|
| 34 |
|
| 35 |
-
# ββ Environment file βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 36 |
if [ -f "$ENV_FILE" ]; then
|
| 37 |
warn ".env already exists β skipping copy (review $ENV_EXAMPLE for new vars)"
|
| 38 |
else
|
| 39 |
info "Copying $ENV_EXAMPLE β $ENV_FILE..."
|
| 40 |
cp "$ENV_EXAMPLE" "$ENV_FILE"
|
| 41 |
-
ok ".env created β edit it to configure
|
| 42 |
fi
|
| 43 |
|
| 44 |
-
|
| 45 |
-
info "Building FAISS indexes (downloads BGE embedder + reranker on first run)..."
|
| 46 |
python -m backend.retrieval.vector_store
|
| 47 |
-
ok "
|
| 48 |
|
| 49 |
-
#
|
|
|
|
| 50 |
if ! command -v ollama >/dev/null 2>&1; then
|
| 51 |
warn "Ollama not installed β install it from https://ollama.com then re-run this script"
|
| 52 |
-
else
|
| 53 |
-
LOCAL_MODEL=$(grep -E '^LOCAL_MODEL=' "$ENV_FILE" 2>/dev/null | cut -d= -f2 | sed 's/#.*//' | tr -d ' ' || echo "qwen3:8b")
|
| 54 |
-
[ -z "$LOCAL_MODEL" ] && LOCAL_MODEL="qwen3:8b"
|
| 55 |
-
info "Pulling Ollama model: $LOCAL_MODEL (skips if already pulled)..."
|
| 56 |
-
ollama pull "$LOCAL_MODEL"
|
| 57 |
-
ok "Ollama model $LOCAL_MODEL ready"
|
| 58 |
fi
|
| 59 |
|
| 60 |
-
# ββ Frontend dependencies ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 61 |
if command -v pnpm >/dev/null 2>&1; then
|
| 62 |
info "Installing frontend dependencies..."
|
| 63 |
pnpm --dir frontend install --silent
|
|
@@ -66,7 +55,6 @@ else
|
|
| 66 |
warn "pnpm not found β install it (npm i -g pnpm) then run: pnpm --dir frontend install"
|
| 67 |
fi
|
| 68 |
|
| 69 |
-
# ββ Done ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 70 |
echo ""
|
| 71 |
ok "Setup complete!"
|
| 72 |
echo ""
|
|
|
|
| 10 |
warn() { printf "\033[1;33m==> %s\033[0m\n" "$1"; }
|
| 11 |
fail() { printf "\033[1;31mERROR: %s\033[0m\n" "$1"; exit 1; }
|
| 12 |
|
|
|
|
| 13 |
command -v conda >/dev/null 2>&1 || fail "conda not found. Install Miniconda/Anaconda first."
|
| 14 |
|
|
|
|
| 15 |
if conda info --envs | grep -q "^${CONDA_ENV} "; then
|
| 16 |
info "Conda env '$CONDA_ENV' already exists β reusing it"
|
| 17 |
else
|
|
|
|
| 24 |
eval "$(conda shell.bash hook)"
|
| 25 |
conda activate "$CONDA_ENV"
|
| 26 |
|
|
|
|
| 27 |
info "Installing Python dependencies..."
|
| 28 |
pip install --upgrade pip --quiet
|
| 29 |
pip install -r requirements.txt --quiet
|
| 30 |
ok "Dependencies installed"
|
| 31 |
|
|
|
|
| 32 |
if [ -f "$ENV_FILE" ]; then
|
| 33 |
warn ".env already exists β skipping copy (review $ENV_EXAMPLE for new vars)"
|
| 34 |
else
|
| 35 |
info "Copying $ENV_EXAMPLE β $ENV_FILE..."
|
| 36 |
cp "$ENV_EXAMPLE" "$ENV_FILE"
|
| 37 |
+
ok ".env created β edit it to configure Ollama Cloud model names"
|
| 38 |
fi
|
| 39 |
|
| 40 |
+
info "Building vector indexes (downloads BGE-small embedder on first run)..."
|
|
|
|
| 41 |
python -m backend.retrieval.vector_store
|
| 42 |
+
ok "Vector indexes built in data/faiss_store/"
|
| 43 |
|
| 44 |
+
# Ollama: tiers point at Ollama Cloud β no local pull needed. Just check the
|
| 45 |
+
# daemon is reachable so the OpenAI-compatible proxy works.
|
| 46 |
if ! command -v ollama >/dev/null 2>&1; then
|
| 47 |
warn "Ollama not installed β install it from https://ollama.com then re-run this script"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
fi
|
| 49 |
|
|
|
|
| 50 |
if command -v pnpm >/dev/null 2>&1; then
|
| 51 |
info "Installing frontend dependencies..."
|
| 52 |
pnpm --dir frontend install --silent
|
|
|
|
| 55 |
warn "pnpm not found β install it (npm i -g pnpm) then run: pnpm --dir frontend install"
|
| 56 |
fi
|
| 57 |
|
|
|
|
| 58 |
echo ""
|
| 59 |
ok "Setup complete!"
|
| 60 |
echo ""
|