nothex committed on
Commit
c1512e9
·
1 Parent(s): 6fce572

feat: implement ingestion backoff, CI build modes, and rebrand to Morpheus

Browse files

- **Resilience**: Added exponential backoff and retry loops for vector embeddings (`FallbackEmbeddings`) and Supabase RPC document inserts, preventing 429 crashes during heavy ingestion.
- **Telemetry**: Added `ingestion_retry_logs` tracking to monitor database rate limits.
- **Build Stability**: Introduced `NEXUS_BUILD_ASSETS_MODE` (light/full) to conditionally control heavy ML asset downloads and intent model training during Docker builds.
- **Graceful Degradation**: Bypassed Supabase operations in `intent_classifier` if credentials are missing, preventing crashes in CI environments.
- **Observability**: Added `/health/details` endpoint exposing the intent classifier's readiness and bootstrap status.
- **Rebranding**: Renamed the project from "NEXUS" to "Morpheus" across FastAPI configs, frontend UI, logging, and chat agent personas.

.claude/settings.local.json CHANGED
@@ -1,36 +1,43 @@
1
  {
 
 
 
 
 
 
 
2
  "hooks": {
3
- "PreCompact": [
4
  {
 
5
  "hooks": [
6
  {
7
  "type": "command",
8
  "command": "powershell -NoProfile -File \"D:/Work/Projects/proj/.dual-graph/prime.ps1\""
9
  }
10
- ],
11
- "matcher": ""
12
  }
13
  ],
14
  "Stop": [
15
  {
 
16
  "hooks": [
17
  {
18
  "type": "command",
19
  "command": "powershell -NoProfile -File \"D:/Work/Projects/proj/.dual-graph/stop_hook.ps1\""
20
  }
21
- ],
22
- "matcher": ""
23
  }
24
  ],
25
- "SessionStart": [
26
  {
 
27
  "hooks": [
28
  {
29
  "type": "command",
30
  "command": "powershell -NoProfile -File \"D:/Work/Projects/proj/.dual-graph/prime.ps1\""
31
  }
32
- ],
33
- "matcher": ""
34
  }
35
  ]
36
  }
 
1
  {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(where launch:*)",
5
+ "Bash(ls -la D:/Work/Projects/proj/launch*)",
6
+ "mcp__dual-graph__graph_read"
7
+ ]
8
+ },
9
  "hooks": {
10
+ "SessionStart": [
11
  {
12
+ "matcher": "",
13
  "hooks": [
14
  {
15
  "type": "command",
16
  "command": "powershell -NoProfile -File \"D:/Work/Projects/proj/.dual-graph/prime.ps1\""
17
  }
18
+ ]
 
19
  }
20
  ],
21
  "Stop": [
22
  {
23
+ "matcher": "",
24
  "hooks": [
25
  {
26
  "type": "command",
27
  "command": "powershell -NoProfile -File \"D:/Work/Projects/proj/.dual-graph/stop_hook.ps1\""
28
  }
29
+ ]
 
30
  }
31
  ],
32
+ "PreCompact": [
33
  {
34
+ "matcher": "",
35
  "hooks": [
36
  {
37
  "type": "command",
38
  "command": "powershell -NoProfile -File \"D:/Work/Projects/proj/.dual-graph/prime.ps1\""
39
  }
40
+ ]
 
41
  }
42
  ]
43
  }
.github/workflows/smoke.yml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Smoke Checks
2
+
3
+ on:
4
+ push:
5
+ branches: ["**"]
6
+ pull_request:
7
+ branches: ["**"]
8
+
9
+ jobs:
10
+ smoke:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - name: Checkout
14
+ uses: actions/checkout@v4
15
+
16
+ - name: Setup Python
17
+ uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.10"
20
+
21
+ - name: Install dependencies
22
+ run: |
23
+ python -m pip install --upgrade pip
24
+ pip install -r requirements.txt
25
+
26
+ - name: Build assets (light mode)
27
+ env:
28
+ NEXUS_DISABLE_INTENT_BOOTSTRAP: "true"
29
+ NEXUS_BUILD_ASSETS_MODE: "light"
30
+ run: |
31
+ python -m backend.core.build_ml_assets
32
+
33
+ - name: Intent classifier smoke predict
34
+ env:
35
+ NEXUS_DISABLE_INTENT_BOOTSTRAP: "true"
36
+ run: |
37
+ python -c "from backend.core.intent_classifier import intent_classifier as ic; print(ic.predict('what are the key points?', False, False))"
Dockerfile CHANGED
@@ -28,8 +28,14 @@ RUN pip install --no-cache-dir -r requirements.txt
28
  COPY --chown=user:user . .
29
 
30
  # 7. Pre-build ML assets (downloads models to cache, trains intent classifier)
31
- RUN python -m backend.core.build_ml_assets
 
 
 
 
 
 
32
 
33
  # 8. Start FastAPI (7860 is the HF standard, but Railway uses $PORT)
34
  ENV PORT=7860
35
- CMD uvicorn backend.main:app --host 0.0.0.0 --port $PORT
 
28
  COPY --chown=user:user . .
29
 
30
  # 7. Pre-build ML assets (downloads models to cache, trains intent classifier)
31
+ ARG PREBUILD_ML_ASSETS=1
32
+ ARG NEXUS_BUILD_ASSETS_MODE=light
33
+ RUN if [ "$PREBUILD_ML_ASSETS" = "1" ]; then \
34
+ NEXUS_BUILD_ASSETS_MODE=$NEXUS_BUILD_ASSETS_MODE python -m backend.core.build_ml_assets ; \
35
+ else \
36
+ echo "Skipping ML asset pre-build"; \
37
+ fi
38
 
39
  # 8. Start FastAPI (7860 is the HF standard, but Railway uses $PORT)
40
  ENV PORT=7860
41
+ CMD ["sh", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port ${PORT}"]
NEXUS_PROJECT_GUIDE.md ADDED
@@ -0,0 +1,569 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NEXUS — Complete Project Understanding Guide
2
+
3
+ > Read this when starting a new session, onboarding someone, or after a long break.
4
+ > Everything you built, how it connects, and why decisions were made.
5
+
6
+ ---
7
+
8
+ ## What Is NEXUS?
9
+
10
+ NEXUS is a **multi-tenant RAG (Retrieval-Augmented Generation) platform**.
11
+
12
+ Users upload PDF documents. They ask questions in natural language. NEXUS finds the most relevant passages and uses an AI to generate accurate answers with source citations and confidence scores.
13
+
14
+ **What makes it non-trivial:**
15
+ - Each user sees only their own documents, enforced at the database level (RLS)
16
+ - Retrieval combines keyword search + semantic search + neural reranking
17
+ - The intent classifier learns from user behaviour and retrains automatically
18
+ - Similar questions get instant answers from a semantic cache
19
+ - Conversations are remembered across sessions (episodic memory)
20
+ - If one AI provider fails, the system automatically tries the next one
21
+ - Documents are indexed as hierarchical RAPTOR trees, not just flat chunks
22
+
23
+ ---
24
+
25
+ ## Project Structure
26
+
27
+ ```
28
+ proj/
29
+ ├── app.py # FastAPI entry point
30
+ ├── backend/
31
+ │ ├── main.py # App startup, Celery worker, rate limiter
32
+ │ ├── api/
33
+ │ │ ├── auth.py # /api/v1/auth/* — login, /me, /verify
34
+ │ │ ├── query.py # /api/v1/query — SSE streaming RAG endpoint
35
+ │ │ ├── corpus.py # /api/v1/corpus/* — files, delete, rename, recategorise
36
+ │ │ ├── ingest.py # /api/v1/ingest/* — upload, status polling
37
+ │ │ ├── frontend_config.py # /api/v1/config — serves Supabase keys to frontend
38
+ │ │ └── admin.py # /api/v1/admin — master key, daily code
39
+ │ └── core/
40
+ │ ├── pipeline.py # The entire brain — ingestion + retrieval + generation
41
+ │ ├── providers.py # All AI provider wrappers (Groq, Gemini, OpenRouter)
42
+ │ ├── classifier.py # 3-stage document classifier
43
+ │ ├── intent_classifier.py # Neural intent classifier (sklearn, retrains online)
44
+ │ ├── cache_manager.py # Semantic Redis cache with version invalidation
45
+ │ ├── auth_utils.py # JWT decoding helpers
46
+ │ ├── config.py # All constants and model lists
47
+ │ └── tasks.py # Celery task definition
48
+ ├── frontend/
49
+ │ ├── index.html
50
+ │ └── js/
51
+ │ ├── main.js # Boot sequence, auth gate, tab hook
52
+ │ ├── api.js # All fetch() calls — single source of truth
53
+ │ ├── state.js # Global STATE object
54
+ │ ├── corpus.js # Upload, document list, category review
55
+ │ ├── chat.js # Chat UI, streaming token renderer
56
+ │ ├── graph.js # D3 force-directed graph
57
+ │ ├── inspect.js # Right-panel node inspector
58
+ │ ├── ui.js # switchView, toast, shared UI utils
59
+ │ └── config.js # CONFIG object, initSupabase()
60
+ └── supabase/
61
+ ├── rls/multi_tenancy_rls.sql
62
+ └── migrations/
63
+ ```
64
+
65
+ ---
66
+
67
+ ## The Database (Supabase / PostgreSQL)
68
+
69
+ ### Tables
70
+
71
+ **`documents`** — The vector store. Every chunk from every PDF lives here.
72
+
73
+ | Column | Type | Purpose |
74
+ |--------|------|---------|
75
+ | `id` | uuid | Chunk ID (deterministic: uuid5 of file_hash + chunk_index) |
76
+ | `content` | text | The chunk text that gets searched |
77
+ | `metadata` | jsonb | Source, file_hash, document_type, page_numbers, chunk_index, relevance_score |
78
+ | `embedding` | vector(2048) | The nvidia-nemotron embedding for pgvector semantic search |
79
+ | `user_id` | uuid | RLS tenant isolation — set by insert_document_chunk RPC |
80
+
81
+ **`ingested_files`** — Dedup registry. Checked before every upload.
82
+
83
+ | Column | Type | Purpose |
84
+ |--------|------|---------|
85
+ | `file_hash` | text | SHA-256 of the PDF — the dedup key |
86
+ | `filename` | text | Display name shown in the UI |
87
+ | `document_type` | text | Category e.g. academic_syllabus |
88
+ | `chunk_count` | int | How many chunks (includes RAPTOR tree nodes) |
89
+ | `user_id` | uuid | Tenant isolation |
90
+ | `user_overridden` | bool | True if user manually changed the category — classifier skips when true |
91
+
92
+ **`chat_memory`** — Episodic memory. Past Q&A pairs, searchable by semantic similarity.
93
+
94
+ | Column | Type | Purpose |
95
+ |--------|------|---------|
96
+ | `session_id` | text | Groups messages from the same conversation |
97
+ | `role` | text | "user" or "assistant" |
98
+ | `content` | text | The message |
99
+ | `embedding` | vector | For semantic search via match_memory RPC |
100
+ | `user_id` | uuid | Tenant isolation |
101
+
102
+ **`category_centroids`** — The classifier's learned memory.
103
+
104
+ | Column | Type | Purpose |
105
+ |--------|------|---------|
106
+ | `document_type` | text | Category label |
107
+ | `centroid_vector` | array | Running average embedding of all docs of this type |
108
+ | `document_count` | int | How many documents contributed |
109
+ | `user_id` | uuid | Per-tenant centroids |
110
+
111
+ **`evaluation_logs`** — RAGAS quality metrics. Written after every query.
112
+
113
+ | Column | Type | Purpose |
114
+ |--------|------|---------|
115
+ | `run_label` | text | "production" for live, timestamp for offline eval runs |
116
+ | `question` | text | The query |
117
+ | `relevance_proxy` | float | Average Cohere relevance score across chunks |
118
+ | `precision_at_k` | float | Max Cohere relevance score (most relevant chunk) |
119
+ | `final_score` | float | Best quality proxy available online |
120
+
121
+ **`rerank_feedback`** — Every Cohere rerank decision, stored for future CrossEncoder distillation.
122
+
123
+ **`intent_feedback`** — Online training data for the intent classifier.
124
+
125
+ **`mv_document_types`** — Materialized view. The category filter dropdown reads from here.
126
+
127
+ ### Supabase RPC Functions
128
+
129
+ These are stored procedures called from Python like regular functions:
130
+
131
+ - `hybrid_search(query_text, query_embedding, match_count, filter, semantic_weight, keyword_weight)` — Combined BM25 + pgvector search
132
+ - `match_memory(query_embedding, match_session_id, match_count)` — Semantic search over chat history
133
+ - `insert_document_chunk(p_id, p_content, p_metadata, p_embedding, p_user_id)` — Secure insert that bypasses RLS by accepting user_id explicitly
134
+ - `refresh_document_types_mv()` — Refreshes the category filter view
135
+ - `get_document_types()` — Returns distinct categories for this tenant
136
+
137
+ ### Row Level Security
138
+
139
+ Every table has RLS policies. The core rule: `user_id = auth.uid()` for reads. Writes from Celery workers use the service role key but always inject `user_id` explicitly via the `insert_document_chunk` RPC. This means the security check happens at the API boundary (JWT validation) and the database enforces it for reads.
140
+
141
+ ---
142
+
143
+ ## The Ingestion Pipeline
144
+
145
+ When a user uploads a PDF:
146
+
147
+ ```
148
+ Browser
149
+ POST /api/v1/ingest/upload
150
+ FastAPI validates JWT (require_auth_token)
151
+ Saves PDF to temp file
152
+ process_pdf_task.delay() pushes to Redis queue
153
+ Returns {task_id} immediately (200 OK)
154
+ Browser polls /api/v1/ingest/status/{task_id} every 2 seconds
155
+
156
+ Celery worker:
157
+
158
+ Step 1: SHA-256 fingerprint
159
+ Check ingested_files for this hash (O(1) indexed lookup)
160
+ If found: return "already_ingested"
161
+ Check if user_overridden=True: load forced_category, skip classifier
162
+
163
+ Step 2: PDF partitioning (unstructured library)
164
+ partition_pdf() β€” OCR + layout detection
165
+ extract_images_from_pdf() β€” PyMuPDF, filters tiny/skewed images
166
+ Returns list of Element objects (Title, NarrativeText, Table, Image...)
167
+
168
+ Step 3: Classification (classifier.py)
169
+ Three-stage cascade:
170
+ Stage 1: Centroid nearest-neighbour (cosine similarity, no API call)
171
+ If confidence >= 0.72: done
172
+ Stage 2: Ensemble vote (centroid + label-embed + TF-IDF)
173
+ If confidence >= 0.38: done
174
+ Stage 3: LLM chain-of-thought (last resort for novel document types)
175
+ Special: Sparse/tabular pre-check routes to visual classification
176
+ If user_overridden=True: skip all stages, use forced_category directly
177
+
178
+ Step 4: Chunking + AI summaries
179
+ chunk_by_title() groups elements into logical sections
180
+ For chunks with tables or images: parallel AI vision summarisation (5 workers)
181
+ Each chunk becomes a LangChain Document with rich metadata
182
+
183
+ Step 5: RAPTOR tree indexing
184
+ Groups leaf chunks into clusters of 5
185
+ Generates LLM parent summary for each cluster
186
+ Repeats until single root node
187
+ All nodes (leaves + summaries) get uploaded
188
+ Root node answers "what is this document about?"
189
+ Leaf nodes answer specific detail questions
190
+
191
+ Step 6: Embedding + upload
192
+ Batch embed all nodes via nvidia-nemotron (2048 dims)
193
+ Insert each via insert_document_chunk RPC (explicit user_id, no RLS issue)
194
+ Register in ingested_files
195
+ Invalidate semantic cache for this user (kb_version++)
196
+ ```
197
+
198
+ ---
199
+
200
+ ## The Retrieval Pipeline
201
+
202
+ When a user asks a question:
203
+
204
+ ```
205
+ Browser
206
+ POST /api/v1/query {query, category, history, session_id, alpha}
207
+ X-Auth-Token header
208
+
209
+ FastAPI validates JWT, starts SSE streaming response
210
+
211
+ Step 1: Intent analysis (analyse_intent)
212
+ Local sklearn classifier, <5ms, no API call
213
+ Inputs: query text, has_category, has_history
214
+ Output: {is_clear, enriched_query, clarification_question}
215
+ If needs_clarification: stream question back, stop
216
+ After 2 consecutive clarification turns: proceed regardless
217
+ Enrichment: if reference query ("summarise it"), replace with previous query
218
+ Logs to intent_feedback for online retraining
219
+
220
+ Step 2: retrieve_chunks()
221
+ a) Follow-up detection
222
+ Short query (<=8 words) with pronouns (it/this/that/they)?
223
+ Reuse _last_chunks[session_key] β€” no re-search needed
224
+
225
+ b) Semantic cache check
226
+ Embed original query (256-entry in-memory LRU cache)
227
+ Scan Redis for cosine similarity >= 0.92
228
+ Hit: return __CACHE_HIT__ sentinel document
229
+
230
+ c) Query rewriting
231
+ LLM breaks query into 1-3 targeted sub-queries
232
+ Short queries (<=3 words) skip this step
233
+
234
+ d) Hybrid search (per sub-query)
235
+ hybrid_search RPC: BM25 keywords + pgvector semantics combined
236
+ alpha=0.5 means equal weight (user can adjust via slider)
237
+ Deduplicates across sub-queries by chunk ID
238
+ If category filter active: hard filter on document_type
239
+
240
+ e) Reranking (3-tier fallback)
241
+ Tier 1: Cohere rerank-multilingual-v3.0 (cloud API, best quality)
242
+ Tier 2: CrossEncoder ms-marco-MiniLM-L-6-v2 (local CUDA, free)
243
+ Tier 3: Lexical Jaccard similarity (pure Python, always works)
244
+ Relevance threshold: 0.35 (relaxed to 0.05 for small corpus)
245
+ Diversity filter: max 2 chunks per source, cross-category seeding
246
+ Context budget: trim if total chars > 14,000
247
+
248
+ f) Log rerank feedback (fire-and-forget thread)
249
+ All Cohere scores stored for future CrossEncoder distillation
250
+
251
+ Step 3: generate_answer_stream()
252
+ If __CACHE_HIT__: stream cached answer directly (skip LLM entirely)
253
+ Retrieve episodic memory: match_memory RPC (past relevant Q&A pairs)
254
+ Build prompt: system role + retrieved chunks + memories + history + query
255
+ Stream tokens via Groq (primary) -> Gemini -> OpenRouter fallback
256
+ After streaming: save Q&A pair to chat_memory with embeddings (thread)
257
+ Store in semantic cache (version key + TTL by document type)
258
+
259
+ Step 4: Emit sources
260
+ Collect metadata from retrieved chunks
261
+ Send {type: "done", sources: [...], images: [...]} SSE event
262
+ ```
263
+
264
+ ---
265
+
266
+ ## The Provider System
267
+
268
+ `providers.py` routes each task to the best available AI provider.
269
+
270
+ ```
271
+ ProviderFactory.build_chat_llm(purpose=...)
272
+
273
+ purpose="text" Groq (fast, generous limits) -> Gemini -> OpenRouter
274
+ purpose="ingestion" Gemini (1M context, good at summaries) -> OpenRouter
275
+ purpose="vision" Gemini (native multimodal) -> OpenRouter vision models
276
+ purpose="rewriter" OpenRouter (cheap per-call) -> Groq fallback
277
+ purpose="classifier" OpenRouter classifier models only
278
+ ```
279
+
280
+ **Current model lists:**
281
+ - Groq: `llama-4-scout-17b` -> `llama-3.3-70b-versatile` -> `qwen3-32b` -> `llama-3.1-8b-instant`
282
+ - Gemini: `gemini-2.5-flash` -> `gemini-2.5-flash-lite` (updated from deprecated 1.5/2.0)
283
+ - OpenRouter text: `stepfun/step-3.5-flash:free` -> `nvidia/nemotron-3-super-120b:free` -> `arcee-ai/trinity-large-preview:free` -> more
284
+ - Embeddings: `nvidia/llama-nemotron-embed-vl-1b-v2:free` (2048 dims) -> `text-embedding-3-small`
285
+
286
+ **FallbackEmbeddings null guard:** OpenRouter sometimes returns HTTP 200 with `data=null`. The guard raises `ValueError` on null response and retries the next model instead of crashing ingestion.
287
+
288
+ ---
289
+
290
+ ## The Semantic Cache
291
+
292
+ `cache_manager.py` — graduated invalidation with semantic similarity lookup.
293
+
294
+ **How it works:**
295
+ 1. Each user has a `kb_version` integer in Redis: `nexus:kb_version:{user_id}`
296
+ 2. Cache entries use version in key: `nexus:qcache:{user_id}:v{version}:...`
297
+ 3. On lookup: scan all entries for this user+version, find best cosine similarity match
298
+ 4. Hit threshold: 0.92 (strict to avoid returning wrong answers)
299
+ 5. On corpus change: `increment_kb_version()` -> version goes N to N+1
300
+ 6. Old v1 entries invisible under v2 β€” effectively invalidated
301
+
302
+ **Critical fix applied:** The version key must be written to Redis on first store via `r.setnx(version_key, kb_version)`. Without this, `r.incr()` on a non-existent key initialises to 0 then increments to 1 — same as the default — so old cache entries remain visible after delete.
303
+
304
+ **TTL by document type:** academic_syllabus/reference_chart = 7 days, technical_manual/research_paper = 3 days, financial_report/hr_policy = 1 day, general_document = 1 hour.
305
+
306
+ ---
307
+
308
+ ## The Intent Classifier
309
+
310
+ `intent_classifier.py` — sklearn-based, runs locally, under 5ms per query.
311
+
312
+ **What it classifies:** Does this query need clarification or is it clear enough to proceed?
313
+
314
+ **Features:** `has_category`, `has_history`, query text embedding via `all-MiniLM-L6-v2`.
315
+
316
+ **Online learning:** Every 25 queries logged to `intent_feedback`, the model retrains automatically and saves to `intent_model.pkl`. Currently at v2 with 158+ examples.
317
+
318
+ **Clarification limit:** After 2 consecutive clarification turns, the system proceeds regardless. Prevents the system from getting stuck in a clarification loop.
319
+
320
+ ---
321
+
322
+ ## The Document Classifier
323
+
324
+ `classifier.py` — three-stage cascade.
325
+
326
+ ```
327
+ Incoming document
328
+ |
329
+ v
330
+ Sparse/tabular pre-check (words < 200 OR unique_ratio > 0.85)
331
+ YES: visual classification (structural fingerprint to LLM)
332
+ NO: continue
333
+ |
334
+ v
335
+ Stage 1: Centroid nearest-neighbour
336
+ Cosine similarity to stored category centroids
337
+ Confidence >= 0.72: done
338
+ |
339
+ v
340
+ Stage 2: Ensemble vote
341
+ Signal A: cosine to known centroids (weight 0.45)
342
+ Signal B: cosine to category label embeddings (weight 0.30)
343
+ Signal C: TF-IDF keyword matching (weight 0.25)
344
+ Score >= 0.38: done
345
+ |
346
+ v
347
+ Stage 3: LLM chain-of-thought
348
+ Sends excerpt to classifier LLM
349
+ Classifies FORMAT and STRUCTURE (not just topic)
350
+ Fallback: "general_document"
351
+ ```
352
+
353
+ After classification, the centroid is updated with this document's vector β€” the classifier learns with every ingestion.
354
+
355
+ **User override lock:** If `ingested_files.user_overridden=True` for this file hash, the entire classifier is skipped. Returns synthetic result with `stage_used="user_override"`, `confidence=1.0`.
356
+
357
+ ---
358
+
359
+ ## The Frontend
360
+
361
+ ### Authentication Flow
362
+ ```
363
+ Page load
364
+ initSupabase() fetches Supabase keys from /api/v1/config
365
+ supabaseClient.auth.getSession()
366
+ Session exists: showApp() + bootApp()
367
+ No session: showLogin()
368
+
369
+ Login
370
+ supabaseClient.auth.signInWithPassword(email, password)
371
+ JWT stored in localStorage by supabase-js automatically
372
+ Every request: getSupabaseToken() reads it from localStorage
373
+ Sent as X-Auth-Token header on every API call
374
+ Backend require_auth_token Depends() validates JWT and returns user_id
375
+ ```
376
+
377
+ ### Global State
378
+ `state.js` — the single source of truth for UI state:
379
+ - `STATE.files` β€” list of ingested documents from /api/v1/corpus/files
380
+ - `STATE.categories` β€” list of category strings
381
+ - `STATE.catColors` β€” color mapping for graph visualization
382
+ - `STATE.chatHistory` β€” current conversation turns
383
+ - `STATE.sessionId` β€” UUID generated per browser tab
384
+ - `STATE.simulation` β€” D3 force simulation reference
385
+ - `STATE.alpha` β€” retrieval weight slider (0=keyword, 1=semantic)
386
+ - `STATE.isThinking` β€” prevents double-submit
387
+
388
+ ### Upload + Progress
389
+ `corpus.js` — `processUpload()` calls `apiIngestFile()`, then enters `pollIngestStatus()` which is an infinite loop (no timeout) that exits only on COMPLETED or FAILED. Shows heartbeat messages cycling through pipeline stages while waiting.
390
+
391
+ ### Chat Streaming
392
+ `chat.js` — `sendChat()` has 500ms debounce guard. Creates the assistant bubble immediately with thinking dots. `async onToken()` yields to the browser with `await new Promise(r => setTimeout(r, 0))` after each token update so the DOM repaints during streaming rather than all at once at the end.
393
+
394
+ ### Graph
395
+ `graph.js` — Obsidian-style D3 force simulation. Key: `graphReheat()` uses `alpha(0.3)` not `alphaTarget(0.2)`. The alpha method sets current energy directly — works even when simulation has fully stopped. alphaTarget only sets where energy wants to decay toward, useless if simulation is already stopped. `onGraphTabVisible()` is called from `main.js` tab wrapper with 50ms delay for CSS display change to propagate before D3 reads panel dimensions.
396
+
397
+ ---
398
+
399
+ ## Complete Request Flow Example
400
+
401
+ ```
402
+ User asks "What are the core courses?"
403
+
404
+ 1. Browser POST /api/v1/query
405
+ Headers: X-Auth-Token: eyJ...
406
+ Body: {query, category="academic_syllabus", history, session_id, alpha=0.5}
407
+
408
+ 2. FastAPI: require_auth_token decodes JWT, returns user_id="ee903934..."
409
+
410
+ 3. analyse_intent()
411
+ sklearn: needs_clarification=False, conf=1.00
412
+ category active: enriched = "query academic_syllabus"
413
+ Logs to intent_feedback
414
+
415
+ 4. retrieve_chunks()
416
+ No follow-up pronouns in query
417
+ Cache check: MISS (first time this query)
418
+ generate_sub_queries -> ["B.Tech CSE core courses", "program core credits", ...]
419
+ hybrid_search RPC x3 sub-queries -> 12 raw candidates
420
+ Cohere rerank -> ranked by relevance score
421
+ Threshold + diversity filter -> 3 final chunks
422
+ Store in _last_chunks[session_key]
423
+ Log rerank feedback (background thread)
424
+
425
+ 5. generate_answer_stream()
426
+ No __CACHE_HIT__ sentinel
427
+ match_memory RPC -> 2 past relevant Q&A pairs from this session
428
+ Build prompt: system + 3 chunks + 2 memories + history + query
429
+ Groq astream() -> tokens arrive one by one
430
+ Yield {type:"token", content:"The"}, {type:"token", content:" core"}, ...
431
+ After streaming: save Q&A to chat_memory (background thread)
432
+ Store in semantic cache (version v4, TTL 3600s)
433
+
434
+ 6. Yield {type:"done", sources:[...], images:[...]}
435
+
436
+ 7. Browser: onToken() fills bubble token by token with DOM repaint
437
+ onDone() appends source chips (filtering __CACHE_HIT__ sentinels)
438
+ ```
439
+
440
+ ---
441
+
442
+ ## Key Design Decisions
443
+
444
+ **Why Celery + Redis?** Ingestion takes 60-120 seconds (OCR, AI summaries, RAPTOR). FastAPI requests time out before that. Celery lets the task run in background while the browser polls for status.
445
+
446
+ **Why service role key for writes?** Celery workers have no browser session so `auth.uid()` is NULL. Security boundary is at the API level (JWT validation). The `insert_document_chunk` RPC accepts `user_id` as an explicit verified parameter extracted from the JWT.
447
+
448
+ **Why RAPTOR tree indexing?** Flat chunking misses questions that span multiple sections ("total credits across all categories"). RAPTOR builds parent summaries aggregating child content, enabling retrieval at multiple granularities β€” root nodes for overview questions, leaf nodes for specific details.
449
+
450
+ **Why semantic cache with version invalidation?** Repeated questions should not cost API calls. But cached answers must go stale when corpus changes. Version invalidation solves the second problem without needing to track which cache entry references which document β€” increment version, all old entries become invisible.
451
+
452
+ **Why 3-tier reranker?** Cohere costs money and has rate limits. CrossEncoder is free but needs local GPU. Lexical always works. This order maximises quality while guaranteeing retrieval never fails completely.
453
+
454
+ **Why `alpha(0.3)` not `alphaTarget(0.2)` in graph reheat?** alphaTarget sets where the simulation wants to decay toward. If the simulation has already stopped (alpha < alphaMin = 0.001), alphaTarget does nothing β€” simulation stays stopped. The alpha method sets current energy directly and always forces a restart.
455
+
456
+ ---
457
+
458
+ ## Environment Variables
459
+
460
+ ```
461
+ SUPABASE_URL=https://....supabase.co
462
+ SUPABASE_ANON_KEY=eyJ... # Frontend-safe, used for user-scoped reads
463
+ SUPABASE_SERVICE_KEY=eyJ... # Server-only, bypasses RLS for writes
464
+ OPENROUTER_API_KEY=sk-or-...
465
+ OPEN_ROUTER_BASE_URL=https://openrouter.ai/api/v1
466
+ GROQ_API_KEY=gsk_...
467
+ GEMINI_API_KEY=AI...
468
+ COHERE_API_KEY=lm1X...
469
+ REDIS_URL=redis://default:...@...redislabs.com:10519/0
470
+ MASTER_ADMIN_KEY=...
471
+ LOG_LEVEL=INFO
472
+ ```
473
+
474
+ ---
475
+
476
+ ## Test Status
477
+
478
+ | Test | Status | Notes |
479
+ |------|--------|-------|
480
+ | T2.2 Cross-section question | PASS | 160 credits answered correctly |
481
+ | T2.3 Keyword-specific (Capstone) | FAIL | 0 chunks β€” needs PageIndex |
482
+ | T2.4 Out-of-corpus | PASS | Returns clean "No relevant documents" |
483
+ | T2.5 Category filter | PASS | Hard filter active in logs |
484
+ | T3.1 Vague no context | NOT RUN | |
485
+ | T3.2 Vague with category | NOT RUN | |
486
+ | T3.3 Clarification limit | NOT RUN | |
487
+ | T3.4 Follow-up detection | PASS | Reusing cached chunks confirmed |
488
+ | T-Cache.2 Same query cache hit | PASS | similarity 1.000 |
489
+ | T-Cache.3 Delete invalidates cache | PASS | v2 to v3 on delete |
490
+ | T-Provider Groq fallback | PASS | Groq 200 OK after Gemini 404 |
491
+ | T-Override User category lock | NOT RUN | Implemented, not tested |
492
+ | T-ErrorMsg Error message format | NOT RUN | Implemented, not tested |
493
+
494
+ ---
495
+
496
+ ## What Is Next
497
+
498
+ ### Before showing to anyone
499
+ - README + architecture doc
500
+ - Deployment to Railway or HF Spaces (currently localhost only)
501
+ - Rate limiting per user on query endpoint (60/hour)
502
+ - Run remaining tests T3.1-T3.3, T-Override, T-ErrorMsg
503
+
504
+ ### Next major feature β€” PageIndex
505
+ Fixes T2.3. The Capstone Project (DSN4097, 8 credits) is buried in a table in chunk 1 alongside 17 other items. Vector search and Cohere both miss it because the chunk summary emphasises overall credit structure, not individual items. PageIndex builds a hierarchical tree index from document structure and uses LLM reasoning to navigate it β€” not similarity search.
506
+
507
+ Build order:
508
+ 1. Fork PageIndex, swap OpenAI for FallbackChatLLM
509
+ 2. Add tree generation as optional step in run_ingestion()
510
+ 3. New table: `document_trees` (file_hash, tree_json, user_id)
511
+ 4. `route_query()`: vector path for simple queries, tree path for structured docs
512
+ 5. D3 graph: show tree nodes when clicking into a document
513
+
514
+ ### After PageIndex
515
+ - SetFit intent classifier upgrade (158+ examples, enough now)
516
+ - 3-class intent: clear / clarify / follow_up (follow_up currently heuristic)
517
+ - Corpus health dashboard (chunks, coverage, cache hit rate, avg relevance)
518
+ - Predictive cache prefetching
519
+
520
+ ---
521
+
522
+ ## The Three Self-Improvement Loops
523
+
524
+ NEXUS has three feedback loops that make it smarter over time:
525
+
526
+ **Loop 1 β€” Intent classifier (every 25 queries)**
527
+ User queries logged to intent_feedback. Every 25 rows, classifier retrains on accumulated examples. Learns your users' specific query patterns over time.
528
+
529
+ **Loop 2 β€” Document classifier (every ingestion)**
530
+ Each ingested document updates its category centroid. Next similar document gets Stage 1 centroid match instead of needing LLM. Classification gets faster and more accurate as corpus grows.
531
+
532
+ **Loop 3 β€” Reranker distillation (background, future)**
533
+ Every query logs Cohere rerank scores to rerank_feedback. Accumulated labels will be used to train local CrossEncoder to match Cohere quality without the API cost.
534
+
535
+ ---
536
+
537
+ ## Common Debugging
538
+
539
+ **Ingestion crashes at step 5 (embedding)**
540
+ Look for: `ValueError: Model X returned null embeddings`
541
+ Cause: OpenRouter returns HTTP 200 with data=null
542
+ Fix: FallbackEmbeddings null guard retries next model β€” should be in providers.py
543
+
544
+ **Cache not invalidating after delete**
545
+ Check Redis for key `nexus:kb_version:{user_id}`
546
+ If missing: first ingest happened before the setnx fix was applied
547
+ Fix: run a fresh ingest β€” `store_cached_answer()` calls `r.setnx()` which writes the key
548
+
549
+ **Graph not reheating on tab switch**
550
+ Check: `onGraphTabVisible` defined at bottom of graph.js
551
+ Check: `_hookGraphTabVisible` IIFE at bottom of main.js
552
+ Expected: graph animates within 50ms of tab click
553
+
554
+ **Classifier ignoring user category**
555
+ Check: `ingested_files.user_overridden = true` for that file hash
556
+ Look for in logs: `User override active β€” forcing category 'X', skipping classifier`
557
+
558
+ **__CACHE_HIT__ showing as source chip**
559
+ Hard refresh browser (Ctrl+Shift+R) to load new chat.js
560
+ The `visibleSources` filter in `onDone()` strips it
561
+
562
+ **Gemini 404 errors during ingestion**
563
+ Check config.py `GEMINI_TEXT_MODELS` and `GEMINI_VISION_MODELS`
564
+ Must be `gemini-2.5-flash` and `gemini-2.5-flash-lite`
565
+ `gemini-1.5-flash` and `gemini-2.0-flash` are deprecated
566
+
567
+ ---
568
+
569
+ *Last updated: March 2026*
README.md CHANGED
@@ -92,6 +92,15 @@ pip install -r backend/requirements.txt
92
  uvicorn backend.main:app --reload --port 8000
93
  ```
94
 
 
 
 
 
 
 
 
 
 
95
  ### 4. Open the frontend
96
  Open `frontend/index.html` in your browser.
97
  `config.js` already points to `http://localhost:8000`.
@@ -113,6 +122,14 @@ curl -X POST http://localhost:8000/api/v1/admin/warmup \
113
  4. Add all your `.env` values in the Render dashboard (Environment tab)
114
  5. Deploy — copy your Render URL (e.g. `https://nexus-api.onrender.com`)
115
 
 
 
 
 
 
 
 
 
116
  ### Frontend β†’ Vercel
117
  1. Update `API_URL` in `frontend/js/config.js` to your Render URL
118
  2. Go to [vercel.com](https://vercel.com) β†’ New Project β†’ connect your repo
@@ -136,6 +153,8 @@ All routes are at `/api/v1/`. Interactive docs at `/docs` (disable in prod via `
136
 
137
  | Method | Path | Description |
138
  |--------|------|-------------|
 
 
139
  | POST | `/auth/verify` | Verify daily guest password |
140
  | POST | `/auth/admin` | Verify master key, get today's code |
141
  | GET | `/corpus/files` | List all ingested files |
@@ -158,3 +177,8 @@ The project is designed so you can scale each layer independently:
158
  - **Add JWT auth**: Swap `verify_password()` in `api/auth.py` for JWT issuance. `services/auth.py` is unchanged.
159
  - **Add new corpus operations**: Add a route to `api/corpus.py`. `pipeline.py` is unchanged.
160
  - **Add new document types**: The classifier learns them automatically. Run warmup to reinforce centroids.
 
 
 
 
 
 
92
  uvicorn backend.main:app --reload --port 8000
93
  ```
94
 
95
+ ### Optional: pre-build ML assets locally
96
+ ```bash
97
+ # Light mode (downloads embedder only; skips intent training)
98
+ NEXUS_BUILD_ASSETS_MODE=light python -m backend.core.build_ml_assets
99
+
100
+ # Full mode (downloads embedder + trains intent model)
101
+ NEXUS_BUILD_ASSETS_MODE=full python -m backend.core.build_ml_assets
102
+ ```
103
+
104
  ### 4. Open the frontend
105
  Open `frontend/index.html` in your browser.
106
  `config.js` already points to `http://localhost:8000`.
 
122
  4. Add all your `.env` values in the Render dashboard (Environment tab)
123
  5. Deploy — copy your Render URL (e.g. `https://nexus-api.onrender.com`)
124
 
125
+ #### Docker build tuning
126
+ The Dockerfile supports optional ML prebuild knobs:
127
+
128
+ - `PREBUILD_ML_ASSETS=1` (default) or `0`
129
+ - `NEXUS_BUILD_ASSETS_MODE=light` (default) or `full`
130
+
131
+ `light` is recommended for stability and faster builds.
132
+
133
  ### Frontend β†’ Vercel
134
  1. Update `API_URL` in `frontend/js/config.js` to your Render URL
135
  2. Go to [vercel.com](https://vercel.com) β†’ New Project β†’ connect your repo
 
153
 
154
  | Method | Path | Description |
155
  |--------|------|-------------|
156
+ | GET | `/health` | Basic liveness check |
157
+ | GET | `/health/details` | Liveness + intent classifier readiness |
158
  | POST | `/auth/verify` | Verify daily guest password |
159
  | POST | `/auth/admin` | Verify master key, get today's code |
160
  | GET | `/corpus/files` | List all ingested files |
 
177
  - **Add JWT auth**: Swap `verify_password()` in `api/auth.py` for JWT issuance. `services/auth.py` is unchanged.
178
  - **Add new corpus operations**: Add a route to `api/corpus.py`. `pipeline.py` is unchanged.
179
  - **Add new document types**: The classifier learns them automatically. Run warmup to reinforce centroids.
180
+
181
+
182
+ docker stop $(docker ps -a -q)
183
+ docker build -t nexus-rag .
184
+ docker run -p 8000:7860 --env-file .env nexus-rag
backend/core/build_ml_assets.py CHANGED
@@ -9,7 +9,6 @@ triggering heavy downloads or training loops on the first request.
9
 
10
  import os
11
  import logging
12
- from pathlib import Path
13
 
14
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
15
  log = logging.getLogger("nexus.build_assets")
@@ -17,31 +16,41 @@ log = logging.getLogger("nexus.build_assets")
17
  def build_assets():
18
  log.info("Starting ML asset pre-build...")
19
 
20
- # 1. Pre-download sentence-transformers (used by Intent Classifier)
21
- log.info("Downloading all-MiniLM-L6-v2 embedding model...")
22
- try:
23
- from sentence_transformers import SentenceTransformer
24
- _ = SentenceTransformer("all-MiniLM-L6-v2")
25
- log.info("Embedding model downloaded successfully.")
26
- except Exception as e:
27
- log.error("Failed to download embedding model: %s", e)
28
 
29
- # 2. Pre-train the Intent Classifier
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  # This creates backend/core/intent_model.pkl
31
- log.info("Training initial Intent Classifier...")
32
- try:
33
- from backend.core.intent_classifier import train_initial_model
34
-
35
- # We temporarily disable the Supabase upload during the build step
36
- # because the build environment might not have the Supabase keys yet,
37
- # and we only care about the local .pkl file for the container anyway.
38
- os.environ["SUPABASE_URL"] = "dummy"
39
- os.environ["SUPABASE_SERVICE_KEY"] = "dummy"
40
-
41
- train_initial_model()
42
- log.info("Intent Classifier trained and saved locally.")
43
- except Exception as e:
44
- log.error("Failed to train Intent Classifier: %s", e)
45
 
46
  log.info("ML asset pre-build complete.")
47
 
 
9
 
10
  import os
11
  import logging
 
12
 
13
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
14
  log = logging.getLogger("nexus.build_assets")
 
16
  def build_assets():
17
  log.info("Starting ML asset pre-build...")
18
 
19
+ # Build-time safety:
20
+ # Prevent intent_classifier singleton from starting background bootstrap
21
+ # threads while we run deterministic synchronous training below.
22
+ os.environ["NEXUS_DISABLE_INTENT_BOOTSTRAP"] = "true"
 
 
 
 
23
 
24
+ # In CI/build environments we may not have real Supabase credentials.
25
+ # Keep train/upload logic local-only in that case.
26
+ os.environ.setdefault("SUPABASE_URL", "")
27
+ os.environ.setdefault("SUPABASE_SERVICE_KEY", "")
28
+
29
+ mode = os.getenv("NEXUS_BUILD_ASSETS_MODE", "light").strip().lower()
30
+ log.info("Build asset mode: %s", mode)
31
+
32
+ # 1. Optional pre-download sentence-transformers (used by Intent Classifier)
33
+ if mode in {"light", "full"}:
34
+ log.info("Downloading all-MiniLM-L6-v2 embedding model...")
35
+ try:
36
+ from sentence_transformers import SentenceTransformer
37
+ _ = SentenceTransformer("all-MiniLM-L6-v2")
38
+ log.info("Embedding model downloaded successfully.")
39
+ except Exception as e:
40
+ log.error("Failed to download embedding model: %s", e)
41
+
42
+ # 2. Optional pre-train the Intent Classifier (heavy step)
43
  # This creates backend/core/intent_model.pkl
44
+ if mode == "full":
45
+ log.info("Training initial Intent Classifier...")
46
+ try:
47
+ from backend.core.intent_classifier import train_initial_model
48
+ train_initial_model()
49
+ log.info("Intent Classifier trained and saved locally.")
50
+ except Exception as e:
51
+ log.error("Failed to train Intent Classifier: %s", e)
52
+ else:
53
+ log.info("Skipping intent training in '%s' mode (runtime bootstrap handles it if enabled).", mode)
 
 
 
 
54
 
55
  log.info("ML asset pre-build complete.")
56
 
backend/core/config.py CHANGED
@@ -105,10 +105,14 @@ INTENT_MODEL_PATH = "backend/core/intent_model.pkl"
105
  INTENT_FEEDBACK_PATH = "backend/core/intent_feedback.jsonl"
106
  INTENT_RETRAIN_EVERY = 25
107
  INTENT_MIN_CONFIDENCE = 0.65
 
108
 
109
  # ==================== UPLOAD BATCHING ====================
110
- UPLOAD_BATCH_SIZE = 10
111
- UPLOAD_BATCH_SLEEP_S = 2
 
 
 
112
 
113
  # ==================== RETRIEVAL ====================
114
  CHAT_MEMORY_TURNS = 3
 
105
  INTENT_FEEDBACK_PATH = "backend/core/intent_feedback.jsonl"
106
  INTENT_RETRAIN_EVERY = 25
107
  INTENT_MIN_CONFIDENCE = 0.65
108
+ INTENT_BOOTSTRAP_ON_STARTUP = os.getenv("INTENT_BOOTSTRAP_ON_STARTUP", "false").lower() in {"1", "true", "yes"}
109
 
110
  # ==================== UPLOAD BATCHING ====================
111
+ UPLOAD_BATCH_SIZE = 5
112
+ UPLOAD_BATCH_SLEEP_S = 5
113
+ UPLOAD_RETRY_MAX_ATTEMPTS = int(os.getenv("UPLOAD_RETRY_MAX_ATTEMPTS", "4"))
114
+ UPLOAD_RETRY_BASE_SLEEP_S = float(os.getenv("UPLOAD_RETRY_BASE_SLEEP_S", "2"))
115
+ UPLOAD_RETRY_MAX_SLEEP_S = float(os.getenv("UPLOAD_RETRY_MAX_SLEEP_S", "20"))
116
 
117
  # ==================== RETRIEVAL ====================
118
  CHAT_MEMORY_TURNS = 3
backend/core/intent_classifier.py CHANGED
@@ -33,6 +33,18 @@ from supabase.client import create_client
33
 
34
  log = logging.getLogger("nexus.intent")
35
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  # ── Lazy imports (heavy — only load once at first use) ───────────────────────
37
  _embedder = None
38
  _embedder_lock = threading.Lock()
@@ -228,7 +240,12 @@ def _build_features(query: str, has_category: bool, has_history: bool) -> np.nda
228
  [384:392] β€” 8 structural context signals
229
  """
230
  embedder = _get_embedder()
231
- query_embedding = embedder.encode(query, normalize_embeddings=True)
 
 
 
 
 
232
 
233
  q = query.lower().strip()
234
  words = q.split()
@@ -296,7 +313,9 @@ class IntentClassifier:
296
  else:
297
  log.info("No intent model found β€” will use fallback until trained.")
298
  self._ready = False
299
- if not getattr(self, "_bootstrap_started", False):
 
 
300
  self._bootstrap_started = True
301
  threading.Thread(target=train_initial_model, daemon=True).start()
302
  except Exception as e:
@@ -305,6 +324,9 @@ class IntentClassifier:
305
 
306
  def _upload_model_to_supabase(self, model_path: str):
307
  from backend.core import config
 
 
 
308
  try:
309
  with open(model_path, "rb") as f:
310
  data = f.read()
@@ -320,6 +342,8 @@ class IntentClassifier:
320
 
321
  def _download_model_from_supabase(self, model_path: str) -> bool:
322
  from backend.core import config
 
 
323
  try:
324
  sb = create_client(config.SUPABASE_URL, config.SUPABASE_SERVICE_KEY)
325
  data = sb.storage.from_("rag-models").download("intent_model.pkl")
@@ -382,18 +406,19 @@ class IntentClassifier:
382
  "user_id": user_id,
383
  }
384
  supabase_ok = False
385
- try:
386
- supabase = create_client(config.SUPABASE_URL, config.SUPABASE_SERVICE_KEY)
387
- supabase.table("intent_feedback").insert({
388
- "user_id": user_id,
389
- "query": query,
390
- "has_category": has_category,
391
- "has_history": has_history,
392
- "label": int(was_needed),
393
- }).execute()
394
- supabase_ok = True
395
- except Exception as e:
396
- log.warning("Supabase intent_feedback insert failed: %s", e)
 
397
 
398
  if not supabase_ok:
399
  try:
@@ -407,6 +432,8 @@ class IntentClassifier:
407
  def _maybe_retrain(self):
408
  from backend.core import config
409
  try:
 
 
410
  supabase = create_client(config.SUPABASE_URL, config.SUPABASE_SERVICE_KEY)
411
  result = supabase.table("intent_feedback").select("id", count="exact").execute()
412
  n = result.count or 0
@@ -429,14 +456,15 @@ class IntentClassifier:
429
  # Load feedback
430
  feedback = []
431
  loaded_from_supabase = False
432
- try:
433
- supabase = create_client(config.SUPABASE_URL, config.SUPABASE_SERVICE_KEY)
434
- rows = supabase.table("intent_feedback").select("query,has_category,has_history,label").execute()
435
- for r in (rows.data or []):
436
- feedback.append((r["query"], r["has_category"], r["has_history"], r["label"]))
437
- loaded_from_supabase = True
438
- except Exception as e:
439
- log.info("Supabase feedback read not available yet: %s", e)
 
440
 
441
  if not loaded_from_supabase:
442
  try:
@@ -489,6 +517,18 @@ class IntentClassifier:
489
  return {"needs_clarification": True, "confidence": 0.75}
490
  return {"needs_clarification": False, "confidence": 0.85}
491
 
 
 
 
 
 
 
 
 
 
 
 
 
492
 
493
  # =========================================================================== #
494
  # TRAINING #
@@ -570,18 +610,19 @@ def train_initial_model():
570
  joblib.dump({"classifier": clf, "version": 1, "n_examples": n}, config.INTENT_MODEL_PATH)
571
  log.info("Initial model saved to %s (%d examples).", config.INTENT_MODEL_PATH, n)
572
  # Upload to Supabase directly without going through singleton
573
- try:
574
- with open(config.INTENT_MODEL_PATH, "rb") as f:
575
- model_bytes = f.read()
576
- sb = create_client(config.SUPABASE_URL, config.SUPABASE_SERVICE_KEY)
577
- sb.storage.from_("rag-models").upload(
578
- path="intent_model.pkl",
579
- file=model_bytes,
580
- file_options={"content-type": "application/octet-stream", "upsert": "true"},
581
- )
582
- log.info("Initial model uploaded to Supabase Storage.")
583
- except Exception as e:
584
- log.warning("Could not upload initial model: %s", e)
 
585
 
586
  # Force singleton to reload
587
  inst = IntentClassifier()
@@ -590,4 +631,8 @@ def train_initial_model():
590
 
591
 
592
  # Module-level singleton β€” imported by pipeline.py
593
- intent_classifier = IntentClassifier()
 
 
 
 
 
33
 
34
  log = logging.getLogger("nexus.intent")
35
 
36
+
37
+ def _bootstrap_disabled() -> bool:
38
+ from backend.core import config
39
+ force_disabled = os.getenv("NEXUS_DISABLE_INTENT_BOOTSTRAP", "false").lower() in {"1", "true", "yes"}
40
+ # Default production-safe posture: do not train on import unless explicitly enabled.
41
+ return force_disabled or (not config.INTENT_BOOTSTRAP_ON_STARTUP)
42
+
43
+
44
+ def _supabase_config_available() -> bool:
45
+ from backend.core import config
46
+ return bool(config.SUPABASE_URL and config.SUPABASE_SERVICE_KEY)
47
+
48
  # ── Lazy imports (heavy β€” only load once at first use) ───────────────────────
49
  _embedder = None
50
  _embedder_lock = threading.Lock()
 
240
  [384:392] β€” 8 structural context signals
241
  """
242
  embedder = _get_embedder()
243
+ # Disable tqdm bars to keep Docker/build logs stable and readable.
244
+ query_embedding = embedder.encode(
245
+ query,
246
+ normalize_embeddings=True,
247
+ show_progress_bar=False,
248
+ )
249
 
250
  q = query.lower().strip()
251
  words = q.split()
 
313
  else:
314
  log.info("No intent model found β€” will use fallback until trained.")
315
  self._ready = False
316
+ if _bootstrap_disabled():
317
+ log.info("Intent bootstrap disabled by NEXUS_DISABLE_INTENT_BOOTSTRAP.")
318
+ elif not getattr(self, "_bootstrap_started", False):
319
  self._bootstrap_started = True
320
  threading.Thread(target=train_initial_model, daemon=True).start()
321
  except Exception as e:
 
324
 
325
  def _upload_model_to_supabase(self, model_path: str):
326
  from backend.core import config
327
+ if not _supabase_config_available():
328
+ log.info("Skipping model upload: Supabase config unavailable.")
329
+ return
330
  try:
331
  with open(model_path, "rb") as f:
332
  data = f.read()
 
342
 
343
  def _download_model_from_supabase(self, model_path: str) -> bool:
344
  from backend.core import config
345
+ if not _supabase_config_available():
346
+ return False
347
  try:
348
  sb = create_client(config.SUPABASE_URL, config.SUPABASE_SERVICE_KEY)
349
  data = sb.storage.from_("rag-models").download("intent_model.pkl")
 
406
  "user_id": user_id,
407
  }
408
  supabase_ok = False
409
+ if _supabase_config_available():
410
+ try:
411
+ supabase = create_client(config.SUPABASE_URL, config.SUPABASE_SERVICE_KEY)
412
+ supabase.table("intent_feedback").insert({
413
+ "user_id": user_id,
414
+ "query": query,
415
+ "has_category": has_category,
416
+ "has_history": has_history,
417
+ "label": int(was_needed),
418
+ }).execute()
419
+ supabase_ok = True
420
+ except Exception as e:
421
+ log.warning("Supabase intent_feedback insert failed: %s", e)
422
 
423
  if not supabase_ok:
424
  try:
 
432
  def _maybe_retrain(self):
433
  from backend.core import config
434
  try:
435
+ if not _supabase_config_available():
436
+ raise RuntimeError("Supabase config unavailable")
437
  supabase = create_client(config.SUPABASE_URL, config.SUPABASE_SERVICE_KEY)
438
  result = supabase.table("intent_feedback").select("id", count="exact").execute()
439
  n = result.count or 0
 
456
  # Load feedback
457
  feedback = []
458
  loaded_from_supabase = False
459
+ if _supabase_config_available():
460
+ try:
461
+ supabase = create_client(config.SUPABASE_URL, config.SUPABASE_SERVICE_KEY)
462
+ rows = supabase.table("intent_feedback").select("query,has_category,has_history,label").execute()
463
+ for r in (rows.data or []):
464
+ feedback.append((r["query"], r["has_category"], r["has_history"], r["label"]))
465
+ loaded_from_supabase = True
466
+ except Exception as e:
467
+ log.info("Supabase feedback read not available yet: %s", e)
468
 
469
  if not loaded_from_supabase:
470
  try:
 
517
  return {"needs_clarification": True, "confidence": 0.75}
518
  return {"needs_clarification": False, "confidence": 0.85}
519
 
520
+ def status(self) -> dict:
521
+ """
522
+ Lightweight runtime status for health endpoints/observability.
523
+ """
524
+ from backend.core import config
525
+ return {
526
+ "ready": bool(self._ready and self._clf is not None),
527
+ "model_path": config.INTENT_MODEL_PATH,
528
+ "model_exists": Path(config.INTENT_MODEL_PATH).exists(),
529
+ "bootstrap_enabled": (not _bootstrap_disabled()),
530
+ }
531
+
532
 
533
  # =========================================================================== #
534
  # TRAINING #
 
610
  joblib.dump({"classifier": clf, "version": 1, "n_examples": n}, config.INTENT_MODEL_PATH)
611
  log.info("Initial model saved to %s (%d examples).", config.INTENT_MODEL_PATH, n)
612
  # Upload to Supabase directly without going through singleton
613
+ if _supabase_config_available():
614
+ try:
615
+ with open(config.INTENT_MODEL_PATH, "rb") as f:
616
+ model_bytes = f.read()
617
+ sb = create_client(config.SUPABASE_URL, config.SUPABASE_SERVICE_KEY)
618
+ sb.storage.from_("rag-models").upload(
619
+ path="intent_model.pkl",
620
+ file=model_bytes,
621
+ file_options={"content-type": "application/octet-stream", "upsert": "true"},
622
+ )
623
+ log.info("Initial model uploaded to Supabase Storage.")
624
+ except Exception as e:
625
+ log.warning("Could not upload initial model: %s", e)
626
 
627
  # Force singleton to reload
628
  inst = IntentClassifier()
 
631
 
632
 
633
  # Module-level singleton β€” imported by pipeline.py
634
+ intent_classifier = IntentClassifier()
635
+
636
+
637
+ def get_intent_classifier_status() -> dict:
638
+ return intent_classifier.status()
backend/core/pipeline.py CHANGED
@@ -233,6 +233,7 @@ def get_cached_embedding(text: str) -> list:
233
  # Schema: supabase/migrations/0003_rerank_feedback.sql #
234
  # =========================================================================== #
235
 
 
236
  def _log_rerank_feedback(
237
  query: str,
238
  all_candidates: list,
@@ -244,6 +245,7 @@ def _log_rerank_feedback(
244
  Write rerank results to rerank_feedback table via a daemon thread.
245
  Completely non-blocking -- exceptions are swallowed so query never fails.
246
  """
 
247
  def _write():
248
  try:
249
  sb = _build_service_supabase_client()
@@ -265,25 +267,31 @@ def _log_rerank_feedback(
265
  doc_type = chunk.get("metadata", {}).get("document_type")
266
  chunk_id_raw = chunk.get("id")
267
  try:
268
- chunk_uuid = str(uuid.UUID(str(chunk_id_raw))) if chunk_id_raw else None
 
 
269
  except Exception:
270
  chunk_uuid = None
271
 
272
- rows.append({
273
- "user_id": user_id,
274
- "query_hash": q_hash,
275
- "query_text": query, # Added for local distillation
276
- "chunk_id": chunk_uuid,
277
- "chunk_hash": c_hash,
278
- "chunk_text": content[:500], # Added (truncated to save space)
279
- "document_type": doc_type,
280
- "cohere_score": float(score),
281
- "was_selected": c_hash in selected_hashes,
282
- })
 
 
283
 
284
  if rows:
285
  for start in range(0, len(rows), 50):
286
- sb.table("rerank_feedback").insert(rows[start:start + 50]).execute()
 
 
287
  log.debug("Logged %d rerank feedback rows.", len(rows))
288
  except Exception as exc:
289
  log.debug("rerank_feedback logging skipped: %s", exc)
@@ -296,7 +304,6 @@ def _log_rerank_feedback(
296
  # =========================================================================== #
297
 
298
 
299
-
300
  def get_existing_categories(access_token: str = None) -> List[str]:
301
  """Server-side DISTINCT via get_document_types() SQL function."""
302
  supabase = _build_supabase_client(access_token)
@@ -690,7 +697,9 @@ def process_chunks(
690
  return docs, ids
691
 
692
 
693
- def build_raptor_tree(leaf_docs: List[Document], leaf_ids: List[str]) -> tuple[List[Document], List[str]]:
 
 
694
  """
695
  RAPTOR implementation: recursively clusters documents and generates
696
  parent summaries until we reach a single root node.
@@ -712,7 +721,7 @@ def build_raptor_tree(leaf_docs: List[Document], leaf_ids: List[str]) -> tuple[L
712
  CLUSTER_SIZE = 5
713
 
714
  llm = _build_llm(needs_vision=False, use_ingestion=True)
715
-
716
  # Prompt for collapsing children into a parent concept node
717
  cluster_prompt = (
718
  "SYSTEM ROLE: You are an expert document synthesist building a hierarchical reasoning tree.\n"
@@ -727,13 +736,15 @@ def build_raptor_tree(leaf_docs: List[Document], leaf_ids: List[str]) -> tuple[L
727
  )
728
 
729
  while len(current_level_docs) > 1:
730
- print(f" [RAPTOR] Building Level {current_level} (from {len(current_level_docs)} children)...")
 
 
731
  next_level_docs = []
732
 
733
  # Iterate in clusters
734
  for i in range(0, len(current_level_docs), CLUSTER_SIZE):
735
  cluster = current_level_docs[i : i + CLUSTER_SIZE]
736
-
737
  # Combine the underlying texts (use the existing summary if available, else raw text)
738
  sections_text = ""
739
  for j, child in enumerate(cluster):
@@ -742,7 +753,7 @@ def build_raptor_tree(leaf_docs: List[Document], leaf_ids: List[str]) -> tuple[L
742
  child_text = child.metadata.get("summary", child.page_content)
743
  if not child_text or child_text == "No summary available.":
744
  child_text = child.page_content
745
- sections_text += f"--- SECTION {j+1} ---\n{child_text}\n\n"
746
 
747
  # Generate the parent summary
748
  prompt = cluster_prompt.format(count=len(cluster), sections=sections_text)
@@ -750,19 +761,26 @@ def build_raptor_tree(leaf_docs: List[Document], leaf_ids: List[str]) -> tuple[L
750
  response = llm.invoke([HumanMessage(content=prompt)])
751
  parent_text = response.content
752
  except Exception as e:
753
- log.warning(f"RAPTOR summarization failed at level {current_level}, segment {i}: {e}")
 
 
754
  # Fallback: just concatenate
755
- parent_text = "Merged Content:\n" + "\n".join([c.page_content[:500] for c in cluster])
 
 
756
 
757
  # Generate deterministic ID for the parent
758
  import hashlib
 
759
  parent_hash = hashlib.md5(parent_text.encode()).hexdigest()
760
- parent_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"raptor_{current_level}_{parent_hash}"))
 
 
761
 
762
  # Create the parent document
763
  # Inherit metadata from the first child (source array, file hash, document type)
764
  base_meta = cluster[0].metadata
765
-
766
  # Gather all unique page numbers from children
767
  all_pages = set()
768
  for c in cluster:
@@ -779,10 +797,10 @@ def build_raptor_tree(leaf_docs: List[Document], leaf_ids: List[str]) -> tuple[L
779
  "node_type": "summary",
780
  "node_level": current_level,
781
  "node_id": parent_id,
782
- "parent_node_id": None, # Will be set by the NEXT level up
783
  "page_numbers": sorted(list(all_pages)),
784
- "children_count": len(cluster)
785
- }
786
  )
787
 
788
  # Update children to point to this parent
@@ -796,7 +814,9 @@ def build_raptor_tree(leaf_docs: List[Document], leaf_ids: List[str]) -> tuple[L
796
  current_level_docs = next_level_docs
797
  current_level += 1
798
 
799
- print(f" [RAPTOR] Tree built. Total nodes: {len(all_docs)} (Leaves: {len(leaf_docs)}, Summaries: {len(all_docs) - len(leaf_docs)})")
 
 
800
  return all_docs, all_ids
801
 
802
 
@@ -902,9 +922,9 @@ def _apply_category_override(
902
  ).execute()
903
 
904
  # Update ingested_files registry
905
- supabase.table("ingested_files").update({"document_type": new_category,"user_overridden": True}).eq(
906
- "file_hash", file_hash
907
- ).execute()
908
 
909
  # Refresh materialized view so sidebar filter updates immediately
910
  try:
@@ -963,11 +983,41 @@ def delete_document(file_hash: str, access_token: str = None) -> None:
963
  except Exception:
964
  pass
965
 
 
966
  def upload_to_supabase(
967
  documents: List[Document],
968
  ids: List[str],
969
  access_token: str = None,
970
  ) -> None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
971
  BATCH_SIZE = config.UPLOAD_BATCH_SIZE
972
  BATCH_SLEEP = config.UPLOAD_BATCH_SLEEP_S
973
 
@@ -995,25 +1045,77 @@ def upload_to_supabase(
995
 
996
  log.info("Batch %d/%d (%d docs)...", batch_num, total_batches, len(batch_docs))
997
 
998
- # Embed the batch
999
- texts = [doc.page_content for doc in batch_docs]
1000
- vectors = embedder.embed_documents(texts)
1001
-
1002
- # Insert via RPC β€” user_id is explicit, not from metadata
1003
- for doc, doc_id, vector in zip(batch_docs, batch_ids, vectors):
1004
- sb.rpc(
1005
- "insert_document_chunk",
1006
- {
1007
- "p_id": doc_id,
1008
- "p_content": doc.page_content,
1009
- "p_metadata": doc.metadata,
1010
- "p_embedding": vector,
1011
- "p_user_id": user_id,
1012
- "p_node_type": doc.metadata.get("node_type", "leaf"),
1013
- "p_parent_node_id": doc.metadata.get("parent_node_id"),
1014
- "p_node_level": doc.metadata.get("node_level", 0),
1015
- },
1016
- ).execute()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1017
 
1018
  if start + BATCH_SIZE < len(documents):
1019
  time.sleep(BATCH_SLEEP)
@@ -1063,7 +1165,7 @@ def run_ingestion(
1063
  if not force and is_file_already_ingested(file_hash, access_token=access_token):
1064
  log.info("SKIPPING β€” already ingested.")
1065
  return "already_ingested"
1066
-
1067
  # NEW: Check if user has previously overridden the category for this file.
1068
  # If so, skip the classifier and use their choice directly.
1069
  forced_category = None
@@ -1553,7 +1655,7 @@ def retrieve_chunks(
1553
  return [cache_doc]
1554
  except Exception as e:
1555
  log.warning("Cache check failed, proceeding normally: %s", e)
1556
-
1557
  queries_to_run = generate_sub_queries(query)
1558
 
1559
  dynamic_k = 10 if len(queries_to_run) > 1 else 5
@@ -1646,7 +1748,7 @@ def retrieve_chunks(
1646
  base_threshold = 0.0001 # sigmoid of ms-marco logits is very small
1647
  else:
1648
  base_threshold = RELEVANCE_THRESHOLD # 0.35 for Cohere
1649
-
1650
  effective_threshold = base_threshold
1651
  if filter_dict or len(all_candidates) <= 10:
1652
  if reranker == "cohere":
@@ -1813,7 +1915,9 @@ def retrieve_chunks(
1813
  if budgeted:
1814
  log.info(
1815
  "Context budget: %d chars across %d/%d chunks.",
1816
- total_chars, len(budgeted), len(retrieved),
 
 
1817
  )
1818
  retrieved = budgeted
1819
 
@@ -1878,7 +1982,7 @@ def generate_answer(
1878
  for i, chunk in enumerate(chunks, 1):
1879
  prompt += f"--- Source {i} ---\n"
1880
  meta = chunk.metadata
1881
-
1882
  # Determine if this is a raw chunk or a RAPTOR summary node
1883
  node_type = meta.get("node_type", "leaf")
1884
  node_level = meta.get("node_level", 0)
@@ -1887,7 +1991,7 @@ def generate_answer(
1887
  # For summary nodes, page_content IS the summary. There is no raw original_content.
1888
  prompt += f"[SYNTHESIZED CHAPTER SUMMARY - LEVEL {node_level}]\n"
1889
  prompt += f"TEXT:\n{chunk.page_content}\n\n"
1890
- original = {} # summaries don't have tables/images directly attached yet
1891
  else:
1892
  # Traditional leaf chunk
1893
  original = meta.get("original_content")
@@ -1896,7 +2000,7 @@ def generate_answer(
1896
  original = json.loads(original)
1897
  if isinstance(original, str):
1898
  original = json.loads(original)
1899
- except:
1900
  original = {}
1901
  elif not isinstance(original, dict):
1902
  original = {}
@@ -1996,7 +2100,7 @@ async def generate_answer_stream(
1996
  yield {"type": "done", "images": []}
1997
  return
1998
  # ── Cache hit handler ────────────────────────────────────────────────────
1999
-
2000
  if len(chunks) == 1 and chunks[0].page_content == "__CACHE_HIT__":
2001
  cached = chunks[0].metadata.get("__cache__", {})
2002
  answer = cached.get("answer", "")
@@ -2007,7 +2111,7 @@ async def generate_answer_stream(
2007
  await asyncio.sleep(0)
2008
  yield {"type": "done", "sources": cached.get("sources", []), "images": []}
2009
  return
2010
-
2011
  # ── TASK 3: Log RAGAS reward signal to evaluation_logs ───────────────────
2012
  # Runs fire-and-forget in an executor so it never blocks streaming.
2013
  # Uses the Cohere relevance scores already embedded in chunk metadata.
@@ -2092,7 +2196,7 @@ async def generate_answer_stream(
2092
  page_ref = pages[0] if pages else "unknown"
2093
  loc_key = f"{file_hash}_p{page_ref}"
2094
  chunk_relevance = meta.get("relevance_score", 0)
2095
-
2096
  if (
2097
  chunk_relevance >= config.IMAGE_RELEVANCE_THRESHOLD
2098
  ): # only show images from highly relevant chunks
@@ -2154,32 +2258,40 @@ async def generate_answer_stream(
2154
  yield {"type": "token", "content": suffix}
2155
  full_answer += suffix
2156
  # ── Store in query cache ─────────────────────────────────────────────
2157
-
2158
  try:
2159
  if access_token:
2160
  from backend.core.auth_utils import extract_jwt_sub
2161
  from backend.core.cache_manager import store_cached_answer
 
2162
  _uid = extract_jwt_sub(access_token)
2163
  _query_vec = get_cached_embedding(query)
2164
  _chunk_ids = [c.metadata.get("id", "") for c in chunks]
2165
- _doc_types = list(set(
2166
- c.metadata.get("document_type", "general_document") for c in chunks
2167
- ))
 
 
 
2168
  _sources = []
2169
  for c in chunks:
2170
  meta = c.metadata
2171
  orig = meta.get("original_content", "{}")
2172
  if isinstance(orig, str):
2173
- try: orig = json.loads(orig) # noqa: E701
2174
- except: orig = {} # noqa: E701, E722
2175
- _sources.append({
2176
- "source": meta.get("source", "Unknown"),
2177
- "score": meta.get("relevance_score"),
2178
- "chunk": meta.get("chunk_index"),
2179
- "snippet": (orig.get("raw_text") or c.page_content)[:200],
2180
- "doc_type": meta.get("document_type"),
2181
- "pages": meta.get("page_numbers"),
2182
- })
 
 
 
 
2183
  store_cached_answer(
2184
  user_id=_uid,
2185
  query_embedding=_query_vec,
@@ -2193,7 +2305,6 @@ async def generate_answer_stream(
2193
  except Exception:
2194
  pass # cache store never blocks response
2195
 
2196
-
2197
  # Save to memory async
2198
  try:
2199
  loop = asyncio.get_event_loop()
 
233
  # Schema: supabase/migrations/0003_rerank_feedback.sql #
234
  # =========================================================================== #
235
 
236
+
237
  def _log_rerank_feedback(
238
  query: str,
239
  all_candidates: list,
 
245
  Write rerank results to rerank_feedback table via a daemon thread.
246
  Completely non-blocking -- exceptions are swallowed so query never fails.
247
  """
248
+
249
  def _write():
250
  try:
251
  sb = _build_service_supabase_client()
 
267
  doc_type = chunk.get("metadata", {}).get("document_type")
268
  chunk_id_raw = chunk.get("id")
269
  try:
270
+ chunk_uuid = (
271
+ str(uuid.UUID(str(chunk_id_raw))) if chunk_id_raw else None
272
+ )
273
  except Exception:
274
  chunk_uuid = None
275
 
276
+ rows.append(
277
+ {
278
+ "user_id": user_id,
279
+ "query_hash": q_hash,
280
+ "query_text": query, # Added for local distillation
281
+ "chunk_id": chunk_uuid,
282
+ "chunk_hash": c_hash,
283
+ "chunk_text": content[:500], # Added (truncated to save space)
284
+ "document_type": doc_type,
285
+ "cohere_score": float(score),
286
+ "was_selected": c_hash in selected_hashes,
287
+ }
288
+ )
289
 
290
  if rows:
291
  for start in range(0, len(rows), 50):
292
+ sb.table("rerank_feedback").insert(
293
+ rows[start : start + 50]
294
+ ).execute()
295
  log.debug("Logged %d rerank feedback rows.", len(rows))
296
  except Exception as exc:
297
  log.debug("rerank_feedback logging skipped: %s", exc)
 
304
  # =========================================================================== #
305
 
306
 
 
307
  def get_existing_categories(access_token: str = None) -> List[str]:
308
  """Server-side DISTINCT via get_document_types() SQL function."""
309
  supabase = _build_supabase_client(access_token)
 
697
  return docs, ids
698
 
699
 
700
+ def build_raptor_tree(
701
+ leaf_docs: List[Document], leaf_ids: List[str]
702
+ ) -> tuple[List[Document], List[str]]:
703
  """
704
  RAPTOR implementation: recursively clusters documents and generates
705
  parent summaries until we reach a single root node.
 
721
  CLUSTER_SIZE = 5
722
 
723
  llm = _build_llm(needs_vision=False, use_ingestion=True)
724
+
725
  # Prompt for collapsing children into a parent concept node
726
  cluster_prompt = (
727
  "SYSTEM ROLE: You are an expert document synthesist building a hierarchical reasoning tree.\n"
 
736
  )
737
 
738
  while len(current_level_docs) > 1:
739
+ print(
740
+ f" [RAPTOR] Building Level {current_level} (from {len(current_level_docs)} children)..."
741
+ )
742
  next_level_docs = []
743
 
744
  # Iterate in clusters
745
  for i in range(0, len(current_level_docs), CLUSTER_SIZE):
746
  cluster = current_level_docs[i : i + CLUSTER_SIZE]
747
+
748
  # Combine the underlying texts (use the existing summary if available, else raw text)
749
  sections_text = ""
750
  for j, child in enumerate(cluster):
 
753
  child_text = child.metadata.get("summary", child.page_content)
754
  if not child_text or child_text == "No summary available.":
755
  child_text = child.page_content
756
+ sections_text += f"--- SECTION {j + 1} ---\n{child_text}\n\n"
757
 
758
  # Generate the parent summary
759
  prompt = cluster_prompt.format(count=len(cluster), sections=sections_text)
 
761
  response = llm.invoke([HumanMessage(content=prompt)])
762
  parent_text = response.content
763
  except Exception as e:
764
+ log.warning(
765
+ f"RAPTOR summarization failed at level {current_level}, segment {i}: {e}"
766
+ )
767
  # Fallback: just concatenate
768
+ parent_text = "Merged Content:\n" + "\n".join(
769
+ [c.page_content[:500] for c in cluster]
770
+ )
771
 
772
  # Generate deterministic ID for the parent
773
  import hashlib
774
+
775
  parent_hash = hashlib.md5(parent_text.encode()).hexdigest()
776
+ parent_id = str(
777
+ uuid.uuid5(uuid.NAMESPACE_DNS, f"raptor_{current_level}_{parent_hash}")
778
+ )
779
 
780
  # Create the parent document
781
  # Inherit metadata from the first child (source array, file hash, document type)
782
  base_meta = cluster[0].metadata
783
+
784
  # Gather all unique page numbers from children
785
  all_pages = set()
786
  for c in cluster:
 
797
  "node_type": "summary",
798
  "node_level": current_level,
799
  "node_id": parent_id,
800
+ "parent_node_id": None, # Will be set by the NEXT level up
801
  "page_numbers": sorted(list(all_pages)),
802
+ "children_count": len(cluster),
803
+ },
804
  )
805
 
806
  # Update children to point to this parent
 
814
  current_level_docs = next_level_docs
815
  current_level += 1
816
 
817
+ print(
818
+ f" [RAPTOR] Tree built. Total nodes: {len(all_docs)} (Leaves: {len(leaf_docs)}, Summaries: {len(all_docs) - len(leaf_docs)})"
819
+ )
820
  return all_docs, all_ids
821
 
822
 
 
922
  ).execute()
923
 
924
  # Update ingested_files registry
925
+ supabase.table("ingested_files").update(
926
+ {"document_type": new_category, "user_overridden": True}
927
+ ).eq("file_hash", file_hash).execute()
928
 
929
  # Refresh materialized view so sidebar filter updates immediately
930
  try:
 
983
  except Exception:
984
  pass
985
 
986
+
987
  def upload_to_supabase(
988
  documents: List[Document],
989
  ids: List[str],
990
  access_token: str = None,
991
  ) -> None:
992
+ def _log_ingestion_retry_event(
993
+ *,
994
+ user_id: str,
995
+ batch_num: int,
996
+ total_batches: int,
997
+ attempt: int,
998
+ event_type: str,
999
+ message: str = "",
1000
+ sleep_s: float = 0.0,
1001
+ ) -> None:
1002
+ """
1003
+ Best-effort telemetry for ingestion retry behavior.
1004
+ Table: public.ingestion_retry_logs
1005
+ """
1006
+ try:
1007
+ _build_service_supabase_client().table("ingestion_retry_logs").insert(
1008
+ {
1009
+ "user_id": user_id,
1010
+ "batch_num": batch_num,
1011
+ "total_batches": total_batches,
1012
+ "attempt": attempt,
1013
+ "event_type": event_type,
1014
+ "message": message[:500],
1015
+ "sleep_s": sleep_s,
1016
+ }
1017
+ ).execute()
1018
+ except Exception:
1019
+ pass
1020
+
1021
  BATCH_SIZE = config.UPLOAD_BATCH_SIZE
1022
  BATCH_SLEEP = config.UPLOAD_BATCH_SLEEP_S
1023
 
 
1045
 
1046
  log.info("Batch %d/%d (%d docs)...", batch_num, total_batches, len(batch_docs))
1047
 
1048
+ max_attempts = max(1, int(config.UPLOAD_RETRY_MAX_ATTEMPTS))
1049
+ base_sleep = float(config.UPLOAD_RETRY_BASE_SLEEP_S)
1050
+ max_sleep = float(config.UPLOAD_RETRY_MAX_SLEEP_S)
1051
+ attempt = 0
1052
+ while True:
1053
+ attempt += 1
1054
+ try:
1055
+ # Embed the batch
1056
+ texts = [doc.page_content for doc in batch_docs]
1057
+ vectors = embedder.embed_documents(texts)
1058
+
1059
+ # Insert via RPC β€” user_id is explicit, not from metadata
1060
+ for doc, doc_id, vector in zip(batch_docs, batch_ids, vectors):
1061
+ sb.rpc(
1062
+ "insert_document_chunk",
1063
+ {
1064
+ "p_id": doc_id,
1065
+ "p_content": doc.page_content,
1066
+ "p_metadata": doc.metadata,
1067
+ "p_embedding": vector,
1068
+ "p_user_id": user_id,
1069
+ "p_node_type": doc.metadata.get("node_type", "leaf"),
1070
+ "p_parent_node_id": doc.metadata.get("parent_node_id"),
1071
+ "p_node_level": doc.metadata.get("node_level", 0),
1072
+ },
1073
+ ).execute()
1074
+ _log_ingestion_retry_event(
1075
+ user_id=user_id,
1076
+ batch_num=batch_num,
1077
+ total_batches=total_batches,
1078
+ attempt=attempt,
1079
+ event_type="success",
1080
+ message="batch uploaded",
1081
+ )
1082
+ break
1083
+ except Exception as e:
1084
+ err = str(e).lower()
1085
+ retryable = any(
1086
+ x in err for x in ["429", "rate", "too many requests", "quota"]
1087
+ )
1088
+ if (not retryable) or attempt >= max_attempts:
1089
+ _log_ingestion_retry_event(
1090
+ user_id=user_id,
1091
+ batch_num=batch_num,
1092
+ total_batches=total_batches,
1093
+ attempt=attempt,
1094
+ event_type="failed",
1095
+ message=str(e),
1096
+ )
1097
+ raise
1098
+ # Exponential backoff with cap to stay below burst limits.
1099
+ sleep_s = min(max_sleep, base_sleep * (2 ** (attempt - 1)))
1100
+ log.warning(
1101
+ "Batch %d/%d rate-limited, retrying in %.1fs (attempt %d/%d): %s",
1102
+ batch_num,
1103
+ total_batches,
1104
+ sleep_s,
1105
+ attempt,
1106
+ max_attempts,
1107
+ str(e)[:120],
1108
+ )
1109
+ _log_ingestion_retry_event(
1110
+ user_id=user_id,
1111
+ batch_num=batch_num,
1112
+ total_batches=total_batches,
1113
+ attempt=attempt,
1114
+ event_type="retry",
1115
+ message=str(e),
1116
+ sleep_s=sleep_s,
1117
+ )
1118
+ time.sleep(sleep_s)
1119
 
1120
  if start + BATCH_SIZE < len(documents):
1121
  time.sleep(BATCH_SLEEP)
 
1165
  if not force and is_file_already_ingested(file_hash, access_token=access_token):
1166
  log.info("SKIPPING β€” already ingested.")
1167
  return "already_ingested"
1168
+
1169
  # NEW: Check if user has previously overridden the category for this file.
1170
  # If so, skip the classifier and use their choice directly.
1171
  forced_category = None
 
1655
  return [cache_doc]
1656
  except Exception as e:
1657
  log.warning("Cache check failed, proceeding normally: %s", e)
1658
+
1659
  queries_to_run = generate_sub_queries(query)
1660
 
1661
  dynamic_k = 10 if len(queries_to_run) > 1 else 5
 
1748
  base_threshold = 0.0001 # sigmoid of ms-marco logits is very small
1749
  else:
1750
  base_threshold = RELEVANCE_THRESHOLD # 0.35 for Cohere
1751
+
1752
  effective_threshold = base_threshold
1753
  if filter_dict or len(all_candidates) <= 10:
1754
  if reranker == "cohere":
 
1915
  if budgeted:
1916
  log.info(
1917
  "Context budget: %d chars across %d/%d chunks.",
1918
+ total_chars,
1919
+ len(budgeted),
1920
+ len(retrieved),
1921
  )
1922
  retrieved = budgeted
1923
 
 
1982
  for i, chunk in enumerate(chunks, 1):
1983
  prompt += f"--- Source {i} ---\n"
1984
  meta = chunk.metadata
1985
+
1986
  # Determine if this is a raw chunk or a RAPTOR summary node
1987
  node_type = meta.get("node_type", "leaf")
1988
  node_level = meta.get("node_level", 0)
 
1991
  # For summary nodes, page_content IS the summary. There is no raw original_content.
1992
  prompt += f"[SYNTHESIZED CHAPTER SUMMARY - LEVEL {node_level}]\n"
1993
  prompt += f"TEXT:\n{chunk.page_content}\n\n"
1994
+ original = {} # summaries don't have tables/images directly attached yet
1995
  else:
1996
  # Traditional leaf chunk
1997
  original = meta.get("original_content")
 
2000
  original = json.loads(original)
2001
  if isinstance(original, str):
2002
  original = json.loads(original)
2003
+ except: # noqa: E722
2004
  original = {}
2005
  elif not isinstance(original, dict):
2006
  original = {}
 
2100
  yield {"type": "done", "images": []}
2101
  return
2102
  # ── Cache hit handler ────────────────────────────────────────────────────
2103
+
2104
  if len(chunks) == 1 and chunks[0].page_content == "__CACHE_HIT__":
2105
  cached = chunks[0].metadata.get("__cache__", {})
2106
  answer = cached.get("answer", "")
 
2111
  await asyncio.sleep(0)
2112
  yield {"type": "done", "sources": cached.get("sources", []), "images": []}
2113
  return
2114
+
2115
  # ── TASK 3: Log RAGAS reward signal to evaluation_logs ───────────────────
2116
  # Runs fire-and-forget in an executor so it never blocks streaming.
2117
  # Uses the Cohere relevance scores already embedded in chunk metadata.
 
2196
  page_ref = pages[0] if pages else "unknown"
2197
  loc_key = f"{file_hash}_p{page_ref}"
2198
  chunk_relevance = meta.get("relevance_score", 0)
2199
+
2200
  if (
2201
  chunk_relevance >= config.IMAGE_RELEVANCE_THRESHOLD
2202
  ): # only show images from highly relevant chunks
 
2258
  yield {"type": "token", "content": suffix}
2259
  full_answer += suffix
2260
  # ── Store in query cache ─────────────────────────────────────────────
2261
+
2262
  try:
2263
  if access_token:
2264
  from backend.core.auth_utils import extract_jwt_sub
2265
  from backend.core.cache_manager import store_cached_answer
2266
+
2267
  _uid = extract_jwt_sub(access_token)
2268
  _query_vec = get_cached_embedding(query)
2269
  _chunk_ids = [c.metadata.get("id", "") for c in chunks]
2270
+ _doc_types = list(
2271
+ set(
2272
+ c.metadata.get("document_type", "general_document")
2273
+ for c in chunks
2274
+ )
2275
+ )
2276
  _sources = []
2277
  for c in chunks:
2278
  meta = c.metadata
2279
  orig = meta.get("original_content", "{}")
2280
  if isinstance(orig, str):
2281
+ try:
2282
+ orig = json.loads(orig) # noqa: E701
2283
+ except:
2284
+ orig = {} # noqa: E701, E722
2285
+ _sources.append(
2286
+ {
2287
+ "source": meta.get("source", "Unknown"),
2288
+ "score": meta.get("relevance_score"),
2289
+ "chunk": meta.get("chunk_index"),
2290
+ "snippet": (orig.get("raw_text") or c.page_content)[:200],
2291
+ "doc_type": meta.get("document_type"),
2292
+ "pages": meta.get("page_numbers"),
2293
+ }
2294
+ )
2295
  store_cached_answer(
2296
  user_id=_uid,
2297
  query_embedding=_query_vec,
 
2305
  except Exception:
2306
  pass # cache store never blocks response
2307
 
 
2308
  # Save to memory async
2309
  try:
2310
  loop = asyncio.get_event_loop()
backend/core/providers.py CHANGED
@@ -472,17 +472,20 @@ class FallbackEmbeddings:
472
  def embed_documents(self, texts: Sequence[str]) -> List[List[float]]:
473
  if not texts:
474
  return []
 
475
  last_exc: Optional[BaseException] = None
476
  for model in self._models:
477
- try:
478
- result = self._build(model).embed_documents(list(texts))
479
- # Guard against OpenRouter returning 200 OK with data=None
480
- if result is None or any(v is None for v in result):
481
- raise ValueError(f"Model {model} returned null embeddings")
482
- return result
483
- except Exception as exc:
484
- last_exc = exc
485
- continue # always try next model on any failure
 
 
486
  if last_exc:
487
  raise last_exc
488
  raise RuntimeError("Embeddings failed without exception")
 
472
  def embed_documents(self, texts: Sequence[str]) -> List[List[float]]:
473
  if not texts:
474
  return []
475
+ import time
476
  last_exc: Optional[BaseException] = None
477
  for model in self._models:
478
+ for attempt in range(3):
479
+ try:
480
+ result = self._build(model).embed_documents(list(texts))
481
+ # Guard against OpenRouter returning 200 OK with data=None
482
+ if result is None or any(v is None for v in result):
483
+ raise ValueError(f"Model {model} returned null embeddings")
484
+ return result
485
+ except Exception as exc:
486
+ last_exc = exc
487
+ time.sleep(2 * (attempt + 1)) # Backoff: 2s, 4s, 6s
488
+ continue
489
  if last_exc:
490
  raise last_exc
491
  raise RuntimeError("Embeddings failed without exception")
backend/main.py CHANGED
@@ -10,7 +10,6 @@ from slowapi import Limiter, _rate_limit_exceeded_handler
10
  from slowapi.util import get_remote_address
11
  from slowapi.errors import RateLimitExceeded
12
  from starlette.requests import Request
13
- from starlette.responses import JSONResponse
14
 
15
 
16
  def _rate_limit_key(request: Request) -> str:
@@ -20,25 +19,28 @@ def _rate_limit_key(request: Request) -> str:
20
 
21
 
22
  limiter = Limiter(key_func=_rate_limit_key)
23
- import logging
24
- import subprocess
25
- from contextlib import asynccontextmanager
26
- from fastapi import FastAPI
27
- from fastapi.middleware.cors import CORSMiddleware
28
- from fastapi.staticfiles import StaticFiles
29
- from fastapi.responses import FileResponse
30
-
31
- from dotenv import load_dotenv
 
32
  load_dotenv()
33
 
34
- from backend.api import auth, corpus, ingest, query, admin,frontend_config
35
- log = logging.getLogger("nexus.main")
 
 
36
 
37
 
38
 
39
  @asynccontextmanager
40
  async def lifespan(app: FastAPI):
41
- log.info("NEXUS API starting")
42
 
43
  # Auto-start Celery worker unless explicitly disabled
44
  # Set AUTO_START_CELERY=false in HF Secrets (start.sh handles it there)
@@ -67,11 +69,11 @@ async def lifespan(app: FastAPI):
67
  celery_process.kill()
68
  log.info("Celery worker stopped.")
69
 
70
- log.info("NEXUS API stopped")
71
 
72
 
73
  app = FastAPI(
74
- title="NEXUS RAG API", version="1.0.0", lifespan=lifespan,
75
  docs_url = "/docs" if os.getenv("DOCS_ENABLED", "true").lower() == "true" else None,
76
  redoc_url = "/redoc" if os.getenv("DOCS_ENABLED", "true").lower() == "true" else None,
77
  )
@@ -95,9 +97,17 @@ app.include_router(frontend_config.router, prefix="/api/v1/config", tags=["confi
95
  def health():
96
  return {"status": "healthy"}
97
 
 
 
 
 
 
 
 
 
98
  @app.get("/api/status")
99
  def status():
100
- return {"status": "ok", "service": "NEXUS RAG API", "version": "1.0.0"}
101
 
102
  # ── Static Frontend ───────────────────────────────────────────────────────────
103
  # Mount the entire frontend folder at the root of the app so it serves the index.html.
 
10
  from slowapi.util import get_remote_address
11
  from slowapi.errors import RateLimitExceeded
12
  from starlette.requests import Request
 
13
 
14
 
15
  def _rate_limit_key(request: Request) -> str:
 
19
 
20
 
21
  limiter = Limiter(key_func=_rate_limit_key)
22
+ import logging # noqa: E402
23
+ import subprocess # noqa: E402
24
+ from contextlib import asynccontextmanager # noqa: E402
25
+ from fastapi import FastAPI # noqa: E402
26
+ from fastapi.middleware.cors import CORSMiddleware # noqa: E402
27
+ from fastapi.staticfiles import StaticFiles # noqa: E402
28
+ from fastapi.responses import FileResponse # noqa: E402
29
+
30
+ from dotenv import load_dotenv # noqa: E402
31
+
32
  load_dotenv()
33
 
34
+ from backend.api import auth, corpus, ingest, query, admin,frontend_config # noqa: E402
35
+ from backend.core.intent_classifier import get_intent_classifier_status # noqa: E402
36
+
37
+ log = logging.getLogger("morpheus.main")
38
 
39
 
40
 
41
  @asynccontextmanager
42
  async def lifespan(app: FastAPI):
43
+ log.info("MORPHEUS API starting")
44
 
45
  # Auto-start Celery worker unless explicitly disabled
46
  # Set AUTO_START_CELERY=false in HF Secrets (start.sh handles it there)
 
69
  celery_process.kill()
70
  log.info("Celery worker stopped.")
71
 
72
+ log.info("MORPHEUS API stopped")
73
 
74
 
75
  app = FastAPI(
76
+ title="Morpheus RAG API", version="1.0.0", lifespan=lifespan,
77
  docs_url = "/docs" if os.getenv("DOCS_ENABLED", "true").lower() == "true" else None,
78
  redoc_url = "/redoc" if os.getenv("DOCS_ENABLED", "true").lower() == "true" else None,
79
  )
 
97
  def health():
98
  return {"status": "healthy"}
99
 
100
+
101
+ @app.get("/health/details")
102
+ def health_details():
103
+ return {
104
+ "status": "healthy",
105
+ "intent_classifier": get_intent_classifier_status(),
106
+ }
107
+
108
  @app.get("/api/status")
109
  def status():
110
+ return {"status": "ok", "service": "Morpheus RAG API", "version": "1.0.0"}
111
 
112
  # ── Static Frontend ───────────────────────────────────────────────────────────
113
  # Mount the entire frontend folder at the root of the app so it serves the index.html.
frontend/index.html CHANGED
@@ -3,7 +3,7 @@
3
  <head>
4
  <meta charset="UTF-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
- <title>NEXUS β€” RAG Corpus Explorer</title>
7
  <script src="https://cdnjs.cloudflare.com/ajax/libs/d3/7.8.5/d3.min.js"></script>
8
  <link rel="preconnect" href="https://fonts.googleapis.com" />
9
  <link
@@ -46,12 +46,12 @@
46
  margin-bottom: 4px;
47
  "
48
  >
49
- NEX<span style="opacity: 0.5">US</span>
50
  </div>
51
  <div
52
  style="font-size: 0.75rem; color: var(--muted); letter-spacing: 0.12em"
53
  >
54
- ENTER TODAY'S ACCESS CODE
55
  </div>
56
  <input
57
  type="email"
@@ -68,7 +68,7 @@
68
  <input
69
  type="password"
70
  id="loginPassword"
71
- placeholder="Access code…"
72
  style="
73
  width: 260px;
74
  text-align: center;
@@ -171,7 +171,7 @@
171
  <div id="app" style="display: none">
172
  <!-- TOPBAR -->
173
  <header id="topbar">
174
- <div class="topbar-logo">NEX<span>US</span></div>
175
  <nav class="topbar-nav">
176
  <button
177
  class="nav-btn active"
@@ -343,7 +343,7 @@
343
  </div>
344
  <div class="chat-messages" id="chatMessages">
345
  <div class="msg assistant">
346
- <div class="msg-role">NEXUS</div>
347
  <div class="msg-bubble">
348
  Corpus loaded. Ask me anything about your documents.
349
  </div>
 
3
  <head>
4
  <meta charset="UTF-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Morpheus β€” RAG Corpus Explorer</title>
7
  <script src="https://cdnjs.cloudflare.com/ajax/libs/d3/7.8.5/d3.min.js"></script>
8
  <link rel="preconnect" href="https://fonts.googleapis.com" />
9
  <link
 
46
  margin-bottom: 4px;
47
  "
48
  >
49
+ MOR<span style="opacity: 0.5">PHEUS</span>
50
  </div>
51
  <div
52
  style="font-size: 0.75rem; color: var(--muted); letter-spacing: 0.12em"
53
  >
54
+ AUTHENTICATION REQUIRED
55
  </div>
56
  <input
57
  type="email"
 
68
  <input
69
  type="password"
70
  id="loginPassword"
71
+ placeholder="Password…"
72
  style="
73
  width: 260px;
74
  text-align: center;
 
171
  <div id="app" style="display: none">
172
  <!-- TOPBAR -->
173
  <header id="topbar">
174
+ <div class="topbar-logo">MOR<span>PHEUS</span></div>
175
  <nav class="topbar-nav">
176
  <button
177
  class="nav-btn active"
 
343
  </div>
344
  <div class="chat-messages" id="chatMessages">
345
  <div class="msg assistant">
346
+ <div class="msg-role">MORPHEUS</div>
347
  <div class="msg-bubble">
348
  Corpus loaded. Ask me anything about your documents.
349
  </div>
frontend/js/chat.js CHANGED
@@ -264,7 +264,7 @@ function appendMsg(role, text, sources = [], images = []) {
264
  : renderMarkdown(text);
265
 
266
  div.innerHTML = `
267
- <div class="msg-role">${role === 'user' ? 'YOU' : 'NEXUS'}</div>
268
  <div class="msg-bubble">${bubbleContent}</div>
269
  ${imgHtml}
270
  ${srcHtml}`;
@@ -278,7 +278,7 @@ function appendThinking() {
278
  const div = document.createElement('div');
279
  div.className = 'msg assistant';
280
  div.innerHTML = `
281
- <div class="msg-role">NEXUS</div>
282
  <div class="msg-bubble">
283
  <div class="thinking-dots"><span></span><span></span><span></span></div>
284
  </div>`;
@@ -294,7 +294,7 @@ function clearChat() {
294
  document.getElementById('alphaLabel').textContent = 'βš– 0.5';
295
  document.getElementById('chatMessages').innerHTML = `
296
  <div class="msg assistant">
297
- <div class="msg-role">NEXUS</div>
298
  <div class="msg-bubble"><p class="msg-p">Chat cleared. Ask me anything about your documents.</p></div>
299
  </div>`;
300
  }
 
264
  : renderMarkdown(text);
265
 
266
  div.innerHTML = `
267
+ <div class="msg-role">${role === 'user' ? 'YOU' : 'MORPHEUS'}</div>
268
  <div class="msg-bubble">${bubbleContent}</div>
269
  ${imgHtml}
270
  ${srcHtml}`;
 
278
  const div = document.createElement('div');
279
  div.className = 'msg assistant';
280
  div.innerHTML = `
281
+ <div class="msg-role">MORPHEUS</div>
282
  <div class="msg-bubble">
283
  <div class="thinking-dots"><span></span><span></span><span></span></div>
284
  </div>`;
 
294
  document.getElementById('alphaLabel').textContent = 'βš– 0.5';
295
  document.getElementById('chatMessages').innerHTML = `
296
  <div class="msg assistant">
297
+ <div class="msg-role">MORPHEUS</div>
298
  <div class="msg-bubble"><p class="msg-p">Chat cleared. Ask me anything about your documents.</p></div>
299
  </div>`;
300
  }
supabase/migrations/0004_ingestion_retry_logs.sql ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- Telemetry table for ingestion retry/backoff behavior.
2
+ -- Helps track 429 pressure and tune batch/backoff settings.
3
+
4
+ CREATE TABLE IF NOT EXISTS public.ingestion_retry_logs (
5
+ id bigserial PRIMARY KEY,
6
+ created_at timestamptz NOT NULL DEFAULT now(),
7
+ user_id uuid,
8
+ batch_num integer NOT NULL,
9
+ total_batches integer NOT NULL,
10
+ attempt integer NOT NULL,
11
+ event_type text NOT NULL, -- retry | success | failed
12
+ message text,
13
+ sleep_s double precision DEFAULT 0
14
+ );
15
+
16
+ CREATE INDEX IF NOT EXISTS ingestion_retry_logs_created_at_idx
17
+ ON public.ingestion_retry_logs (created_at DESC);
18
+
19
+ CREATE INDEX IF NOT EXISTS ingestion_retry_logs_user_id_idx
20
+ ON public.ingestion_retry_logs (user_id);
21
+