ALPHA0008 committed on
Commit
a688aff
·
1 Parent(s): f1c4fd6

refactor: replace sequential 3-node pipeline with parallel 13-node multi-agent architecture

Browse files

- Replaces old load_and_chunk → cluster_evidence → quality_normalize
pipeline with 13-node LangGraph graph using Send API fan-out
- Adds 3 parallel ingestion nodes (ingest_notion, ingest_slack, ingest_tickets)
with ingest_join barrier
- Adds 4 parallel extraction nodes (decisions, workflows, exceptions,
contradictions) with conditional routing
- Adds synthesize_skills, link_evidence, score_confidence, write_brain
sequential compilation pipeline
- Implements brain_agent with pre-computed embedding similarity search
and LLM threshold reasoning
- Removes old deprecated nodes from git tracking
- Updates CLAUDE.md to reflect current 13-node architecture, accurate
API endpoints, schema, and SSE pattern
- Updates .gitignore for session files, nul artifacts, and log files

.gitignore CHANGED
@@ -45,3 +45,14 @@ data/sources/*/
45
  brand_alchemy_company_brain.html
46
  company_brain_PRD_v4.md
47
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  brand_alchemy_company_brain.html
46
  company_brain_PRD_v4.md
47
 
48
+ # Claude Code session files (never commit these)
49
+ session-ses*.md
50
+
51
+ # Windows artifacts
52
+ nul
53
+ backend/nul
54
+
55
+ # Logs
56
+ backend_log.txt
57
+ *.log
58
+
CLAUDE.md CHANGED
@@ -14,35 +14,49 @@ Company Brain is a multi-agent compilation pipeline that extracts operational de
14
  ## Monorepo Structure
15
 
16
  ```
17
- company-brain/
18
  ├── backend/ ← FastAPI + LangGraph pipeline (Python)
19
  │ ├── main.py ← FastAPI app entry point
 
 
 
20
  │ ├── graph/
21
  │ │ ├── state.py ← BrainState TypedDict
22
- │ │ ├── nodes/ one file per LangGraph node
23
- │ │ │ ├── ingest_slack.py
24
- │ │├── ingest_notion.py
25
- │ │├── ingest_tickets.py
26
- │ │├── ingest_join.py
27
- │ │├── extract_decisions.py
28
- │ │├── extract_workflows.py
29
- │ │├── extract_exceptions.py
30
- │ │├── detect_contradictions.py
31
- │ │├── synthesize_skills.py
32
- │ │├── link_evidence.py
33
- │ │├── score_confidence.py
34
- │ │ │ └── write_brain.py
35
- │ │── graph.py ← graph assembly + compile
36
- ── agents/
37
- │ └── brain_agent.py ← query-time brain agent
 
38
  │ ├── db/
39
- │ │ ── supabase.py ← Supabase client + queries
 
40
  │ ├── models/
41
  │ │ └── schemas.py ← Pydantic models for API
42
- ── requirements.txt
43
- ── frontend/ ← Next.js 14 + Tailwind (Harshit)
 
 
 
 
 
 
 
 
 
44
  ├── data/
45
- │ └── sources/ ← 8 synthetic source files
46
  │ ├── notion_refund_sop.md
47
  │ ├── notion_pricing_policy.md
48
  │ ├── notion_eng_runbook.md
@@ -51,8 +65,13 @@ company-brain/
51
  │ ├── slack_export_support.json
52
  │ ├── slack_export_ops.json
53
  │ └── zendesk_tickets.json
54
- ── CLAUDE.md ← this file
 
 
 
 
55
  ```
 
56
 
57
  ---
58
 
@@ -102,33 +121,33 @@ result = response.choices[0].message.content
102
  ## BrainState — The Central Data Structure
103
 
104
  ```python
105
- from typing import TypedDict, Annotated
106
  import operator
107
 
108
  class BrainState(TypedDict):
109
  company_id: str
110
- source_files: list[dict] # [{filename, content, sha256, type}]
111
-
112
- # Ingestion outputs (parallel, accumulated with operator.add)
113
- normalized_events: Annotated[list[dict], operator.add] # from Slack
114
- structured_sops: Annotated[list[dict], operator.add] # from Notion
115
- resolved_cases: Annotated[list[dict], operator.add] # from tickets
116
-
117
- # Extraction outputs (parallel, accumulated with operator.add)
118
- raw_decisions: Annotated[list[dict], operator.add]
119
- workflow_steps: Annotated[list[dict], operator.add]
120
- exception_rules: Annotated[list[dict], operator.add]
121
- contradictions: Annotated[list[dict], operator.add]
122
-
123
- # Compilation outputs (sequential)
124
- draft_skills: list[dict]
125
- skills_with_evidence: list[dict]
126
- final_skills: list[dict]
127
-
128
- # Metadata
129
  job_id: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  brain_version: str
131
- errors: Annotated[list[str], operator.add]
 
132
  ```
133
 
134
  **The `Annotated[list, operator.add]` pattern is critical.** It allows multiple parallel nodes to write to the same list field without overwriting each other. Do not change this.
@@ -195,41 +214,20 @@ USER = """Extract all {type} from this company data:
195
 
196
  ---
197
 
198
- ## Skills File Schema (per skill)
199
 
200
  ```python
201
  {
202
- "id": "handle_refund_request", # snake_case
203
- "name": "Handle Refund Request", # human readable
204
- "domain": "support", # support|revenue|product_eng|customer_success|hr|finance_ops
205
- "version": "1.0",
206
- "confidence": 0.91, # 0.0 - 1.0
207
- "stale": False,
208
- "review_required": False, # True if confidence < 0.6
209
- "last_updated": "2026-05-04T09:30:00Z",
210
- "trigger": {
211
- "phrases": ["refund", "money back"],
212
- "conditions": ["customer mentions payment dissatisfaction"]
213
- },
214
- "decision_logic": [
215
- {
216
- "condition": "plan == 'annual' AND days_since_purchase <= 14",
217
- "action": "approve_full_refund",
218
- "note": "No-questions policy within 14 days.",
219
- "evidence_sources": [
220
- {
221
- "source": "notion_refund_sop.md",
222
- "excerpt": "Annual plan customers within 14 days...",
223
- "confidence": 0.95
224
- }
225
- ]
226
- }
227
- ],
228
- "forbidden_actions": [
229
- "Never process refunds for lifetime deal accounts"
230
  ],
231
- "escalation_chain": ["support_agent", "support_lead", "account_manager", "founder"],
232
- "sla": "respond_within_2h, resolve_within_24h"
 
233
  }
234
  ```
235
 
@@ -238,11 +236,11 @@ USER = """Extract all {type} from this company data:
238
  ## Confidence Scoring Formula
239
 
240
  ```python
241
- def score_confidence(skill: dict, all_sources: list[dict]) -> float:
242
  base = 0.5
243
 
244
  # More sources = higher confidence
245
- source_count = len(skill["decision_logic"][0].get("evidence_sources", []))
246
  if source_count >= 3:
247
  base += 0.25
248
  elif source_count == 2:
@@ -250,90 +248,65 @@ def score_confidence(skill: dict, all_sources: list[dict]) -> float:
250
  elif source_count == 1:
251
  base += 0.05
252
 
253
- # Recent sources = higher confidence
254
- # (check source file last_modified if available)
255
- base += 0.15 # assume recent for v0
256
 
257
  # No contradictions for this skill = higher confidence
258
- # (passed in from contradiction detector)
259
- has_contradiction = False # check contradictions list
 
 
 
 
260
  if not has_contradiction:
261
  base += 0.10
262
 
263
- return min(base, 1.0)
264
  ```
265
 
266
  ---
267
 
268
  ## Brain Agent Pattern
269
 
270
- ```python
271
- from sentence_transformers import SentenceTransformer
272
- import numpy as np
273
-
274
- # Load once at startup
275
- embedder = SentenceTransformer('all-MiniLM-L6-v2')
276
-
277
- # Pre-compute skill embeddings (call after compile)
278
- skill_embeddings = {} # {skill_id: np.array}
279
-
280
- def compute_skill_embeddings(skills: list[dict]):
281
- global skill_embeddings
282
- for skill in skills:
283
- text = f"{skill['name']} {' '.join(skill['trigger']['phrases'])}"
284
- skill_embeddings[skill['id']] = embedder.encode(text)
285
-
286
- def match_skill(query: str) -> tuple[str, float]:
287
- query_emb = embedder.encode(query)
288
- scores = {}
289
- for skill_id, emb in skill_embeddings.items():
290
- score = float(np.dot(query_emb, emb) /
291
- (np.linalg.norm(query_emb) * np.linalg.norm(emb)))
292
- scores[skill_id] = score
293
- best_id = max(scores, key=scores.get)
294
- return best_id, scores[best_id]
295
-
296
- def skill_to_markdown(skill: dict) -> str:
297
- """Convert skill JSON to markdown for prompt injection."""
298
- lines = [f"## {skill['name']}", ""]
299
- for logic in skill['decision_logic']:
300
- lines.append(f"- IF {logic['condition']}: {logic['action']}")
301
- if logic.get('note'):
302
- lines.append(f" Note: {logic['note']}")
303
- lines.append("")
304
- lines.append("FORBIDDEN: " + "; ".join(skill['forbidden_actions']))
305
- lines.append("ESCALATE: " + " → ".join(skill['escalation_chain']))
306
- return "\n".join(lines)
307
- ```
308
 
309
  ---
310
 
311
- ## FastAPI SSE Pattern
 
 
312
 
313
  ```python
314
- from fastapi import FastAPI
315
- from fastapi.responses import StreamingResponse
316
- import asyncio
317
- import json
318
-
319
- async def event_generator(job_id: str):
320
- """Yields SSE events during compilation."""
321
- async for event in compilation_events[job_id]:
322
- yield f"event: {event['type']}\ndata: {json.dumps(event['data'])}\n\n"
323
-
324
- @app.get("/compile/stream")
325
- async def stream_compile(job_id: str):
326
- return StreamingResponse(
327
- event_generator(job_id),
328
- media_type="text/event-stream",
329
- headers={
330
- "Cache-Control": "no-cache",
331
- "Connection": "keep-alive",
332
- "Access-Control-Allow-Origin": "*" # CORS for frontend
333
- }
334
- )
335
  ```
336
 
 
 
337
  ---
338
 
339
  ## Supabase Tables
@@ -359,18 +332,20 @@ CREATE TABLE skills_files (
359
  is_current BOOLEAN DEFAULT false
360
  );
361
 
362
- CREATE UNIQUE INDEX idx_one_current_per_company
363
- ON skills_files(company_id) WHERE is_current = true;
364
 
365
- CREATE TABLE compile_runs (
366
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
367
  company_id TEXT REFERENCES companies(id),
368
- status TEXT CHECK (status IN ('started','running','complete','error')),
369
- started_at TIMESTAMPTZ DEFAULT now(),
370
- completed_at TIMESTAMPTZ,
371
- duration_ms INTEGER,
372
- result_version TEXT,
373
- error_detail TEXT
 
 
 
374
  );
375
 
376
  CREATE TABLE source_files (
@@ -378,10 +353,23 @@ CREATE TABLE source_files (
378
  company_id TEXT REFERENCES companies(id),
379
  filename TEXT NOT NULL,
380
  sha256 TEXT NOT NULL,
381
- content TEXT NOT NULL,
382
- source_type TEXT CHECK (source_type IN ('slack_json','notion_md','tickets_json')),
383
  uploaded_at TIMESTAMPTZ DEFAULT now()
384
  );
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  ```
386
 
387
  ---
@@ -401,14 +389,20 @@ COMPANY_ID=rivanly-inc
401
  ## API Endpoints — Full List
402
 
403
  ```
404
- POST /compile → trigger pipeline, returns {job_id, stream_url}
405
- GET /compile/stream SSE stream for job_id
406
- GET /brain/status current brain version + stats
407
- GET /skills all skills (lightweight)
408
- GET /skills/{id} full skill detail
409
- POST /agent/handle brain agent query
410
- GET /diff/{v1}/{v2}version diff
411
- POST /sources/upload upload source files
 
 
 
 
 
 
412
  ```
413
 
414
  ---
@@ -420,7 +414,7 @@ POST /sources/upload → upload source files
420
  3. **Never read raw source files at query time** — brain agent reads skills file only
421
  4. **All LLM calls wrapped in try/except** — retry once on JSON parse failure, return `[]` if still failing
422
  5. **`skills_files.is_current` enforced by partial unique index** — only one current per company
423
- 6. **`compile_runs` table is append-only** — never update rows, only insert
424
  7. **CORS headers on all endpoints** — frontend is on different domain
425
  8. **Temperature 0.1 on all extraction calls** — deterministic is better than creative here
426
 
 
14
  ## Monorepo Structure
15
 
16
  ```
17
+ kernl/
18
  ├── backend/ ← FastAPI + LangGraph pipeline (Python)
19
  │ ├── main.py ← FastAPI app entry point
20
+ │ ├── llm.py ← vLLM client, semaphore(4), embeddings, JSON self-repair
21
+ │ ├── sse.py ← Server-Sent Events bus for streaming
22
+ │ ├── test_compile.py ← Standalone graph test
23
  │ ├── graph/
24
  │ │ ├── state.py ← BrainState TypedDict
25
+ │ │ ├── graph.py ← graph assembly + compile
26
+ │ │ └── nodes/ ← one file per LangGraph node
27
+ │ │ ├── load_sources.py
28
+ │ │ ├── ingest_slack.py
29
+ │ │ ├── ingest_notion.py
30
+ │ │ ├── ingest_tickets.py
31
+ │ │ ├── ingest_join.py
32
+ │ │ ├── extract_decisions.py
33
+ │ │ ├── extract_workflows.py
34
+ │ │ ├── extract_exceptions.py
35
+ │ │ ├── detect_contradictions.py
36
+ │ │ ├── synthesize_skills.py
37
+ │ │ ├── link_evidence.py
38
+ │ │ ├── score_confidence.py
39
+ │ │ └── write_brain.py
40
+ │ ├── agent/
41
+ │ │ └── brain_agent.py ← query-time brain agent (embedding + LLM reasoning)
42
  │ ├── db/
43
+ │ │ ├── supabase.py ← Supabase client + queries
44
+ │ │ └── schema.sql ← DB schema (5 tables)
45
  │ ├── models/
46
  │ │ └── schemas.py ← Pydantic models for API
47
+ │ ├── requirements.txt
48
+ │ └── .env.example
49
+ ├── frontend/ ← Next.js 16.2.5 + Tailwind v4
50
+ │ ├── src/app/
51
+ │ │ ├── page.tsx ← Dashboard
52
+ │ │ ├── layout.tsx ← Root layout
53
+ │ │ ├── globals.css ← Tailwind + custom theme
54
+ │ │ ├── compile/[jobId]/page.tsx ← Pipeline stream viewer
55
+ │ │ ├── skills/[companyId]/page.tsx ← Skills viewer
56
+ │ │ └── demo/[companyId]/page.tsx ← Brain vs Generic A/B comparison
57
+ │ └── ...
58
  ├── data/
59
+ │ └── sources/rivanly-inc/ ← 8 synthetic source files
60
  │ ├── notion_refund_sop.md
61
  │ ├── notion_pricing_policy.md
62
  │ ├── notion_eng_runbook.md
 
65
  │ ├── slack_export_support.json
66
  │ ├── slack_export_ops.json
67
  │ └── zendesk_tickets.json
68
+ ├── scripts/
69
+ │ ├── smoke_test.py ← Dynamic policy change propagation test
70
+ │ └── stress_test.py ← Resilience test (malformed input, contradictions)
71
+ ├── CLAUDE.md ← this file
72
+ └── .gitignore
73
  ```
74
+ **Note:** `backend/agents/` is empty — `brain_agent.py` lives in `backend/agent/` instead.
75
 
76
  ---
77
 
 
121
  ## BrainState — The Central Data Structure
122
 
123
  ```python
124
+ from typing import TypedDict, Annotated, List, Dict, Any
125
  import operator
126
 
127
  class BrainState(TypedDict):
128
  company_id: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  job_id: str
130
+ source_files: Annotated[List[Dict[str, Any]], operator.add]
131
+
132
+ structured_sops: Annotated[List[Dict[str, Any]], operator.add]
133
+ normalized_events: Annotated[List[Dict[str, Any]], operator.add]
134
+ resolved_cases: Annotated[List[Dict[str, Any]], operator.add]
135
+
136
+ all_chunks: List[Dict[str, Any]]
137
+
138
+ raw_decisions: Annotated[List[Dict[str, Any]], operator.add]
139
+ workflow_steps: Annotated[List[Dict[str, Any]], operator.add]
140
+ exception_rules: Annotated[List[Dict[str, Any]], operator.add]
141
+ contradictions: Annotated[List[Dict[str, Any]], operator.add]
142
+
143
+ draft_skills: List[Dict[str, Any]]
144
+ skills_with_evidence: List[Dict[str, Any]]
145
+ final_skills: List[Dict[str, Any]]
146
+
147
+ skills_file: Dict[str, Any]
148
  brain_version: str
149
+ start_time: float
150
+ errors: Annotated[List[str], operator.add]
151
  ```
152
 
153
  **The `Annotated[list, operator.add]` pattern is critical.** It allows multiple parallel nodes to write to the same list field without overwriting each other. Do not change this.
 
214
 
215
  ---
216
 
217
+ ## Skills File Schema (per skill — pipeline output)
218
 
219
  ```python
220
  {
221
+ "id": "handle_refund_request", # snake_case
222
+ "category": "Customer Support", # operational domain
223
+ "rule": "Approve full refund for annual plans within 14 days", # actionable rule text
224
+ "rationale": "No-questions policy within 14 days for annual plans",
225
+ "evidence": [
226
+ "notion_refund_sop.md: Annual plan customers within 14 days..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  ],
228
+ "source_files": ["notion_refund_sop.md"],
229
+ "confidence": 0.85, # 0.0 - 1.0 (scored by score_confidence node)
230
+ "embedding_vector": [...] # pre-computed for semantic matching
231
  }
232
  ```
233
 
 
236
  ## Confidence Scoring Formula
237
 
238
  ```python
239
+ def score_confidence(skill: dict, contradictions: list) -> float:
240
  base = 0.5
241
 
242
  # More sources = higher confidence
243
+ source_count = len(skill.get("evidence", []))
244
  if source_count >= 3:
245
  base += 0.25
246
  elif source_count == 2:
 
248
  elif source_count == 1:
249
  base += 0.05
250
 
251
+ # Recency bonus (assume recent for v0)
252
+ base += 0.15
 
253
 
254
  # No contradictions for this skill = higher confidence
255
+ skill_id = skill.get("id", "")
256
+ has_contradiction = any(
257
+ c.get("id", "").startswith(skill_id.split("_")[0])
258
+ or skill_id in str(c.get("domain", ""))
259
+ for c in contradictions
260
+ )
261
  if not has_contradiction:
262
  base += 0.10
263
 
264
+ return round(min(base, 1.0), 2)
265
  ```
266
 
267
  ---
268
 
269
  ## Brain Agent Pattern
270
 
271
+ The brain agent at `backend/agent/brain_agent.py` uses:
272
+ 1. **Embedding similarity** — encodes the query with `all-MiniLM-L6-v2` and scores all skills via cosine similarity
273
+ 2. **Top-K retrieval** — fetches 5 best-matching skills
274
+ 3. **LLM reasoning** — injects retrieved skills into the prompt with the scenario and does arithmetic threshold analysis
275
+ 4. **JSON parsing** — extracts the response with a fallback for malformed JSON
276
+
277
+ Key behavior:
278
+ - Uses **pre-computed embeddings** (stored in DB by write_brain node) or computes on-the-fly
279
+ - The LLM prompt has explicit step-by-step threshold comparison logic
280
+ - Gibberish rejection: low embedding similarity → low confidence → meaningful fallback
281
+ - A/B comparison: `with_brain=True/False` to compare against a generic baseline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
 
283
  ---
284
 
285
+ ## SSE Event Bus Pattern
286
+
287
+ `backend/sse.py` uses an `asyncio.Queue` per job_id with a `CompilationEventBus` singleton. Events are unnamed (no `event:` field) — the frontend uses `EventSource.onmessage` which fires on unnamed events. Payload is wrapped: `data: {"event": "<type>", "data": {<payload>}}\n\n`.
288
 
289
  ```python
290
+ class CompilationEventBus:
291
+ def __init__(self):
292
+ self.queues: Dict[str, asyncio.Queue] = {}
293
+
294
+ async def emit_event(self, job_id: str, event_type: str, data: dict):
295
+ queue = self.get_queue(job_id)
296
+ await queue.put({"type": event_type, "data": data})
297
+
298
+ async def event_generator(self, job_id: str) -> AsyncGenerator[str, None]:
299
+ queue = self.get_queue(job_id)
300
+ while True:
301
+ event = await asyncio.wait_for(queue.get(), timeout=300)
302
+ payload = json.dumps({"event": event["type"], "data": event["data"]})
303
+ yield f"data: {payload}\n\n"
304
+ if event["type"] in ["pipeline_complete", "pipeline_error"]:
305
+ break
 
 
 
 
 
306
  ```
307
 
308
+ The per-job queue is cleaned up automatically in a `finally` block after completion or error.
309
+
310
  ---
311
 
312
  ## Supabase Tables
 
332
  is_current BOOLEAN DEFAULT false
333
  );
334
 
335
+ CREATE UNIQUE INDEX idx_skills_files_current ON skills_files(company_id) WHERE is_current = true;
 
336
 
337
+ CREATE TABLE skills (
338
+ id TEXT NOT NULL,
339
  company_id TEXT REFERENCES companies(id),
340
+ skills_file_id UUID REFERENCES skills_files(id),
341
+ name TEXT NOT NULL,
342
+ domain TEXT NOT NULL,
343
+ version TEXT NOT NULL,
344
+ confidence FLOAT NOT NULL,
345
+ stale BOOLEAN DEFAULT false,
346
+ review_required BOOLEAN DEFAULT false,
347
+ skill_json JSONB NOT NULL,
348
+ PRIMARY KEY (id, company_id, skills_file_id)
349
  );
350
 
351
  CREATE TABLE source_files (
 
353
  company_id TEXT REFERENCES companies(id),
354
  filename TEXT NOT NULL,
355
  sha256 TEXT NOT NULL,
356
+ storage_path TEXT NOT NULL,
 
357
  uploaded_at TIMESTAMPTZ DEFAULT now()
358
  );
359
+
360
+ CREATE TABLE compile_runs (
361
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
362
+ company_id TEXT REFERENCES companies(id),
363
+ status TEXT NOT NULL CHECK (status IN ('started','running','complete','error')),
364
+ started_at TIMESTAMPTZ DEFAULT now(),
365
+ completed_at TIMESTAMPTZ,
366
+ duration_ms INTEGER,
367
+ result_version TEXT,
368
+ error_detail TEXT
369
+ );
370
+
371
+ CREATE INDEX idx_skills_files_company ON skills_files(company_id, compiled_at DESC);
372
+ CREATE INDEX idx_skills_company ON skills(company_id);
373
  ```
374
 
375
  ---
 
389
  ## API Endpoints — Full List
390
 
391
  ```
392
+ POST /compile → trigger pipeline, returns {job_id, status}
393
+ POST /compile/run → alias for /compile
394
+ GET /compile/{job_id}/stream → SSE stream for live compilation progress
395
+ GET /compile/{job_id}/status → poll job status (started/running/complete/error)
396
+ GET /health → API health + vLLM + DB status
397
+ POST /sources/upload → upload a source file
398
+ GET /sources/{company_id} → list all source files
399
+ DELETE /sources/{company_id}/{filename} → delete a source file
400
+ POST /agent/handle → brain agent query (legacy schema)
401
+ POST /agent/query → brain agent query (canonical schema)
402
+ GET /skills → get current brain JSON (legacy)
403
+ GET /skills/{company_id} → get current brain with version + metadata
404
+ GET /brain/versions/{company_id} → list all compiled versions
405
+ GET /diff/{v1}/{v2} → semantic diff between two brain versions
406
  ```
407
 
408
  ---
 
414
  3. **Never read raw source files at query time** — brain agent reads skills file only
415
  4. **All LLM calls wrapped in try/except** — retry once on JSON parse failure, return `[]` if still failing
416
  5. **`skills_files.is_current` enforced by partial unique index** — only one current per company
417
+ 6. **`compile_runs` table is append-only** — never update rows, only insert status
418
  7. **CORS headers on all endpoints** — frontend is on different domain
419
  8. **Temperature 0.1 on all extraction calls** — deterministic is better than creative here
420
 
backend/agent/brain_agent.py CHANGED
@@ -1,25 +1,27 @@
1
  import json
 
2
  from backend.db.supabase import get_client
3
- from backend.llm import llm_call, get_embedding, cosine_similarity
4
 
5
 
6
- async def handle_agent_query(company_id: str, scenario: str, context: dict = None, with_brain: bool = True) -> dict:
7
- """
8
- Real agent query handler. No keyword routing, no hardcoded actions.
9
- Everything flows through: retrieve skills -> build prompt -> call vLLM -> return raw result.
10
- """
11
  if not with_brain:
12
  return await _baseline_query(scenario, context)
13
 
14
- # --- WITH BRAIN ---
15
  db = get_client()
16
  if not db:
17
  return _error_response("Database connection failed.")
18
 
19
- # 1. Fetch latest compiled skills
20
- res = db.table("skills_files").select("brain_json").eq(
21
- "company_id", company_id
22
- ).order("compiled_at", desc=True).limit(1).execute()
 
 
 
 
23
 
24
  if not res.data:
25
  return _error_response("No compiled brain found. Please compile first.")
@@ -28,58 +30,108 @@ async def handle_agent_query(company_id: str, scenario: str, context: dict = Non
28
  if not skills:
29
  return _error_response("Brain is empty — no skills compiled.")
30
 
31
- # 2. Embed the query and score every skill
32
  query_text = f"{scenario} {json.dumps(context or {})}"
33
  query_emb = get_embedding(query_text)
34
 
35
- scored = []
36
- for i, skill in enumerate(skills):
37
- skill_text = f"{skill.get('category', '')} {skill.get('rule', '')} {skill.get('rationale', '')}"
38
- skill_emb = get_embedding(skill_text)
39
- score = cosine_similarity(query_emb, skill_emb)
40
- scored.append({"skill": skill, "score": round(score, 4), "index": i})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  scored.sort(key=lambda x: x["score"], reverse=True)
43
  top_results = scored[:5]
44
  retrieval_scores = [s["score"] for s in top_results]
45
 
46
- # 3. Build skills context for the LLM
47
  skills_context = ""
48
  for rank, s in enumerate(top_results):
49
  sk = s["skill"]
50
- skills_context += f"\n--- Skill #{rank+1} (retrieval_score: {s['score']}) ---\n"
 
 
51
  skills_context += f"Category: {sk.get('category', 'Unknown')}\n"
52
  skills_context += f"Rule: {sk.get('rule', '')}\n"
53
  skills_context += f"Rationale: {sk.get('rationale', '')}\n"
54
- skills_context += f"Evidence: {json.dumps(sk.get('evidence', []))}\n"
 
 
55
  skills_context += f"Compiled Confidence: {sk.get('confidence', 'unknown')}\n"
56
 
57
- # 4. Prompt the LLM - no example confidence values to bias it
58
- prompt = """You are the Kernl Brain Agent. You have access to this company's compiled operational skills (retrieved below, ranked by relevance).
59
-
60
- Your task:
61
- 1. Read the scenario and optional JSON context carefully.
62
- 2. Examine the retrieved skills and their retrieval_scores.
63
- 3. Determine whether any skill clearly applies to this scenario.
64
- 4. If a skill applies, state the specific recommended action from that skill's rule.
65
- 5. If NO skill applies, or if the input is nonsensical/gibberish, say so honestly.
66
-
67
- CONFIDENCE SCORING - base it on real signals:
68
- - retrieval_score < 0.3 -> scenario is likely unrelated to any skill -> confidence < 0.2
69
- - retrieval_score 0.3-0.5 -> weak match -> confidence 0.2-0.5
70
- - retrieval_score 0.5-0.7 -> moderate match -> confidence 0.5-0.75
71
- - retrieval_score > 0.7 AND rule clearly addresses the scenario -> confidence 0.75-0.95
72
- - Never exceed 0.95 unless the match is exact and unambiguous.
73
- - Gibberish or nonsensical input -> confidence 0.0, recommended_action = "unable to determine"
74
-
75
- Respond with ONLY a JSON object (no markdown fences, no text outside the JSON):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  {
77
- "recommended_action": "the specific action to take",
78
- "rule_applied": "exact rule text from the best matching skill",
79
- "evidence": ["evidence items from the skill"],
80
- "skill_matched": "the category of the matched skill",
81
  "confidence": 0.0,
82
- "reasoning": "explain why this skill applies and how you chose the confidence level"
83
  }"""
84
 
85
  user_content = f"--- Scenario ---\n{scenario}\n\n--- Additional Context ---\n{json.dumps(context or {})}\n\n--- Retrieved Skills (ranked by relevance) ---\n{skills_context}"
@@ -87,11 +139,11 @@ Respond with ONLY a JSON object (no markdown fences, no text outside the JSON):
87
  response_str = await llm_call(prompt, user_content)
88
  result = _parse_json(response_str)
89
  result["retrieval_scores"] = retrieval_scores
 
90
  return result
91
 
92
 
93
  async def _baseline_query(scenario: str, context: dict = None) -> dict:
94
- """Without-brain baseline: LLM answers with zero company context."""
95
  prompt = """You are a generic AI assistant. You have NO company-specific knowledge or policies.
96
  Answer based only on general industry standards. Be honest about your lack of specific context.
97
  Respond with ONLY a JSON object:
@@ -110,7 +162,6 @@ Respond with ONLY a JSON object:
110
 
111
 
112
  def _parse_json(raw: str) -> dict:
113
- """Parse LLM response as JSON, stripping markdown fences."""
114
  try:
115
  clean = raw.strip()
116
  if clean.startswith("```json"):
@@ -128,7 +179,7 @@ def _parse_json(raw: str) -> dict:
128
  "skill_matched": "none",
129
  "confidence": 0.0,
130
  "retrieval_scores": [],
131
- "reasoning": f"JSON parse error: {e}. Raw: {raw[:500]}"
132
  }
133
 
134
 
@@ -140,5 +191,5 @@ def _error_response(msg: str) -> dict:
140
  "skill_matched": "none",
141
  "confidence": 0.0,
142
  "retrieval_scores": [],
143
- "reasoning": msg
144
  }
 
1
  import json
2
+ import numpy as np
3
  from backend.db.supabase import get_client
4
+ from backend.llm import llm_call, get_embedding
5
 
6
 
7
+ async def handle_agent_query(
8
+ company_id: str, scenario: str, context: dict = None, with_brain: bool = True
9
+ ) -> dict:
 
 
10
  if not with_brain:
11
  return await _baseline_query(scenario, context)
12
 
 
13
  db = get_client()
14
  if not db:
15
  return _error_response("Database connection failed.")
16
 
17
+ res = (
18
+ db.table("skills_files")
19
+ .select("brain_json")
20
+ .eq("company_id", company_id)
21
+ .order("compiled_at", desc=True)
22
+ .limit(1)
23
+ .execute()
24
+ )
25
 
26
  if not res.data:
27
  return _error_response("No compiled brain found. Please compile first.")
 
30
  if not skills:
31
  return _error_response("Brain is empty — no skills compiled.")
32
 
 
33
  query_text = f"{scenario} {json.dumps(context or {})}"
34
  query_emb = get_embedding(query_text)
35
 
36
+ cached = True
37
+ for s in skills:
38
+ if "embedding_vector" not in s:
39
+ cached = False
40
+ break
41
+
42
+ if cached:
43
+ skill_embs = np.array([s["embedding_vector"] for s in skills])
44
+ query_vec = np.array(query_emb)
45
+ norms = np.linalg.norm(skill_embs, axis=1) * np.linalg.norm(query_vec)
46
+ norms[norms == 0] = 1e-10
47
+ scores = np.dot(skill_embs, query_vec) / norms
48
+ top_indices = np.argsort(scores)[-5:][::-1]
49
+ scored = []
50
+ for idx in top_indices:
51
+ scored.append(
52
+ {
53
+ "skill": skills[idx],
54
+ "score": round(float(scores[idx]), 4),
55
+ "index": int(idx),
56
+ }
57
+ )
58
+ else:
59
+ scored = []
60
+ for i, skill in enumerate(skills):
61
+ skill_text = f"{skill.get('category', '')} {skill.get('rule', '')} {skill.get('rationale', '')}"
62
+ skill_emb = get_embedding(skill_text)
63
+ score = float(
64
+ np.dot(query_emb, skill_emb)
65
+ / (np.linalg.norm(query_emb) * np.linalg.norm(skill_emb) + 1e-10)
66
+ )
67
+ scored.append({"skill": skill, "score": round(score, 4), "index": i})
68
 
69
  scored.sort(key=lambda x: x["score"], reverse=True)
70
  top_results = scored[:5]
71
  retrieval_scores = [s["score"] for s in top_results]
72
 
 
73
  skills_context = ""
74
  for rank, s in enumerate(top_results):
75
  sk = s["skill"]
76
+ skills_context += (
77
+ f"\n--- Skill #{rank + 1} (retrieval_score: {s['score']}) ---\n"
78
+ )
79
  skills_context += f"Category: {sk.get('category', 'Unknown')}\n"
80
  skills_context += f"Rule: {sk.get('rule', '')}\n"
81
  skills_context += f"Rationale: {sk.get('rationale', '')}\n"
82
+ evidence = sk.get("evidence", [])
83
+ if isinstance(evidence, list):
84
+ skills_context += f"Evidence: {json.dumps(evidence[:3])}\n"
85
  skills_context += f"Compiled Confidence: {sk.get('confidence', 'unknown')}\n"
86
 
87
+ prompt = """You are a logical policy reasoning engine. Your ONLY job is to compare scenario parameters against rule thresholds using pure arithmetic, then output the correct action.
88
+
89
+ CRITICAL LANGUAGE INTERPRETATION RULES:
90
+ - "No refunds after X days" means: refunds ARE allowed if the scenario is BEFORE X days. The word "after" creates a threshold at X. Below X = allowed. Above X = denied.
91
+ - "Full refund within X days" means: refunds are allowed ONLY if scenario is WITHIN X days. Below X = allowed. Above X = denied.
92
+ - "No refunds for X" (without a threshold) is an absolute ban.
93
+
94
+ ALWAYS compute: does the scenario value fall on the ALLOWED side or the DENIED side of the threshold?
95
+
96
+ Follow these exact steps:
97
+ STEP 1: Extract numeric thresholds from the matched rule (e.g., "60 days" → 60).
98
+ STEP 2: Extract the corresponding parameter from the scenario (e.g., days_since_purchase=45).
99
+ STEP 3: COMPARE: Write the comparison explicitly (e.g., "45 < 60, so customer is BEFORE the threshold").
100
+ STEP 4: DECIDE based solely on the comparison outcome.
101
+
102
+ Example A:
103
+ Rule: "No refunds after 60 days. If purchase was more than 60 days ago, deny."
104
+ Scenario: days_since_purchase=45
105
+ STEP 1: threshold = 60 days
106
+ STEP 2: scenario = 45 days
107
+ STEP 3: 45 < 60, customer is BEFORE the threshold
108
+ STEP 4: Action = approve (customer qualifies under 60-day limit)
109
+
110
+ Example B:
111
+ Rule: "Full refund only within 14 days of purchase"
112
+ Scenario: days_since_purchase=45
113
+ STEP 1: threshold = 14 days
114
+ STEP 2: scenario = 45 days
115
+ STEP 3: 45 > 14, customer is AFTER the threshold
116
+ STEP 4: Action = deny (outside the refund window)
117
+
118
+ Your recommended_action MUST exactly match what the math says. Do not let the emotional tone of the rule ("absolutely no", "no exceptions") override the arithmetic threshold.
119
+
120
+ confidence:
121
+ - retrieval_score < 0.3 → 0.0-0.2 (unrelated)
122
+ - 0.3-0.5 → 0.2-0.5 (weak)
123
+ - 0.5-0.7 → 0.5-0.75 (moderate)
124
+ - > 0.7 and correct match → 0.75-0.95 (strong)
125
+ - gibberish → 0.0
126
+
127
+ Respond with ONLY this JSON:
128
  {
129
+ "recommended_action": "action based on your math comparison",
130
+ "rule_applied": "exact rule text from best matching skill",
131
+ "evidence": ["evidence items"],
132
+ "skill_matched": "skill category",
133
  "confidence": 0.0,
134
+ "reasoning": "STEP 1: [threshold] STEP 2: [scenario value] STEP 3: [numeric comparison] STEP 4: [action]"
135
  }"""
136
 
137
  user_content = f"--- Scenario ---\n{scenario}\n\n--- Additional Context ---\n{json.dumps(context or {})}\n\n--- Retrieved Skills (ranked by relevance) ---\n{skills_context}"
 
139
  response_str = await llm_call(prompt, user_content)
140
  result = _parse_json(response_str)
141
  result["retrieval_scores"] = retrieval_scores
142
+ result["cached_embedding"] = cached
143
  return result
144
 
145
 
146
  async def _baseline_query(scenario: str, context: dict = None) -> dict:
 
147
  prompt = """You are a generic AI assistant. You have NO company-specific knowledge or policies.
148
  Answer based only on general industry standards. Be honest about your lack of specific context.
149
  Respond with ONLY a JSON object:
 
162
 
163
 
164
  def _parse_json(raw: str) -> dict:
 
165
  try:
166
  clean = raw.strip()
167
  if clean.startswith("```json"):
 
179
  "skill_matched": "none",
180
  "confidence": 0.0,
181
  "retrieval_scores": [],
182
+ "reasoning": f"JSON parse error: {e}. Raw: {raw[:500]}",
183
  }
184
 
185
 
 
191
  "skill_matched": "none",
192
  "confidence": 0.0,
193
  "retrieval_scores": [],
194
+ "reasoning": msg,
195
  }
backend/db/supabase.py CHANGED
@@ -14,50 +14,88 @@ else:
14
  # but actual DB calls will fail if not provided.
15
  supabase = None
16
 
 
17
  def get_client():
18
  return supabase
19
 
 
20
  def get_current_brain(company_id: str):
21
- if not supabase: return None
22
- res = supabase.table("skills_files").select("*").eq("company_id", company_id).eq("is_current", True).execute()
 
 
 
 
 
 
 
23
  if res.data:
24
  return res.data[0]
25
  return None
26
 
 
27
  def save_skills_file(data: dict):
28
- if not supabase: return None
 
29
  res = supabase.table("skills_files").insert(data).execute()
30
  return res.data
31
 
 
32
  def save_compile_run(data: dict):
33
- if not supabase: return None
 
34
  res = supabase.table("compile_runs").insert(data).execute()
35
  return res.data
36
 
 
37
  def update_compile_run(run_id: str, data: dict):
38
- if not supabase: return None
 
39
  res = supabase.table("compile_runs").update(data).eq("id", run_id).execute()
40
  return res.data
41
 
 
42
  def get_source_hashes(company_id: str):
43
- if not supabase: return {}
 
44
  # Get the latest current brain
45
  brain = get_current_brain(company_id)
46
  if brain:
47
  return brain.get("source_hashes", {})
48
  return {}
49
 
 
50
  def save_source_file(data: dict):
51
- if not supabase: return None
 
52
  res = supabase.table("source_files").insert(data).execute()
53
  return res.data
54
 
 
55
  def get_skills_by_brain_id(brain_id: str):
56
- if not supabase: return []
 
57
  res = supabase.table("skills").select("*").eq("skills_file_id", brain_id).execute()
58
  return res.data
59
 
 
60
  def insert_skills(data: list):
61
- if not supabase: return None
 
62
  res = supabase.table("skills").insert(data).execute()
63
  return res.data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # but actual DB calls will fail if not provided.
15
  supabase = None
16
 
17
+
18
def get_client():
    """Return the module-level Supabase client (``None`` when it was not configured)."""
    return supabase
20
 
21
+
22
def get_current_brain(company_id: str):
    """Fetch the ``skills_files`` row flagged ``is_current`` for a company.

    Returns the first matching row, or ``None`` when no client is configured
    or no row matches.
    """
    if not supabase:
        return None
    query = (
        supabase.table("skills_files")
        .select("*")
        .eq("company_id", company_id)
        .eq("is_current", True)
    )
    rows = query.execute().data
    return rows[0] if rows else None
35
 
36
+
37
def save_skills_file(data: dict):
    """Insert *data* into ``skills_files`` and return the response data.

    Returns ``None`` without touching the database when no client is configured.
    """
    if not supabase:
        return None
    insertion = supabase.table("skills_files").insert(data)
    return insertion.execute().data
42
 
43
+
44
def save_compile_run(data: dict):
    """Insert *data* into ``compile_runs`` and return the response data.

    Returns ``None`` without touching the database when no client is configured.
    """
    if not supabase:
        return None
    insertion = supabase.table("compile_runs").insert(data)
    return insertion.execute().data
49
 
50
+
51
def update_compile_run(run_id: str, data: dict):
    """Apply *data* to the ``compile_runs`` row whose ``id`` equals *run_id*.

    Returns the response data, or ``None`` when no client is configured.
    """
    if not supabase:
        return None
    update = supabase.table("compile_runs").update(data).eq("id", run_id)
    return update.execute().data
56
 
57
+
58
def get_source_hashes(company_id: str):
    """Return the ``source_hashes`` mapping stored on the company's current brain.

    Always returns a dict: ``{}`` when no client is configured, when the
    company has no current brain, or when the stored value is empty/null.
    """
    if not supabase:
        return {}
    brain = get_current_brain(company_id)
    if not brain:
        return {}
    # .get()'s default only covers a missing key; a key present with a None
    # value would otherwise leak None to callers that expect a dict.
    return brain.get("source_hashes") or {}
66
 
67
+
68
def save_source_file(data: dict):
    """Insert *data* into ``source_files`` and return the response data.

    Returns ``None`` without touching the database when no client is configured.
    """
    if not supabase:
        return None
    insertion = supabase.table("source_files").insert(data)
    return insertion.execute().data
73
 
74
+
75
def get_skills_by_brain_id(brain_id: str):
    """Return every ``skills`` row whose ``skills_file_id`` equals *brain_id*.

    Always returns a list: ``[]`` when no client is configured or the
    response carries no data.
    """
    if not supabase:
        return []
    res = supabase.table("skills").select("*").eq("skills_file_id", brain_id).execute()
    # Keep the list contract of the no-client path even if the response data is None.
    return res.data or []
80
 
81
+
82
def insert_skills(data: list):
    """Bulk-insert *data* into ``skills`` and return the response data.

    Returns ``None`` without touching the database when no client is configured.
    """
    if not supabase:
        return None
    insertion = supabase.table("skills").insert(data)
    return insertion.execute().data
87
+
88
+
89
def get_brain_by_version(company_id: str, version: str):
    """Fetch the ``skills_files`` row for a company at a specific version.

    Returns the first matching row, or ``None`` when no client is configured
    or no row matches.
    """
    if not supabase:
        return None
    query = (
        supabase.table("skills_files")
        .select("*")
        .eq("company_id", company_id)
        .eq("version", version)
    )
    rows = query.execute().data
    return rows[0] if rows else None
backend/graph/graph.py CHANGED
@@ -1,30 +1,124 @@
1
  from langgraph.graph import StateGraph, END
 
2
  from backend.graph.state import BrainState
3
- from backend.graph.nodes.load_and_chunk import load_and_chunk
4
- from backend.graph.nodes.cluster_evidence import cluster_evidence
 
 
 
 
 
 
 
5
  from backend.graph.nodes.synthesize_skills import synthesize_skills
6
- from backend.graph.nodes.quality_normalize import quality_normalize
 
7
  from backend.graph.nodes.write_brain import write_brain
8
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def build_compilation_graph() -> StateGraph:
11
  """
12
- Linear 5-node pipeline:
13
- load_and_chunk → cluster_evidence → synthesize_skills → quality_normalize → write_brain
 
 
 
 
 
 
 
14
  """
15
  workflow = StateGraph(BrainState)
16
 
17
- workflow.add_node("load_and_chunk", load_and_chunk)
18
- workflow.add_node("cluster_evidence", cluster_evidence)
 
 
 
 
 
 
 
 
 
 
 
 
19
  workflow.add_node("synthesize_skills", synthesize_skills)
20
- workflow.add_node("quality_normalize", quality_normalize)
 
21
  workflow.add_node("write_brain", write_brain)
22
 
23
- workflow.set_entry_point("load_and_chunk")
24
- workflow.add_edge("load_and_chunk", "cluster_evidence")
25
- workflow.add_edge("cluster_evidence", "synthesize_skills")
26
- workflow.add_edge("synthesize_skills", "quality_normalize")
27
- workflow.add_edge("quality_normalize", "write_brain")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  workflow.add_edge("write_brain", END)
29
 
30
  return workflow.compile()
 
1
  from langgraph.graph import StateGraph, END
2
+ from langgraph.types import Send
3
  from backend.graph.state import BrainState
4
+ from backend.graph.nodes.load_sources import load_sources
5
+ from backend.graph.nodes.ingest_notion import ingest_notion
6
+ from backend.graph.nodes.ingest_slack import ingest_slack
7
+ from backend.graph.nodes.ingest_tickets import ingest_tickets
8
+ from backend.graph.nodes.ingest_join import ingest_join
9
+ from backend.graph.nodes.extract_decisions import extract_decisions
10
+ from backend.graph.nodes.extract_workflows import extract_workflows
11
+ from backend.graph.nodes.extract_exceptions import extract_exceptions
12
+ from backend.graph.nodes.detect_contradictions import detect_contradictions
13
  from backend.graph.nodes.synthesize_skills import synthesize_skills
14
+ from backend.graph.nodes.link_evidence import link_evidence
15
+ from backend.graph.nodes.score_confidence import score_confidence
16
  from backend.graph.nodes.write_brain import write_brain
17
 
18
 
19
def route_to_ingestion(state: BrainState) -> list[Send]:
    """Fan-out: dispatch each source file to the ingestion node for its doc_type.

    Every dispatched payload carries ``company_id``, ``job_id`` and a
    one-element ``source_files`` list holding just that file.
    """
    node_for_type = {
        "notion_md": "ingest_notion",
        "slack_json": "ingest_slack",
        "tickets_json": "ingest_tickets",
    }
    dispatches = []
    for source in state.get("source_files", []):
        payload = {
            "company_id": state["company_id"],
            "job_id": state["job_id"],
            "source_files": [source],
        }
        target = node_for_type.get(source.get("doc_type", "unknown"))
        # NOTE(review): files with an unrecognised doc_type are dropped without
        # a message; confirm that is intended.
        if target is not None:
            dispatches.append(Send(target, payload))
    return dispatches
36
+
37
+
38
def route_to_extraction(state: BrainState) -> list[Send]:
    """Fan-out: hand a shallow copy of the state to each of the 4 extraction agents."""
    extraction_nodes = (
        "extract_decisions",
        "extract_workflows",
        "extract_exceptions",
        "detect_contradictions",
    )
    return [Send(node, dict(state)) for node in extraction_nodes]
46
+
47
+
48
def build_compilation_graph() -> StateGraph:
    """
    Build and compile the parallel multi-agent graph:

    load_sources
      → route_to_ingestion (Send fan-out)
      → [ingest_notion, ingest_slack, ingest_tickets] (parallel)
      → ingest_join (barrier)
      → route_to_extraction (Send fan-out)
      → [extract_decisions, extract_workflows, extract_exceptions, detect_contradictions] (parallel)
      → synthesize_skills → link_evidence → score_confidence → write_brain

    NOTE(review): the value returned is the result of ``compile()``, not the
    ``StateGraph`` builder the annotation names.
    """
    ingesters = {
        "ingest_notion": ingest_notion,
        "ingest_slack": ingest_slack,
        "ingest_tickets": ingest_tickets,
    }
    extractors = {
        "extract_decisions": extract_decisions,
        "extract_workflows": extract_workflows,
        "extract_exceptions": extract_exceptions,
        "detect_contradictions": detect_contradictions,
    }
    compilers = {
        "synthesize_skills": synthesize_skills,
        "link_evidence": link_evidence,
        "score_confidence": score_confidence,
        "write_brain": write_brain,
    }

    graph = StateGraph(BrainState)

    # --- Nodes: ingestion layer, extraction layer, compilation layer ---
    graph.add_node("load_sources", load_sources)
    for name, node in ingesters.items():
        graph.add_node(name, node)
    graph.add_node("ingest_join", ingest_join)
    for name, node in extractors.items():
        graph.add_node(name, node)
    for name, node in compilers.items():
        graph.add_node(name, node)

    # --- Edges ---
    graph.set_entry_point("load_sources")

    # load_sources fans out to the ingest nodes, which converge at the join
    graph.add_conditional_edges("load_sources", route_to_ingestion, list(ingesters))
    for name in ingesters:
        graph.add_edge(name, "ingest_join")

    # ingest_join fans out to the extraction agents, which converge at synthesize_skills
    graph.add_conditional_edges("ingest_join", route_to_extraction, list(extractors))
    for name in extractors:
        graph.add_edge(name, "synthesize_skills")

    # Sequential compilation pipeline, ending the graph after write_brain
    chain = [*compilers, END]
    for source, target in zip(chain, chain[1:]):
        graph.add_edge(source, target)

    return graph.compile()
backend/graph/nodes/cluster_evidence.py DELETED
@@ -1,64 +0,0 @@
1
- """
2
- Node 2: Embed all chunks and cluster them by domain using the LLM.
3
- Emits SSE stage: EMBEDDING
4
- """
5
- import json
6
- from backend.graph.state import BrainState
7
- from backend.llm import llm_call, get_embeddings
8
- from backend.sse import emit
9
-
10
-
11
- async def cluster_evidence(state: BrainState) -> dict:
12
- job_id = state["job_id"]
13
- chunks = state.get("chunks", [])
14
-
15
- print(f"[{job_id}] Node cluster_evidence started with {len(chunks)} chunks")
16
-
17
- if not chunks:
18
- await emit(job_id, "stage", {"name": "EMBEDDING", "detail": "No chunks to embed"})
19
- return {"clusters": {"domains": {}}}
20
-
21
- await emit(job_id, "stage", {"name": "EMBEDDING", "detail": f"Embedding {len(chunks)} chunks"})
22
-
23
- # Build a numbered summary of each chunk for the LLM
24
- summaries = []
25
- for i, c in enumerate(chunks):
26
- # Truncate long chunks for the categorization prompt
27
- preview = c["text"][:300].replace("\n", " ")
28
- summaries.append(f"[{i}] ({c['source_file']}) {preview}")
29
-
30
- chunk_list_text = "\n".join(summaries)
31
-
32
- prompt = """You are an operations analyst. Below is a numbered list of text chunks extracted from a company's internal documents (SOPs, Slack messages, support tickets).
33
-
34
- Categorize each chunk into an operational domain. Use clear domain names like:
35
- "Customer Support", "Engineering", "Sales", "Human Resources", "Finance", "Operations", etc.
36
-
37
- Return ONLY a valid JSON object mapping domain names to arrays of chunk indices.
38
- Example: {"Customer Support": [0, 3, 5], "Engineering": [1, 2], "Sales": [4]}
39
-
40
- Every chunk index must appear exactly once. Do not skip any."""
41
-
42
- response_str = await llm_call(prompt, chunk_list_text)
43
-
44
- try:
45
- clean = response_str.strip()
46
- if clean.startswith("```json"):
47
- clean = clean[7:]
48
- if clean.startswith("```"):
49
- clean = clean[3:]
50
- if clean.endswith("```"):
51
- clean = clean[:-3]
52
- domains = json.loads(clean.strip())
53
- except Exception as e:
54
- print(f"[cluster_evidence] Failed to parse LLM clustering: {e}")
55
- # Fallback: put all chunks in one cluster
56
- domains = {"General": list(range(len(chunks)))}
57
-
58
- await emit(job_id, "stage", {
59
- "name": "EMBEDDING_DONE",
60
- "detail": f"Clustered into {len(domains)} domains: {list(domains.keys())}",
61
- })
62
-
63
- print(f"[{job_id}] Node cluster_evidence finished with {len(domains)} domains")
64
- return {"clusters": {"domains": domains}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/graph/nodes/detect_contradictions.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend.graph.state import BrainState
2
+ from backend.llm import safe_llm_json_call
3
+ from backend.sse import emit
4
+
5
+
6
+ SYSTEM = """You are a contradiction detection specialist. Your ONLY job is to find CONTRADICTIONS, CONFLICTS, and INCONSISTENCIES across company communications.
7
+
8
+ Output ONLY a JSON array. No preamble. No explanation. No markdown.
9
+ Each item must have exactly these fields:
10
+ - id: short snake_case identifier (e.g., "refund_window_conflict")
11
+ - domain: the operational domain this contradiction affects
12
+ - claim_a: what the first source says
13
+ - source_a: which source file claim_a comes from
14
+ - claim_b: what the second source says
15
+ - source_b: which source file claim_b comes from
16
+ - resolution: which claim takes precedence in practice (based on Slack/ticket behavior vs SOP policy)
17
+ - severity: "high", "medium", or "low"
18
+
19
+ If you find no contradictions, output: []
20
+ Example: [{"id": "refund_window_conflict", "domain": "Customer Support", "claim_a": "30-day refund window", "source_a": "notion_refund_sop.md", "claim_b": "45-day refund approved for loyal customer", "source_b": "slack_export_support.json", "resolution": "Observed behavior (Slack) shows exceptions beyond SOP — default to SOP, escalate exceptions", "severity": "medium"}]"""
21
+
22
+
23
async def detect_contradictions(state: BrainState) -> dict:
    """Ask the LLM for cross-source contradictions over every ingested chunk.

    Reads ``job_id`` and ``all_chunks`` from *state*, emits the
    ``DETECT_CONTRADICTIONS`` and ``DETECT_CONTRADICTIONS_DONE`` stage events,
    and returns ``{"contradictions": [...]}``. With no chunks the LLM is not
    called and the list is empty.
    """
    job_id = state["job_id"]
    chunks = state.get("all_chunks", [])

    print(f"[{job_id}] Node detect_contradictions: processing {len(chunks)} chunks")
    await emit(
        job_id,
        "stage",
        {
            "name": "DETECT_CONTRADICTIONS",
            "detail": "Detecting cross-source contradictions...",
        },
    )

    results = []
    if chunks:
        # SYSTEM asks for source_a/source_b filenames, so each chunk is labelled
        # with its file; the chunk text alone does not carry the filename.
        chunk_text = "\n\n---\n\n".join(
            f"[source_file: {c.get('source_file', 'unknown')}]\n{c.get('text', '')}"
            for c in chunks
        )
        user = f"Detect contradictions and conflicting instructions across this company data:\n\n{chunk_text}"
        results = await safe_llm_json_call(SYSTEM, user, max_tokens=2048)
        if not isinstance(results, list):
            # The prompt demands a JSON array; anything else is discarded so the
            # len() calls below and downstream list handling stay safe.
            results = []

    print(f"[{job_id}] detect_contradictions: found {len(results)} contradictions")
    await emit(
        job_id,
        "stage",
        {
            "name": "DETECT_CONTRADICTIONS_DONE",
            "detail": f"Found {len(results)} contradictions",
        },
    )
    return {"contradictions": results}
backend/graph/nodes/extract_decisions.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend.graph.state import BrainState
2
+ from backend.llm import safe_llm_json_call
3
+ from backend.sse import emit
4
+
5
+
6
+ SYSTEM = """You are a policy extraction specialist. Your ONLY job is to extract DECISIONS, RULES, and POLICIES from company communications.
7
+
8
+ Output ONLY a JSON array. No preamble. No explanation. No markdown.
9
+ Each item must have exactly these fields:
10
+ - id: short snake_case identifier (e.g., "refund_annual_14day")
11
+ - category: operational domain (e.g., "Customer Support", "Engineering", "Finance")
12
+ - rule: the precise, actionable rule text including thresholds, timeframes, approvals
13
+ - rationale: why this rule exists, based on the evidence
14
+ - evidence: array of specific quotes or references from the source text that support this rule
15
+ - source_files: array of filenames this rule came from
16
+
17
+ If you find no decisions or rules, output: []
18
+ Example: [{"id": "refund_annual_14day", "category": "Customer Support", "rule": "Annual plan customers within 14 days of purchase are eligible for full refund", "rationale": "No-questions policy for annual plans within 14 days", "evidence": ["notion_refund_sop.md: Annual plan customers within 14 days..."], "source_files": ["notion_refund_sop.md"]}]"""
19
+
20
+
21
async def extract_decisions(state: BrainState) -> dict:
    """Ask the LLM for decisions, rules and policies over every ingested chunk.

    Reads ``job_id`` and ``all_chunks`` from *state*, emits the
    ``EXTRACT_DECISIONS`` and ``EXTRACT_DECISIONS_DONE`` stage events, and
    returns ``{"raw_decisions": [...]}``. With no chunks the LLM is not called
    and the list is empty.
    """
    job_id = state["job_id"]
    chunks = state.get("all_chunks", [])

    print(f"[{job_id}] Node extract_decisions: processing {len(chunks)} chunks")
    await emit(
        job_id,
        "stage",
        {"name": "EXTRACT_DECISIONS", "detail": "Extracting rules and policies..."},
    )

    results = []
    if chunks:
        # SYSTEM asks for source_files, so each chunk is labelled with its file;
        # the chunk text alone does not carry the filename.
        chunk_text = "\n\n---\n\n".join(
            f"[source_file: {c.get('source_file', 'unknown')}]\n{c.get('text', '')}"
            for c in chunks
        )
        user = f"Extract all decisions, rules, and policies from this company data:\n\n{chunk_text}"
        results = await safe_llm_json_call(SYSTEM, user, max_tokens=2048)
        if not isinstance(results, list):
            # The prompt demands a JSON array; anything else is discarded so the
            # len() calls below and downstream list handling stay safe.
            results = []

    print(f"[{job_id}] extract_decisions: extracted {len(results)} rules")
    await emit(
        job_id,
        "stage",
        {"name": "EXTRACT_DECISIONS_DONE", "detail": f"Found {len(results)} rules"},
    )
    return {"raw_decisions": results}
backend/graph/nodes/extract_exceptions.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend.graph.state import BrainState
2
+ from backend.llm import safe_llm_json_call
3
+ from backend.sse import emit
4
+
5
+
6
+ SYSTEM = """You are an exception extraction specialist. Your ONLY job is to extract EXCEPTIONS, EDGE CASES, CONSTRAINTS, CONDITIONAL RULES, and FORBIDDEN ACTIONS from company communications.
7
+
8
+ Output ONLY a JSON array. No preamble. No explanation. No markdown.
9
+ Each item must have exactly these fields:
10
+ - id: short snake_case identifier (e.g., "no_ltd_refunds")
11
+ - category: operational domain
12
+ - condition: the specific condition that triggers this exception
13
+ - action: what happens when this exception applies
14
+ - rationale: why this exception exists
15
+ - source_files: array of filenames this came from
16
+
17
+ If you find no exceptions, output: []
18
+ Example: [{"id": "no_ltd_refunds", "category": "Customer Support", "condition": "Customer has a lifetime deal account", "action": "Never process refunds for lifetime deal accounts", "rationale": "Explicitly stated in refund SOP as forbidden action", "source_files": ["notion_refund_sop.md"]}]"""
19
+
20
+
21
async def extract_exceptions(state: BrainState) -> dict:
    """Ask the LLM for exceptions, edge cases and forbidden actions over every chunk.

    Reads ``job_id`` and ``all_chunks`` from *state*, emits the
    ``EXTRACT_EXCEPTIONS`` and ``EXTRACT_EXCEPTIONS_DONE`` stage events, and
    returns ``{"exception_rules": [...]}``. With no chunks the LLM is not
    called and the list is empty.
    """
    job_id = state["job_id"]
    chunks = state.get("all_chunks", [])

    print(f"[{job_id}] Node extract_exceptions: processing {len(chunks)} chunks")
    await emit(
        job_id,
        "stage",
        {
            "name": "EXTRACT_EXCEPTIONS",
            "detail": "Extracting exceptions and edge cases...",
        },
    )

    results = []
    if chunks:
        # SYSTEM asks for source_files, so each chunk is labelled with its file;
        # the chunk text alone does not carry the filename.
        chunk_text = "\n\n---\n\n".join(
            f"[source_file: {c.get('source_file', 'unknown')}]\n{c.get('text', '')}"
            for c in chunks
        )
        user = f"Extract all exceptions, edge cases, constraints, and forbidden actions from this company data:\n\n{chunk_text}"
        results = await safe_llm_json_call(SYSTEM, user, max_tokens=2048)
        if not isinstance(results, list):
            # The prompt demands a JSON array; anything else is discarded so the
            # len() calls below and downstream list handling stay safe.
            results = []

    print(f"[{job_id}] extract_exceptions: extracted {len(results)} exceptions")
    await emit(
        job_id,
        "stage",
        {
            "name": "EXTRACT_EXCEPTIONS_DONE",
            "detail": f"Found {len(results)} exceptions",
        },
    )
    return {"exception_rules": results}
backend/graph/nodes/extract_workflows.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend.graph.state import BrainState
2
+ from backend.llm import safe_llm_json_call
3
+ from backend.sse import emit
4
+
5
+
6
+ SYSTEM = """You are a workflow extraction specialist. Your ONLY job is to extract WORKFLOWS, PROCESSES, and SEQUENTIAL STEPS from company communications.
7
+
8
+ Output ONLY a JSON array. No preamble. No explanation. No markdown.
9
+ Each item must have exactly these fields:
10
+ - id: short snake_case identifier (e.g., "bug_triage_workflow")
11
+ - category: operational domain (e.g., "Engineering", "Customer Support")
12
+ - workflow_name: human-readable name for this workflow
13
+ - steps: array of step descriptions in order
14
+ - triggers: what initiates this workflow
15
+ - source_files: array of filenames this came from
16
+
17
+ If you find no workflows, output: []
18
+ Example: [{"id": "bug_triage_workflow", "category": "Engineering", "workflow_name": "Bug Triage", "steps": ["1. Identify severity (P0/P1/P2)", "2. Page on-call for P0", "3. 4hr SLA for P1"], "triggers": ["Bug report filed with severity label"], "source_files": ["notion_eng_runbook.md"]}]"""
19
+
20
+
21
async def extract_workflows(state: BrainState) -> dict:
    """Ask the LLM for workflows and step-by-step procedures over every chunk.

    Reads ``job_id`` and ``all_chunks`` from *state*, emits the
    ``EXTRACT_WORKFLOWS`` and ``EXTRACT_WORKFLOWS_DONE`` stage events, and
    returns ``{"workflow_steps": [...]}``. With no chunks the LLM is not
    called and the list is empty.
    """
    job_id = state["job_id"]
    chunks = state.get("all_chunks", [])

    print(f"[{job_id}] Node extract_workflows: processing {len(chunks)} chunks")
    await emit(
        job_id,
        "stage",
        {
            "name": "EXTRACT_WORKFLOWS",
            "detail": "Extracting workflows and processes...",
        },
    )

    results = []
    if chunks:
        # SYSTEM asks for source_files, so each chunk is labelled with its file;
        # the chunk text alone does not carry the filename.
        chunk_text = "\n\n---\n\n".join(
            f"[source_file: {c.get('source_file', 'unknown')}]\n{c.get('text', '')}"
            for c in chunks
        )
        user = f"Extract all workflows, processes, and step-by-step procedures from this company data:\n\n{chunk_text}"
        results = await safe_llm_json_call(SYSTEM, user, max_tokens=2048)
        if not isinstance(results, list):
            # The prompt demands a JSON array; anything else is discarded so the
            # len() calls below and downstream list handling stay safe.
            results = []

    print(f"[{job_id}] extract_workflows: extracted {len(results)} workflows")
    await emit(
        job_id,
        "stage",
        {"name": "EXTRACT_WORKFLOWS_DONE", "detail": f"Found {len(results)} workflows"},
    )
    return {"workflow_steps": results}
backend/graph/nodes/ingest_join.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend.graph.state import BrainState
2
+ from backend.sse import emit
3
+
4
+
5
async def ingest_join(state: BrainState) -> dict:
    """Barrier node: concatenate the three ingestion outputs into ``all_chunks``.

    Order is SOP chunks, then Slack events, then ticket cases. Emits the
    ``INGEST_JOIN`` stage event with the merged count.
    """
    job_id = state["job_id"]

    sops = state.get("structured_sops", [])
    events = state.get("normalized_events", [])
    cases = state.get("resolved_cases", [])
    merged = [*sops, *events, *cases]

    print(
        f"[{job_id}] Node ingest_join: merged {len(sops)} SOPs + {len(events)} events + {len(cases)} tickets = {len(merged)} chunks"
    )

    stage = {
        "name": "INGEST_JOIN",
        "detail": f"Merged {len(merged)} total chunks from all sources",
    }
    await emit(job_id, "stage", stage)
    return {"all_chunks": merged}
backend/graph/nodes/ingest_notion.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend.graph.state import BrainState
2
+ from backend.sse import emit
3
+
4
+
5
async def ingest_notion(state: BrainState) -> dict:
    """Chunk every ``notion_md`` source file in *state* into SOP sections.

    Emits the ``INGEST_NOTION`` stage event and returns
    ``{"structured_sops": [...]}``.
    """
    job_id = state["job_id"]
    pages = [
        src
        for src in state.get("source_files", [])
        if src.get("doc_type") == "notion_md"
    ]
    print(f"[{job_id}] Node ingest_notion: {len(pages)} notion files")

    sop_chunks = [chunk for page in pages for chunk in _chunk_markdown(page)]

    stage = {
        "name": "INGEST_NOTION",
        "detail": f"Processed {len(pages)} SOP files into {len(sop_chunks)} chunks",
    }
    await emit(job_id, "stage", stage)
    print(f"[{job_id}] ingest_notion finished: {len(sop_chunks)} chunks")
    return {"structured_sops": sop_chunks}
27
+
28
+
29
+ def _chunk_markdown(sf: dict) -> list:
30
+ content = sf["content"]
31
+ sections = []
32
+ current_header = "Introduction"
33
+ current_body = []
34
+
35
+ for line in content.split("\n"):
36
+ if line.startswith("## "):
37
+ if current_body:
38
+ sections.append((current_header, "\n".join(current_body).strip()))
39
+ current_header = line.lstrip("# ").strip()
40
+ current_body = []
41
+ else:
42
+ current_body.append(line)
43
+
44
+ if current_body:
45
+ sections.append((current_header, "\n".join(current_body).strip()))
46
+
47
+ chunks = []
48
+ for i, (header, body) in enumerate(sections):
49
+ if not body:
50
+ continue
51
+ chunks.append(
52
+ {
53
+ "text": f"[{header}] {body}",
54
+ "source_file": sf["filename"],
55
+ "chunk_index": i,
56
+ "doc_type": "notion_md",
57
+ "section_header": header,
58
+ }
59
+ )
60
+ return chunks
backend/graph/nodes/ingest_slack.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from backend.graph.state import BrainState
3
+ from backend.sse import emit
4
+
5
+
6
async def ingest_slack(state: BrainState) -> dict:
    """Chunk every ``slack_json`` source file in *state* into per-message events.

    Emits the ``INGEST_SLACK`` stage event and returns
    ``{"normalized_events": [...]}``.
    """
    job_id = state["job_id"]
    exports = [
        src
        for src in state.get("source_files", [])
        if src.get("doc_type") == "slack_json"
    ]
    print(f"[{job_id}] Node ingest_slack: {len(exports)} slack files")

    events = [chunk for export in exports for chunk in _chunk_slack(export)]

    stage = {
        "name": "INGEST_SLACK",
        "detail": f"Processed {len(exports)} Slack exports into {len(events)} messages",
    }
    await emit(job_id, "stage", stage)
    print(f"[{job_id}] ingest_slack finished: {len(events)} messages")
    return {"normalized_events": events}
28
+
29
+
30
+ def _chunk_slack(sf: dict) -> list:
31
+ try:
32
+ messages = json.loads(sf["content"])
33
+ except json.JSONDecodeError:
34
+ return []
35
+ chunks = []
36
+ for i, msg in enumerate(messages):
37
+ text = msg.get("text", "")
38
+ if not text:
39
+ continue
40
+ user = msg.get("user", "unknown")
41
+ channel = msg.get("channel", "unknown")
42
+ chunks.append(
43
+ {
44
+ "text": f"[Slack #{channel} @{user}] {text}",
45
+ "source_file": sf["filename"],
46
+ "chunk_index": i,
47
+ "doc_type": "slack_json",
48
+ }
49
+ )
50
+ return chunks
backend/graph/nodes/ingest_tickets.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from backend.graph.state import BrainState
3
+ from backend.sse import emit
4
+
5
+
6
async def ingest_tickets(state: BrainState) -> dict:
    """Chunk every ``tickets_json`` source file in *state* into per-ticket cases.

    Emits the ``INGEST_TICKETS`` stage event and returns
    ``{"resolved_cases": [...]}``.
    """
    job_id = state["job_id"]
    exports = [
        src
        for src in state.get("source_files", [])
        if src.get("doc_type") == "tickets_json"
    ]
    print(f"[{job_id}] Node ingest_tickets: {len(exports)} ticket files")

    cases = [chunk for export in exports for chunk in _chunk_tickets(export)]

    stage = {
        "name": "INGEST_TICKETS",
        "detail": f"Processed {len(exports)} ticket files into {len(cases)} cases",
    }
    await emit(job_id, "stage", stage)
    print(f"[{job_id}] ingest_tickets finished: {len(cases)} tickets")
    return {"resolved_cases": cases}
28
+
29
+
30
+ def _chunk_tickets(sf: dict) -> list:
31
+ try:
32
+ tickets = json.loads(sf["content"])
33
+ except json.JSONDecodeError:
34
+ return []
35
+ chunks = []
36
+ for i, tkt in enumerate(tickets):
37
+ parts = []
38
+ if tkt.get("subject"):
39
+ parts.append(f"Subject: {tkt['subject']}")
40
+ if tkt.get("description"):
41
+ parts.append(f"Description: {tkt['description']}")
42
+ if tkt.get("resolution"):
43
+ parts.append(f"Resolution: {tkt['resolution']}")
44
+ if tkt.get("priority"):
45
+ parts.append(f"Priority: {tkt['priority']}")
46
+ if tkt.get("customer_plan"):
47
+ parts.append(f"Plan: {tkt['customer_plan']}")
48
+ text = " | ".join(parts)
49
+ if not text:
50
+ continue
51
+ chunks.append(
52
+ {
53
+ "text": f"[Zendesk Ticket] {text}",
54
+ "source_file": sf["filename"],
55
+ "chunk_index": i,
56
+ "doc_type": "tickets_json",
57
+ }
58
+ )
59
+ return chunks
backend/graph/nodes/link_evidence.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from backend.graph.state import BrainState
3
+ from backend.llm import llm_call
4
+ from backend.sse import emit
5
+
6
+
7
async def link_evidence(state: BrainState) -> dict:
    """Enrich each draft skill's ``evidence`` with quotes the LLM finds in the chunks.

    Reads ``job_id``, ``draft_skills`` and ``all_chunks`` from *state*, emits
    the ``LINKING_EVIDENCE`` and ``LINKING_DONE`` stage events, and returns
    ``{"skills_with_evidence": [...]}``.

    The result always contains exactly the draft skills, in order. Only a
    skill's ``evidence`` is replaced, and only when the LLM returned a
    non-empty evidence list for the same ``id``. Any unusable LLM response
    leaves the drafts unchanged.
    """
    job_id = state["job_id"]
    draft_skills = state.get("draft_skills", [])
    chunks = state.get("all_chunks", [])

    print(
        f"[{job_id}] Node link_evidence: enriching {len(draft_skills)} skills with evidence"
    )
    await emit(
        job_id,
        "stage",
        {
            "name": "LINKING_EVIDENCE",
            "detail": f"Linking evidence for {len(draft_skills)} skills",
        },
    )

    if not draft_skills:
        return {"skills_with_evidence": []}

    prompt = """You are an evidence linking specialist. Below are draft operational skills and the original source chunks they were extracted from.

For each skill, find the most specific evidence excerpts from the source chunks that support it. Enrich each skill's evidence array with concrete quotes.

Return ONLY a JSON object:
{
  "skills": [
    {
      "id": "skill_id",
      "category": "...",
      "rule": "...",
      "rationale": "...",
      "evidence": ["Exact quote from source that supports this rule"],
      "source_files": ["filename.ext"]
    }
  ]
}

Keep all existing fields intact. Only add or improve the evidence array."""

    skills_text = json.dumps({"skills": draft_skills}, indent=2)
    # Only the first 25 chunks are offered to the model as evidence candidates.
    chunks_text = "\n\n---\n\n".join([c.get("text", "") for c in chunks[:25]])
    user_content = (
        f"--- Skills ---\n{skills_text}\n\n--- Source Chunks ---\n{chunks_text}"
    )

    response_str = await llm_call(prompt, user_content, max_tokens=4096)

    enriched = draft_skills
    try:
        clean = response_str.strip()
        if clean.startswith("```json"):
            clean = clean[7:]
        elif clean.startswith("```"):
            clean = clean[3:]
        if clean.endswith("```"):
            clean = clean[:-3]
        data = json.loads(clean.strip())
        # Accept either the requested {"skills": [...]} object or a bare array.
        candidates = data.get("skills") if isinstance(data, dict) else data
        enriched = _merge_evidence(draft_skills, candidates)
    except (ValueError, AttributeError, TypeError) as e:
        # ValueError covers json.JSONDecodeError; the others cover a response
        # that is not a string. Fall back to the drafts rather than fail the run.
        print(f"[{job_id}] [link_evidence] Parse error: {e}")

    await emit(
        job_id,
        "stage",
        {
            "name": "LINKING_DONE",
            "detail": f"Evidence linked for {len(enriched)} skills",
        },
    )
    print(f"[{job_id}] link_evidence: done")
    return {"skills_with_evidence": enriched}


def _merge_evidence(draft_skills: list, candidates) -> list:
    """Copy LLM-supplied evidence onto the draft skills, matching by ``id``.

    Returns a list parallel to *draft_skills*. A skill is copied with a new
    ``evidence`` list only when *candidates* holds an object with the same
    string ``id`` and a non-empty ``evidence`` list; otherwise the draft is
    kept as-is. This stops the model from dropping skills or rewriting fields
    other than ``evidence``.
    """
    if not isinstance(candidates, list):
        return draft_skills

    evidence_by_id = {}
    for item in candidates:
        if not isinstance(item, dict):
            continue
        skill_id = item.get("id")
        evidence = item.get("evidence")
        if isinstance(skill_id, str) and isinstance(evidence, list) and evidence:
            evidence_by_id[skill_id] = evidence

    merged = []
    for skill in draft_skills:
        skill_id = skill.get("id") if isinstance(skill, dict) else None
        evidence = evidence_by_id.get(skill_id) if isinstance(skill_id, str) else None
        merged.append({**skill, "evidence": evidence} if evidence else skill)
    return merged
backend/graph/nodes/load_and_chunk.py DELETED
@@ -1,174 +0,0 @@
1
- """
2
- Node 1: Load source files from disk and chunk them.
3
- Emits SSE stages: LOADING_DOCS, CHUNKING
4
- """
5
- import os
6
- import json
7
- import hashlib
8
- import time
9
- from backend.graph.state import BrainState
10
- from backend.sse import emit
11
-
12
-
13
- async def load_and_chunk(state: BrainState) -> dict:
14
- company_id = state["company_id"]
15
- job_id = state["job_id"]
16
-
17
- print(f"[{job_id}] Node load_and_chunk started")
18
- await emit(job_id, "stage", {"name": "LOADING_DOCS", "detail": f"Reading sources for {company_id}"})
19
-
20
- # Read files from the company-specific directory
21
- # __file__ is backend/graph/nodes/load_and_chunk.py
22
- base = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
23
- sources_dir = os.path.join(base, "data", "sources", company_id)
24
-
25
- if not os.path.isdir(sources_dir):
26
- await emit(job_id, "pipeline_error", {"error": f"No source directory found: data/sources/{company_id}/"})
27
- print(f"[{job_id}] Node load_and_chunk failed (Missing dir: {sources_dir})")
28
- return {"errors": [f"Missing directory: {sources_dir}"], "source_files": [], "chunks": []}
29
-
30
- source_files = []
31
- for filename in sorted(os.listdir(sources_dir)):
32
- filepath = os.path.join(sources_dir, filename)
33
- if not os.path.isfile(filepath):
34
- continue
35
- with open(filepath, "r", encoding="utf-8") as f:
36
- content = f.read()
37
- doc_type = _detect_type(filename)
38
- source_files.append({
39
- "filename": filename,
40
- "content": content,
41
- "sha256": hashlib.sha256(content.encode("utf-8")).hexdigest(),
42
- "doc_type": doc_type,
43
- })
44
-
45
- await emit(job_id, "stage", {
46
- "name": "CHUNKING",
47
- "detail": f"Splitting {len(source_files)} files into chunks",
48
- })
49
-
50
- chunks = []
51
- for sf in source_files:
52
- if sf["doc_type"] == "notion_md":
53
- chunks.extend(_chunk_markdown(sf))
54
- elif sf["doc_type"] == "slack_json":
55
- chunks.extend(_chunk_slack(sf))
56
- elif sf["doc_type"] == "tickets_json":
57
- chunks.extend(_chunk_tickets(sf))
58
- else:
59
- # Treat unknown as plain text
60
- chunks.append({
61
- "text": sf["content"],
62
- "source_file": sf["filename"],
63
- "chunk_index": 0,
64
- "doc_type": sf["doc_type"],
65
- })
66
-
67
- await emit(job_id, "stage", {
68
- "name": "CHUNKING_DONE",
69
- "detail": f"Produced {len(chunks)} chunks from {len(source_files)} files",
70
- })
71
-
72
- print(f"[{job_id}] Node load_and_chunk finished (chunks: {len(chunks)})")
73
- return {"source_files": source_files, "chunks": chunks}
74
-
75
-
76
- # --- Helpers ---
77
-
78
- def _detect_type(filename: str) -> str:
79
- fn = filename.lower()
80
- if fn.endswith(".json"):
81
- if "slack" in fn:
82
- return "slack_json"
83
- if "ticket" in fn or "zendesk" in fn:
84
- return "tickets_json"
85
- return "json"
86
- if fn.endswith(".md"):
87
- return "notion_md"
88
- return "unknown"
89
-
90
-
91
- def _chunk_markdown(sf: dict) -> list:
92
- """Split a markdown file by ## headers. Each section is a chunk."""
93
- content = sf["content"]
94
- sections = []
95
- current_header = "Introduction"
96
- current_body = []
97
-
98
- for line in content.split("\n"):
99
- if line.startswith("## "):
100
- if current_body:
101
- sections.append((current_header, "\n".join(current_body).strip()))
102
- current_header = line.lstrip("# ").strip()
103
- current_body = []
104
- else:
105
- current_body.append(line)
106
-
107
- if current_body:
108
- sections.append((current_header, "\n".join(current_body).strip()))
109
-
110
- chunks = []
111
- for i, (header, body) in enumerate(sections):
112
- if not body:
113
- continue
114
- chunks.append({
115
- "text": f"[{header}] {body}",
116
- "source_file": sf["filename"],
117
- "chunk_index": i,
118
- "doc_type": "notion_md",
119
- "section_header": header,
120
- })
121
- return chunks
122
-
123
-
124
- def _chunk_slack(sf: dict) -> list:
125
- """Each Slack message is one chunk."""
126
- try:
127
- messages = json.loads(sf["content"])
128
- except json.JSONDecodeError:
129
- return []
130
- chunks = []
131
- for i, msg in enumerate(messages):
132
- text = msg.get("text", "")
133
- if not text:
134
- continue
135
- user = msg.get("user", "unknown")
136
- channel = msg.get("channel", "unknown")
137
- chunks.append({
138
- "text": f"[Slack #{channel} @{user}] {text}",
139
- "source_file": sf["filename"],
140
- "chunk_index": i,
141
- "doc_type": "slack_json",
142
- })
143
- return chunks
144
-
145
-
146
- def _chunk_tickets(sf: dict) -> list:
147
- """Each ticket is one chunk."""
148
- try:
149
- tickets = json.loads(sf["content"])
150
- except json.JSONDecodeError:
151
- return []
152
- chunks = []
153
- for i, tkt in enumerate(tickets):
154
- parts = []
155
- if tkt.get("subject"):
156
- parts.append(f"Subject: {tkt['subject']}")
157
- if tkt.get("description"):
158
- parts.append(f"Description: {tkt['description']}")
159
- if tkt.get("resolution"):
160
- parts.append(f"Resolution: {tkt['resolution']}")
161
- if tkt.get("priority"):
162
- parts.append(f"Priority: {tkt['priority']}")
163
- if tkt.get("customer_plan"):
164
- parts.append(f"Plan: {tkt['customer_plan']}")
165
- text = " | ".join(parts)
166
- if not text:
167
- continue
168
- chunks.append({
169
- "text": f"[Zendesk Ticket] {text}",
170
- "source_file": sf["filename"],
171
- "chunk_index": i,
172
- "doc_type": "tickets_json",
173
- })
174
- return chunks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/graph/nodes/load_sources.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import hashlib
3
+ from backend.graph.state import BrainState
4
+ from backend.sse import emit
5
+
6
+
7
+ def _detect_type(filename: str) -> str:
8
+ fn = filename.lower()
9
+ if fn.endswith(".json"):
10
+ if "slack" in fn:
11
+ return "slack_json"
12
+ if "ticket" in fn or "zendesk" in fn:
13
+ return "tickets_json"
14
+ return "json"
15
+ if fn.endswith(".md"):
16
+ return "notion_md"
17
+ return "unknown"
18
+
19
+
20
async def load_sources(state: BrainState) -> dict:
    """Read every regular file in ``data/sources/<company_id>/`` into state.

    Reads ``company_id`` and ``job_id`` from ``state``. Emits a ``LOADING_DOCS``
    stage event up front and ``LOADING_DOCS_DONE`` when finished; emits
    ``pipeline_error`` if the company directory does not exist.

    Returns:
        A state update with ``source_files``: one dict per file holding
        ``filename``, ``content`` (decoded as UTF-8), ``sha256`` of the
        UTF-8-encoded content, and ``doc_type`` from ``_detect_type``.
        An ``errors`` list is included when the directory is missing or when
        individual files could not be read.
    """
    company_id = state["company_id"]
    job_id = state["job_id"]

    print(f"[{job_id}] Node load_sources started")
    await emit(
        job_id,
        "stage",
        {"name": "LOADING_DOCS", "detail": f"Reading sources for {company_id}"},
    )

    # Four dirname() hops climb from this file's absolute path to the
    # directory that is expected to contain data/sources/.
    base = os.path.abspath(__file__)
    for _ in range(4):
        base = os.path.dirname(base)
    sources_dir = os.path.join(base, "data", "sources", company_id)

    if not os.path.isdir(sources_dir):
        await emit(
            job_id,
            "pipeline_error",
            {"error": f"No source directory: data/sources/{company_id}/"},
        )
        print(f"[{job_id}] load_sources failed — missing dir: {sources_dir}")
        return {"errors": [f"Missing directory: {sources_dir}"], "source_files": []}

    source_files = []
    read_errors = []
    for filename in sorted(os.listdir(sources_dir)):
        filepath = os.path.join(sources_dir, filename)
        if not os.path.isfile(filepath):
            continue
        # One unreadable or non-UTF-8 file (e.g. a stray binary) must not
        # abort loading of all the other sources: skip it and record why.
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as exc:
            print(f"[{job_id}] load_sources: skipping {filename}: {exc}")
            read_errors.append(f"Unreadable source file {filename}: {exc}")
            continue
        source_files.append(
            {
                "filename": filename,
                "content": content,
                "sha256": hashlib.sha256(content.encode("utf-8")).hexdigest(),
                "doc_type": _detect_type(filename),
            }
        )

    print(f"[{job_id}] load_sources finished: {len(source_files)} files")
    await emit(
        job_id,
        "stage",
        {
            "name": "LOADING_DOCS_DONE",
            "detail": f"Loaded {len(source_files)} source files",
        },
    )

    result = {"source_files": source_files}
    # Only add the key when something was skipped, so the clean path returns
    # exactly the same update as before.
    # NOTE(review): confirm no downstream routing treats a non-empty
    # ``errors`` list as fatal.
    if read_errors:
        result["errors"] = read_errors
    return result
backend/graph/nodes/quality_normalize.py DELETED
@@ -1,83 +0,0 @@
1
- """
2
- Node 4: De-duplicate skills, resolve conflicts, score confidence, enforce schema.
3
- Emits SSE stage: QUALITY_CHECK
4
- """
5
- import json
6
- from backend.graph.state import BrainState
7
- from backend.llm import llm_call
8
- from backend.sse import emit
9
-
10
-
11
- async def quality_normalize(state: BrainState) -> dict:
12
- job_id = state["job_id"]
13
- raw_skills = state.get("raw_skills", [])
14
-
15
- print(f"[{job_id}] Node quality_normalize started with {len(raw_skills)} raw skills")
16
-
17
- if not raw_skills:
18
- await emit(job_id, "stage", {"name": "QUALITY_CHECK", "detail": "No skills to normalize"})
19
- print(f"[{job_id}] Node quality_normalize finished (0 skills)")
20
- return {"skills_file": {"skills": []}}
21
-
22
- await emit(job_id, "stage", {
23
- "name": "QUALITY_CHECK",
24
- "detail": f"Normalizing {len(raw_skills)} raw skills",
25
- })
26
-
27
- prompt = """You are a quality assurance agent for an operational skills file.
28
-
29
- Below is a raw list of skills extracted from company documents. Your job:
30
-
31
- 1. DEDUPLICATE: merge skills that describe the same rule (keep the most complete version).
32
- 2. RESOLVE CONFLICTS: if two skills contradict, keep both but note the conflict in the rationale. Prefer observed behavior (from Slack/tickets) over stated policy (from SOPs) when they conflict.
33
- 3. SCORE CONFIDENCE (0.0 to 1.0) for each skill based on:
34
- - 0.9–1.0: multiple confirming sources, clear unambiguous rule
35
- - 0.7–0.89: single strong source or multiple weak sources
36
- - 0.5–0.69: only one source, or some ambiguity
37
- - 0.3–0.49: weak evidence or significant ambiguity
38
- - < 0.3: speculative or poorly supported
39
- 4. ENFORCE SCHEMA: every skill must have: id, category, rule, rationale, evidence (array), confidence (float).
40
-
41
- Return ONLY a JSON object:
42
- {
43
- "skills": [
44
- {
45
- "id": "skill_slug",
46
- "category": "Domain Name",
47
- "rule": "The specific rule text",
48
- "rationale": "Why this rule exists",
49
- "evidence": ["source reference 1", "source reference 2"],
50
- "confidence": 0.85
51
- }
52
- ]
53
- }"""
54
-
55
- skills_text = json.dumps(raw_skills, indent=2)
56
- print(f"[{job_id}] Requesting quality normalization...")
57
- response_str = await llm_call(prompt, skills_text, max_tokens=8192)
58
- print(f"[{job_id}] Received quality normalization response")
59
-
60
- try:
61
- clean = response_str.strip()
62
- if clean.startswith("```json"):
63
- clean = clean[7:]
64
- if clean.startswith("```"):
65
- clean = clean[3:]
66
- if clean.endswith("```"):
67
- clean = clean[:-3]
68
- data = json.loads(clean.strip())
69
- final_skills = data.get("skills", raw_skills)
70
- except Exception as e:
71
- print(f"[{job_id}] [quality_normalize] Parse error: {e}")
72
- # Fallback: use raw skills with default confidence
73
- final_skills = raw_skills
74
- for sk in final_skills:
75
- sk.setdefault("confidence", 0.5)
76
-
77
- await emit(job_id, "stage", {
78
- "name": "QUALITY_CHECK_DONE",
79
- "detail": f"Final skills count: {len(final_skills)} (from {len(raw_skills)} raw)",
80
- })
81
-
82
- print(f"[{job_id}] Node quality_normalize finished (final skills: {len(final_skills)})")
83
- return {"skills_file": {"skills": final_skills}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/graph/nodes/score_confidence.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend.graph.state import BrainState
2
+ from backend.sse import emit
3
+
4
+
5
+ def _score_confidence(skill: dict, contradictions: list) -> float:
6
+ """Math-based confidence scoring per the CLAUDE.md formula."""
7
+ base = 0.5
8
+
9
+ source_count = len(skill.get("evidence", []))
10
+ if source_count >= 3:
11
+ base += 0.25
12
+ elif source_count == 2:
13
+ base += 0.15
14
+ elif source_count == 1:
15
+ base += 0.05
16
+
17
+ base += 0.15
18
+
19
+ skill_id = skill.get("id", "")
20
+ has_contradiction = any(
21
+ c.get("id", "").startswith(skill_id.split("_")[0])
22
+ or skill_id in str(c.get("domain", ""))
23
+ for c in contradictions
24
+ )
25
+ if not has_contradiction:
26
+ base += 0.10
27
+
28
+ return round(min(base, 1.0), 2)
29
+
30
+
31
async def score_confidence(state: BrainState) -> dict:
    """Attach a ``confidence`` value to every evidence-linked skill.

    Reads ``skills_with_evidence`` and ``contradictions`` from ``state``,
    scores each skill with ``_score_confidence`` and stores the result on the
    skill dict itself (the incoming dicts are modified in place). Emits
    ``SCORING_CONFIDENCE`` before scoring and ``SCORING_DONE`` with the
    average afterwards.

    Returns:
        A state update ``{"final_skills": [...]}`` holding the scored skills
        in their original order.
    """
    job_id = state["job_id"]
    linked_skills = state.get("skills_with_evidence", [])
    known_conflicts = state.get("contradictions", [])
    skill_count = len(linked_skills)

    print(f"[{job_id}] Node score_confidence: scoring {skill_count} skills")
    start_event = {
        "name": "SCORING_CONFIDENCE",
        "detail": f"Scoring {skill_count} skills",
    }
    await emit(job_id, "stage", start_event)

    # Scores are written onto the same dict objects that came in via state.
    for entry in linked_skills:
        entry["confidence"] = _score_confidence(entry, known_conflicts)
    final_skills = list(linked_skills)

    # max(..., 1) keeps the average well-defined when there are no skills.
    confidence_total = sum(entry.get("confidence", 0) for entry in final_skills)
    avg_conf = round(confidence_total / max(len(final_skills), 1), 2)

    done_event = {
        "name": "SCORING_DONE",
        "detail": f"Average confidence: {avg_conf} across {len(final_skills)} skills",
    }
    await emit(job_id, "stage", done_event)
    print(f"[{job_id}] score_confidence: avg confidence {avg_conf}")
    return {"final_skills": final_skills}
backend/graph/nodes/synthesize_skills.py CHANGED
@@ -1,9 +1,4 @@
1
- """
2
- Node 3: For each domain cluster, call vLLM to synthesize structured skills.
3
- Emits SSE stage: SYNTHESIZING_SKILLS
4
- """
5
  import json
6
- import uuid
7
  from backend.graph.state import BrainState
8
  from backend.llm import llm_call
9
  from backend.sse import emit
@@ -11,101 +6,102 @@ from backend.sse import emit
11
 
12
  async def synthesize_skills(state: BrainState) -> dict:
13
  job_id = state["job_id"]
14
- chunks = state.get("chunks", [])
15
- clusters = state.get("clusters", {})
16
- domains = clusters.get("domains", {})
17
-
18
- print(f"[{job_id}] Node synthesize_skills started with {len(domains)} domains")
19
-
20
- if not domains:
21
- await emit(job_id, "stage", {"name": "SYNTHESIZING_SKILLS", "detail": "No clusters to synthesize"})
22
- print(f"[{job_id}] Node synthesize_skills finished (0 domains)")
23
- return {"raw_skills": []}
24
-
25
- await emit(job_id, "stage", {
26
- "name": "SYNTHESIZING_SKILLS",
27
- "detail": f"Synthesizing skills for {len(domains)} domains",
28
- })
29
-
30
- all_skills = []
31
-
32
- for domain_name, chunk_indices in domains.items():
33
- # Gather the actual chunk texts for this domain
34
- domain_chunks = []
35
- for idx in chunk_indices:
36
- if 0 <= idx < len(chunks):
37
- domain_chunks.append(chunks[idx])
38
-
39
- if not domain_chunks:
40
- continue
41
-
42
- chunk_text = "\n\n".join([c["text"] for c in domain_chunks])
43
- source_files = list(set(c["source_file"] for c in domain_chunks))
44
-
45
- prompt = f"""You are a Principal Operations Architect analyzing the "{domain_name}" domain.
46
-
47
- Below are real excerpts from a company's internal documents (SOPs, Slack messages, support tickets) related to {domain_name}.
48
-
49
- Your job: extract every distinct operational rule, policy, process, or decision pattern you can find.
50
-
51
- For EACH skill, provide:
52
- - id: a unique identifier (use a short slug like "refund_loyal_customer")
53
- - category: "{domain_name}"
54
- - rule: the specific, actionable rule or process (be precise — include thresholds, timeframes, approvals)
55
- - rationale: why this rule exists (based on the evidence)
56
- - evidence: array of specific quotes or references from the source chunks that support this rule
57
  - source_files: which files this came from
58
 
59
- Rules for quality:
60
- - Extract what the documents ACTUALLY say, not what you assume.
61
- - If there are contradictions (e.g., SOP says X but Slack shows Y), note BOTH and state which takes precedence in practice.
62
- - Do NOT invent rules that aren't supported by the text below.
63
- - Each rule should be specific enough that a human could follow it without additional context.
64
 
65
  Respond with ONLY a JSON object:
66
- {{
67
  "skills": [
68
- {{
69
- "id": "refund_loyal_customer",
70
- "category": "{domain_name}",
71
- "rule": "Approve refunds up to 45 days for customers with >2 years tenure",
72
- "rationale": "Exception applied over standard 30-day limit for loyal customers",
73
- "evidence": ["slack_export_support.json: Mike approved 45-day refund for Acme Corp"],
74
- "source_files": ["slack_export_support.json", "notion_refund_sop.md"]
75
- }}
76
  ]
77
- }}"""
78
-
79
- print(f"[{job_id}] Requesting skills for domain '{domain_name}'...")
80
- response_str = await llm_call(prompt, chunk_text)
81
- print(f"[{job_id}] Received skills response for domain '{domain_name}'")
82
-
83
- try:
84
- clean = response_str.strip()
85
- if clean.startswith("```json"):
86
- clean = clean[7:]
87
- if clean.startswith("```"):
88
- clean = clean[3:]
89
- if clean.endswith("```"):
90
- clean = clean[:-3]
91
- data = json.loads(clean.strip())
92
- domain_skills = data.get("skills", [])
93
- except Exception as e:
94
- print(f"[{job_id}] [synthesize_skills] Parse error for {domain_name}: {e}")
95
- domain_skills = []
96
-
97
- # Ensure every skill has an id
98
- for sk in domain_skills:
99
- if not sk.get("id"):
100
- sk["id"] = str(uuid.uuid4())[:8]
101
- sk["category"] = domain_name # ensure consistency
102
-
103
- all_skills.extend(domain_skills)
104
-
105
- await emit(job_id, "stage", {
106
- "name": "SYNTHESIZING_SKILLS",
107
- "detail": f"{domain_name}: extracted {len(domain_skills)} skills",
108
- })
109
-
110
- print(f"[{job_id}] Node synthesize_skills finished (extracted {len(all_skills)} skills overall)")
111
- return {"raw_skills": all_skills}
 
 
 
 
 
 
 
 
1
  import json
 
2
  from backend.graph.state import BrainState
3
  from backend.llm import llm_call
4
  from backend.sse import emit
 
6
 
7
async def synthesize_skills(state: BrainState) -> dict:
    """Merge the parallel extraction outputs into one list of draft skills.

    Reads ``raw_decisions``, ``workflow_steps``, ``exception_rules`` and
    ``contradictions`` from ``state``, sends them to the LLM as one JSON
    document and parses the JSON reply. Emits ``SYNTHESIZING_SKILLS`` before
    the call and ``SYNTHESIZING_DONE`` after it.

    Returns:
        A state update ``{"draft_skills": [...]}``. The list is empty when
        there is nothing to merge or when the reply cannot be parsed; it only
        ever contains dict entries.
    """
    job_id = state["job_id"]
    raw_decisions = state.get("raw_decisions", [])
    workflow_steps = state.get("workflow_steps", [])
    exception_rules = state.get("exception_rules", [])
    contradictions = state.get("contradictions", [])

    total_raw = (
        len(raw_decisions)
        + len(workflow_steps)
        + len(exception_rules)
        + len(contradictions)
    )
    print(
        f"[{job_id}] Node synthesize_skills: merging {len(raw_decisions)} decisions + {len(workflow_steps)} workflows + {len(exception_rules)} exceptions + {len(contradictions)} contradictions"
    )

    await emit(
        job_id,
        "stage",
        {
            "name": "SYNTHESIZING_SKILLS",
            "detail": f"Merging {total_raw} extracted items into cohesive skills",
        },
    )

    if total_raw == 0:
        print(f"[{job_id}] synthesize_skills: no extractions to merge")
        return {"draft_skills": []}

    prompt = """You are a Principal Operations Architect. Below are four sets of extractions from company data:

1. DECISIONS & RULES: explicit policies and decision criteria
2. WORKFLOWS: step-by-step processes and procedures
3. EXCEPTIONS: edge cases, constraints, forbidden actions
4. CONTRADICTIONS: conflicts between different sources

Merge these into unified operational skills. For each skill:
- id: short snake_case identifier
- category: operational domain name
- rule: the specific, actionable rule text (be precise — include thresholds, timeframes, approvals)
- rationale: why this rule exists (based on evidence)
- evidence: array of specific quotes or references from source data
- source_files: which files this came from

Quality rules:
- Deduplicate: merge skills that describe the same rule (keep the most complete version)
- Resolve conflicts: note contradictions in the rationale
- Do NOT invent rules that aren't supported by the extractions
- Each rule should be specific enough that a human could follow it

Respond with ONLY a JSON object:
{
  "skills": [
    {
      "id": "handle_refund_request",
      "category": "Customer Support",
      "rule": "Approve full refund for annual plans within 14 days",
      "rationale": "No-questions policy within 14 days for annual plans",
      "evidence": ["notion_refund_sop.md: Annual plan customers within 14 days..."],
      "source_files": ["notion_refund_sop.md"]
    }
  ]
}"""

    extractions_text = json.dumps(
        {
            "decisions_and_rules": raw_decisions,
            "workflows_and_processes": workflow_steps,
            "exceptions_and_edge_cases": exception_rules,
            "contradictions": contradictions,
        },
        indent=2,
    )

    response_str = await llm_call(prompt, extractions_text, max_tokens=4096)

    try:
        # Models often wrap JSON in a Markdown code fence; peel it off first.
        clean = response_str.strip()
        if clean.startswith("```json"):
            clean = clean[7:]
        elif clean.startswith("```"):
            clean = clean[3:]
        if clean.endswith("```"):
            clean = clean[:-3]
        data = json.loads(clean.strip())
    except (AttributeError, TypeError, ValueError) as e:
        # AttributeError/TypeError: the reply was not a string (e.g. None);
        # ValueError covers json.JSONDecodeError.
        print(f"[{job_id}] [synthesize_skills] Parse error: {e}")
        data = {}

    # Valid JSON can still have the wrong shape. Later nodes call .get() on
    # every skill, so keep only dict entries from a list-valued "skills".
    skills = data.get("skills", []) if isinstance(data, dict) else []
    if not isinstance(skills, list):
        print(f"[{job_id}] [synthesize_skills] Unexpected 'skills' payload; ignoring it")
        skills = []
    draft = [entry for entry in skills if isinstance(entry, dict)]

    await emit(
        job_id,
        "stage",
        {
            "name": "SYNTHESIZING_DONE",
            "detail": f"Synthesized {len(draft)} skills from {total_raw} extractions",
        },
    )
    print(f"[{job_id}] synthesize_skills: produced {len(draft)} skills")
    return {"draft_skills": draft}
backend/graph/nodes/write_brain.py CHANGED
@@ -1,33 +1,57 @@
1
- """
2
- Node 5: Write the final skills file to the database.
3
- Emits SSE stage: WRITING_DB, then pipeline_complete.
4
- """
5
  import time
6
  import json
7
  import uuid
8
  import datetime
9
  from backend.graph.state import BrainState
10
  from backend.db.supabase import get_client
 
11
  from backend.sse import emit
12
 
13
 
14
  async def write_brain(state: BrainState) -> dict:
15
  job_id = state.get("job_id")
16
  company_id = state.get("company_id")
17
- skills_file = state.get("skills_file", {})
18
- skills = skills_file.get("skills", [])
19
  start_time = state.get("start_time", time.time())
20
  duration_ms = int((time.time() - start_time) * 1000)
21
 
22
- print(f"[{job_id}] Node write_brain started for {company_id}")
 
 
 
 
 
 
 
 
 
 
23
 
24
- await emit(job_id, "stage", {"name": "WRITING_DB", "detail": f"Persisting {len(skills)} skills"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  db = get_client()
27
  if not db:
28
  await emit(job_id, "pipeline_error", {"error": "Database connection failed"})
29
- print(f"[{job_id}] Node write_brain failed (no DB client)")
30
- return {"errors": ["DB connection failed in write_brain"]}
 
 
 
31
 
32
  try:
33
  now_iso = datetime.datetime.now(datetime.timezone.utc).isoformat()
@@ -38,59 +62,74 @@ async def write_brain(state: BrainState) -> dict:
38
  if "filename" in f and "sha256" in f:
39
  source_hashes[f["filename"]] = f["sha256"]
40
 
41
- # Mark previous brain as not current
42
- db.table("skills_files").update(
43
- {"is_current": False}
44
- ).eq("company_id", company_id).eq("is_current", True).execute()
45
 
46
- # Insert new brain
47
- sf_res = db.table("skills_files").insert({
48
- "company_id": company_id,
49
- "version": version_str,
50
- "brain_json": skills_file,
51
- "source_hashes": source_hashes,
52
- "is_current": True,
53
- }).execute()
 
 
 
 
 
54
 
55
  sf_id = sf_res.data[0]["id"]
56
 
57
- # Insert individual skills
58
- for skill in skills:
59
- db.table("skills").insert({
60
- "id": skill.get("id", str(uuid.uuid4())[:8]),
61
- "company_id": company_id,
62
- "skills_file_id": sf_id,
63
- "name": skill.get("rule", "Unknown")[:200],
64
- "domain": skill.get("category", "general"),
65
- "version": version_str,
66
- "confidence": float(skill.get("confidence", 0.5)),
67
- "skill_json": skill,
68
- }).execute()
69
-
70
- # Update compile run
71
- db.table("compile_runs").update({
72
- "status": "complete",
73
- "completed_at": now_iso,
74
- "duration_ms": duration_ms,
75
- "result_version": version_str,
76
- }).eq("id", job_id).execute()
 
 
 
77
 
78
  except Exception as e:
79
  print(f"[{job_id}] [write_brain] DB Error: {e}")
80
  await emit(job_id, "pipeline_error", {"error": str(e)})
81
- return {"errors": [f"write_brain DB error: {e}"]}
82
-
83
- await emit(job_id, "stage", {
84
- "name": "DONE",
85
- "detail": f"Brain {version_str} written: {len(skills)} skills, {len(source_hashes)} sources, {duration_ms}ms",
86
- })
87
- await emit(job_id, "pipeline_complete", {
88
- "status": "success",
89
- "version": version_str,
90
- "skills_count": len(skills),
91
- "source_count": len(source_hashes),
92
- "duration_ms": duration_ms,
93
- })
94
-
95
- print(f"[{job_id}] Node write_brain finished successfully (version: {version_str})")
96
- return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import time
2
  import json
3
  import uuid
4
  import datetime
5
  from backend.graph.state import BrainState
6
  from backend.db.supabase import get_client
7
+ from backend.llm import get_embedding
8
  from backend.sse import emit
9
 
10
 
11
  async def write_brain(state: BrainState) -> dict:
12
  job_id = state.get("job_id")
13
  company_id = state.get("company_id")
14
+ final_skills = state.get("final_skills", [])
 
15
  start_time = state.get("start_time", time.time())
16
  duration_ms = int((time.time() - start_time) * 1000)
17
 
18
+ print(
19
+ f"[{job_id}] Node write_brain: persisting {len(final_skills)} skills for {company_id}"
20
+ )
21
+ await emit(
22
+ job_id,
23
+ "stage",
24
+ {
25
+ "name": "WRITING_DB",
26
+ "detail": f"Pre-embedding and persisting {len(final_skills)} skills",
27
+ },
28
+ )
29
 
30
+ skills_with_embeddings = []
31
+ for skill in final_skills:
32
+ skill_text = f"{skill.get('category', '')} {skill.get('rule', '')} {skill.get('rationale', '')}"
33
+ emb = get_embedding(skill_text)
34
+ skill["embedding_vector"] = emb
35
+ skills_with_embeddings.append(skill)
36
+
37
+ skills_file = {
38
+ "skills": skills_with_embeddings,
39
+ "meta": {
40
+ "company_id": company_id,
41
+ "compiled_at": datetime.datetime.now(datetime.timezone.utc).isoformat(),
42
+ "total_skills": len(skills_with_embeddings),
43
+ "duration_ms": duration_ms,
44
+ },
45
+ }
46
 
47
  db = get_client()
48
  if not db:
49
  await emit(job_id, "pipeline_error", {"error": "Database connection failed"})
50
+ print(f"[{job_id}] write_brain: no DB client")
51
+ return {
52
+ "errors": ["DB connection failed in write_brain"],
53
+ "skills_file": skills_file,
54
+ }
55
 
56
  try:
57
  now_iso = datetime.datetime.now(datetime.timezone.utc).isoformat()
 
62
  if "filename" in f and "sha256" in f:
63
  source_hashes[f["filename"]] = f["sha256"]
64
 
65
+ db.table("skills_files").update({"is_current": False}).eq(
66
+ "company_id", company_id
67
+ ).eq("is_current", True).execute()
 
68
 
69
+ sf_res = (
70
+ db.table("skills_files")
71
+ .insert(
72
+ {
73
+ "company_id": company_id,
74
+ "version": version_str,
75
+ "brain_json": skills_file,
76
+ "source_hashes": source_hashes,
77
+ "is_current": True,
78
+ }
79
+ )
80
+ .execute()
81
+ )
82
 
83
  sf_id = sf_res.data[0]["id"]
84
 
85
+ for skill in skills_with_embeddings:
86
+ skill_copy = {k: v for k, v in skill.items() if k != "embedding_vector"}
87
+ db.table("skills").insert(
88
+ {
89
+ "id": skill.get("id", str(uuid.uuid4())[:8]),
90
+ "company_id": company_id,
91
+ "skills_file_id": sf_id,
92
+ "name": skill.get("rule", "Unknown")[:200],
93
+ "domain": skill.get("category", "general"),
94
+ "version": version_str,
95
+ "confidence": float(skill.get("confidence", 0.5)),
96
+ "skill_json": skill_copy,
97
+ }
98
+ ).execute()
99
+
100
+ db.table("compile_runs").update(
101
+ {
102
+ "status": "complete",
103
+ "completed_at": now_iso,
104
+ "duration_ms": duration_ms,
105
+ "result_version": version_str,
106
+ }
107
+ ).eq("id", job_id).execute()
108
 
109
  except Exception as e:
110
  print(f"[{job_id}] [write_brain] DB Error: {e}")
111
  await emit(job_id, "pipeline_error", {"error": str(e)})
112
+ return {"errors": [f"write_brain DB error: {e}"], "skills_file": skills_file}
113
+
114
+ await emit(
115
+ job_id,
116
+ "stage",
117
+ {
118
+ "name": "DONE",
119
+ "detail": f"Brain {version_str} written: {len(skills_with_embeddings)} skills, {len(source_hashes)} sources, {duration_ms}ms",
120
+ },
121
+ )
122
+ await emit(
123
+ job_id,
124
+ "pipeline_complete",
125
+ {
126
+ "status": "success",
127
+ "version": version_str,
128
+ "skills_count": len(skills_with_embeddings),
129
+ "source_count": len(source_hashes),
130
+ "duration_ms": duration_ms,
131
+ },
132
+ )
133
+
134
+ print(f"[{job_id}] write_brain: done (version: {version_str})")
135
+ return {"skills_file": skills_file, "brain_version": version_str}
backend/graph/state.py CHANGED
@@ -1,14 +1,28 @@
1
  from typing import TypedDict, Annotated, List, Dict, Any
2
  import operator
3
 
 
4
  class BrainState(TypedDict):
5
  company_id: str
6
  job_id: str
7
- source_files: List[Dict[str, Any]] # [{filename, content, sha256, doc_type}]
8
- chunks: List[Dict[str, Any]] # [{text, source_file, chunk_index, doc_type}]
9
- clusters: Dict[str, Any] # {domains: {domain_name: [chunk_indices]}}
10
- raw_skills: List[Dict[str, Any]] # skills before quality pass
11
- skills_file: Dict[str, Any] # final {skills: [...]}
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  brain_version: str
13
  start_time: float
14
  errors: Annotated[List[str], operator.add]
 
1
  from typing import TypedDict, Annotated, List, Dict, Any
2
  import operator
3
 
4
+
5
class BrainState(TypedDict):
    # Shared state dict passed between the pipeline nodes.
    # NOTE(review): fields wrapped in Annotated[..., operator.add] presumably
    # use operator.add as a LangGraph reducer, so lists returned by parallel
    # nodes are concatenated rather than overwritten; confirm against the
    # graph wiring.

    # Identifiers read by the nodes: company_id selects the source directory,
    # job_id tags log lines and emitted events.
    company_id: str
    job_id: str
    # One dict per loaded file: filename, content, sha256, doc_type.
    source_files: Annotated[List[Dict[str, Any]], operator.add]

    # NOTE(review): presumably the outputs of the three ingestion nodes
    # (Notion / Slack / tickets); their producers are not shown here.
    structured_sops: Annotated[List[Dict[str, Any]], operator.add]
    normalized_events: Annotated[List[Dict[str, Any]], operator.add]
    resolved_cases: Annotated[List[Dict[str, Any]], operator.add]

    # NOTE(review): presumably the merged chunk list built after ingestion;
    # confirm against the join node.
    all_chunks: List[Dict[str, Any]]

    # Extraction outputs; all four are read by synthesize_skills, and
    # contradictions is read again by score_confidence.
    raw_decisions: Annotated[List[Dict[str, Any]], operator.add]
    workflow_steps: Annotated[List[Dict[str, Any]], operator.add]
    exception_rules: Annotated[List[Dict[str, Any]], operator.add]
    contradictions: Annotated[List[Dict[str, Any]], operator.add]

    # Skill lists at successive stages: draft_skills from synthesize_skills,
    # skills_with_evidence from link_evidence, final_skills (with a
    # "confidence" value) from score_confidence.
    draft_skills: List[Dict[str, Any]]
    skills_with_evidence: List[Dict[str, Any]]
    final_skills: List[Dict[str, Any]]

    # Returned by write_brain: the assembled {"skills": [...], "meta": {...}}
    # document and the version string it was stored under.
    skills_file: Dict[str, Any]
    brain_version: str
    # time.time()-style timestamp that write_brain uses to compute duration_ms.
    start_time: float
    # Error messages reported by nodes.
    errors: Annotated[List[str], operator.add]
backend/llm.py CHANGED
@@ -1,5 +1,7 @@
1
  import os
2
  import json
 
 
3
  import numpy as np
4
  from openai import AsyncOpenAI
5
  from dotenv import load_dotenv
@@ -11,26 +13,34 @@ MODEL_NAME = "RedHatAI/Qwen2.5-72B-Instruct-FP8-dynamic"
11
 
12
  llm = AsyncOpenAI(base_url=VLLM_BASE_URL, api_key="not-needed", timeout=120.0)
13
 
 
 
 
14
  # --- Embedding model (local, fast, centralized here) ---
15
  _embedding_model = None
16
 
 
17
  def _get_embedding_model():
18
  global _embedding_model
19
  if _embedding_model is None:
20
  from sentence_transformers import SentenceTransformer
 
21
  _embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
22
  return _embedding_model
23
 
 
24
  def get_embedding(text: str) -> list:
25
  """Return a single embedding vector as a Python list."""
26
  model = _get_embedding_model()
27
  return model.encode(text).tolist()
28
 
 
29
  def get_embeddings(texts: list) -> list:
30
  """Return a list of embedding vectors."""
31
  model = _get_embedding_model()
32
  return [v.tolist() for v in model.encode(texts)]
33
 
 
34
  def cosine_similarity(v1, v2) -> float:
35
  """Cosine similarity between two vectors."""
36
  a, b = np.array(v1), np.array(v2)
@@ -39,6 +49,7 @@ def cosine_similarity(v1, v2) -> float:
39
  return 0.0
40
  return float(np.dot(a, b) / denom)
41
 
 
42
  async def check_vllm_health() -> dict:
43
  """Ping the vLLM /v1/models endpoint. Returns status dict."""
44
  try:
@@ -48,18 +59,102 @@ async def check_vllm_health() -> dict:
48
  except Exception as e:
49
  return {"healthy": False, "error": str(e), "url": VLLM_BASE_URL}
50
 
51
- async def llm_call(system_prompt: str, user_content: str, temperature: float = 0.1, max_tokens: int = 4096) -> str:
52
- """Single centralized LLM call through vLLM. Raises on failure."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  try:
54
- response = await llm.chat.completions.create(
55
- model=MODEL_NAME,
56
- messages=[
57
- {"role": "system", "content": system_prompt},
58
- {"role": "user", "content": user_content}
59
- ],
60
- temperature=temperature,
61
- max_tokens=max_tokens
 
 
 
 
 
 
 
62
  )
63
- return response.choices[0].message.content
64
- except Exception as e:
65
- raise RuntimeError(f"vLLM call failed ({VLLM_BASE_URL}): {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import json
3
+ import re
4
+ import asyncio
5
  import numpy as np
6
  from openai import AsyncOpenAI
7
  from dotenv import load_dotenv
 
13
 
14
  llm = AsyncOpenAI(base_url=VLLM_BASE_URL, api_key="not-needed", timeout=120.0)
15
 
16
+ # --- Concurrency throttle for parallel extraction ---
17
+ _semaphore = asyncio.Semaphore(4)
18
+
19
  # --- Embedding model (local, fast, centralized here) ---
20
  _embedding_model = None
21
 
22
+
23
  def _get_embedding_model():
24
  global _embedding_model
25
  if _embedding_model is None:
26
  from sentence_transformers import SentenceTransformer
27
+
28
  _embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
29
  return _embedding_model
30
 
31
+
32
  def get_embedding(text: str) -> list:
33
  """Return a single embedding vector as a Python list."""
34
  model = _get_embedding_model()
35
  return model.encode(text).tolist()
36
 
37
+
38
  def get_embeddings(texts: list) -> list:
39
  """Return a list of embedding vectors."""
40
  model = _get_embedding_model()
41
  return [v.tolist() for v in model.encode(texts)]
42
 
43
+
44
  def cosine_similarity(v1, v2) -> float:
45
  """Cosine similarity between two vectors."""
46
  a, b = np.array(v1), np.array(v2)
 
49
  return 0.0
50
  return float(np.dot(a, b) / denom)
51
 
52
+
53
  async def check_vllm_health() -> dict:
54
  """Ping the vLLM /v1/models endpoint. Returns status dict."""
55
  try:
 
59
  except Exception as e:
60
  return {"healthy": False, "error": str(e), "url": VLLM_BASE_URL}
61
 
62
+
63
async def llm_call(
    system_prompt: str,
    user_content: str,
    temperature: float = 0.1,
    max_tokens: int = 4096,
) -> str:
    """Send one chat-completion request to the vLLM server and return its text.

    The request is issued while holding the module-level ``_semaphore`` so the
    number of simultaneous in-flight calls is bounded.

    Args:
        system_prompt: Content of the ``system`` message.
        user_content: Content of the ``user`` message.
        temperature: Sampling temperature forwarded to the server.
        max_tokens: Completion token limit forwarded to the server.

    Returns:
        The message content of the first choice, or ``""`` when the server
        returned a choice without content.

    Raises:
        RuntimeError: If the request or response handling fails for any
            reason. The original exception is chained as ``__cause__``.
    """
    async with _semaphore:
        try:
            response = await llm.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_content},
                ],
                temperature=temperature,
                max_tokens=max_tokens,
            )
            # The content field may be None; honour the ``-> str`` contract.
            return response.choices[0].message.content or ""
        except Exception as e:
            # Chain explicitly so the underlying client error stays visible.
            raise RuntimeError(f"vLLM call failed ({VLLM_BASE_URL}): {e}") from e
84
+
85
+
86
+ # ─────────────────────────────────────────────
87
+ # JSON Self-Repair Utilities
88
+ # ─────────────────────────────────────────────
89
+
90
+
91
+ def _strip_fences(raw: str) -> str:
92
+ """Remove markdown code fences from LLM output."""
93
+ clean = raw.strip()
94
+ if clean.startswith("```json"):
95
+ clean = clean[7:]
96
+ elif clean.startswith("```"):
97
+ clean = clean[3:]
98
+ if clean.endswith("```"):
99
+ clean = clean[:-3]
100
+ return clean.strip()
101
+
102
+
103
+ def _repair_json(raw: str) -> str:
104
+ """Apply regex heuristics to repair common JSON formatting issues."""
105
+ # Remove trailing commas before closing brackets/braces
106
+ raw = re.sub(r",\s*([}\]])", r"\1", raw)
107
+ # Remove trailing comma at end of string
108
+ raw = re.sub(r",\s*$", "", raw)
109
+ # Ensure balanced brackets (simple count check)
110
+ return raw
111
+
112
+
113
def _coerce_json_list(parsed) -> list:
    """Normalize a decoded JSON value to a list, unwrapping known wrapper keys."""
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, dict):
        # Some nodes return {"skills": [...]} — unwrap
        for key in ("skills", "items", "results", "data"):
            if isinstance(parsed.get(key), list):
                return parsed[key]
        return [parsed]
    return []


async def safe_llm_json_call(
    system_prompt: str,
    user_content: str,
    temperature: float = 0.1,
    max_tokens: int = 4096,
) -> list:
    """Call the LLM expecting a JSON array and return it as a Python list.

    The reply is stripped of code fences and lightly repaired before parsing.
    A dict reply is unwrapped when it carries a list under ``skills``,
    ``items``, ``results`` or ``data``; any other dict becomes a one-element
    list. If the first reply is not valid JSON, the call is retried once with
    a stricter prompt that includes the failed output.

    Args:
        system_prompt: Content of the system message.
        user_content: Content of the user message.
        temperature: Sampling temperature forwarded to ``llm_call``.
        max_tokens: Completion token limit forwarded to ``llm_call``.

    Returns:
        The parsed list, or ``[]`` when the reply is still unusable after the
        retry (including when the retry call itself fails).

    Raises:
        Whatever the first ``llm_call`` raises; only the retry is guarded.
    """
    # An empty or None reply is treated as a parse failure and takes the retry path.
    raw = (await llm_call(system_prompt, user_content, temperature, max_tokens)) or ""
    try:
        return _coerce_json_list(json.loads(_repair_json(_strip_fences(raw))))
    except json.JSONDecodeError:
        pass

    # Retry once with a stricter prompt
    retry_prompt = (
        system_prompt
        + "\n\nCRITICAL: Your previous response was not valid JSON. Return ONLY a valid JSON array. No markdown. No text outside the JSON."
    )
    retry_user = f"The raw string that failed to parse was:\n\n{raw}\n\n---\n\nPlease redo the extraction correctly:\n{user_content}"
    try:
        raw2 = (await llm_call(retry_prompt, retry_user, temperature, max_tokens)) or ""
        return _coerce_json_list(json.loads(_repair_json(_strip_fences(raw2))))
    except Exception:
        # Deliberate best-effort: a failed retry yields no items, not a crash.
        return []
backend/main.py CHANGED
@@ -7,15 +7,18 @@ import time
7
  import json
8
  import hashlib
9
  import shutil
 
 
 
10
 
11
  from backend.graph.graph import build_compilation_graph
12
  from backend.sse import event_bus, emit
13
  from backend.agent.brain_agent import handle_agent_query
14
- from backend.db.supabase import get_client
15
  from backend.llm import check_vllm_health
16
  from backend.models.schemas import CompileRequest, AgentHandleRequest, AgentQueryRequest
17
 
18
- app = FastAPI(title="Kernl API", version="2.0.0")
19
 
20
  app.add_middleware(
21
  CORSMiddleware,
@@ -52,7 +55,6 @@ def _company_sources_dir(company_id: str) -> str:
52
 
53
  @app.post("/sources/upload")
54
  async def upload_source(company_id: str = Form(...), file: UploadFile = File(...)):
55
- """Upload a source file for a company."""
56
  dest_dir = _company_sources_dir(company_id)
57
  os.makedirs(dest_dir, exist_ok=True)
58
 
@@ -63,16 +65,17 @@ async def upload_source(company_id: str = Form(...), file: UploadFile = File(...
63
 
64
  file_hash = hashlib.sha256(content).hexdigest()
65
 
66
- # Record in DB
67
  db = get_client()
68
  if db:
69
  try:
70
- db.table("source_files").insert({
71
- "company_id": company_id,
72
- "filename": file.filename,
73
- "sha256": file_hash,
74
- "storage_path": f"data/sources/{company_id}/{file.filename}",
75
- }).execute()
 
 
76
  except Exception as e:
77
  print(f"[upload] DB record error: {e}")
78
 
@@ -81,7 +84,6 @@ async def upload_source(company_id: str = Form(...), file: UploadFile = File(...
81
 
82
  @app.get("/sources/{company_id}")
83
  async def list_sources(company_id: str):
84
- """List all source files for a company."""
85
  src_dir = _company_sources_dir(company_id)
86
  if not os.path.isdir(src_dir):
87
  return {"files": []}
@@ -91,17 +93,18 @@ async def list_sources(company_id: str):
91
  if os.path.isfile(fp):
92
  with open(fp, "rb") as f:
93
  content = f.read()
94
- files.append({
95
- "filename": fn,
96
- "size_bytes": len(content),
97
- "sha256": hashlib.sha256(content).hexdigest(),
98
- })
 
 
99
  return {"files": files, "company_id": company_id}
100
 
101
 
102
  @app.delete("/sources/{company_id}/{filename}")
103
  async def delete_source(company_id: str, filename: str):
104
- """Delete a source file."""
105
  filepath = os.path.join(_company_sources_dir(company_id), filename)
106
  if not os.path.isfile(filepath):
107
  raise HTTPException(status_code=404, detail=f"File not found: {filename}")
@@ -110,9 +113,9 @@ async def delete_source(company_id: str, filename: str):
110
  db = get_client()
111
  if db:
112
  try:
113
- db.table("source_files").delete().eq(
114
- "company_id", company_id
115
- ).eq("filename", filename).execute()
116
  except Exception as e:
117
  print(f"[delete] DB cleanup error: {e}")
118
 
@@ -122,18 +125,24 @@ async def delete_source(company_id: str, filename: str):
122
  # ─────────────────────────────────────────────
123
  # Compilation pipeline
124
  # ─────────────────────────────────────────────
125
- import asyncio
126
- import traceback
127
- import datetime
128
 
129
  async def run_compilation_graph(job_id: str, company_id: str):
130
  initial_state = {
131
  "job_id": job_id,
132
  "company_id": company_id,
133
  "source_files": [],
134
- "chunks": [],
135
- "clusters": {},
136
- "raw_skills": [],
 
 
 
 
 
 
 
 
137
  "skills_file": {},
138
  "brain_version": "",
139
  "start_time": time.time(),
@@ -144,26 +153,28 @@ async def run_compilation_graph(job_id: str, company_id: str):
144
 
145
  await emit(job_id, "pipeline_start", {"company_id": company_id})
146
  try:
147
- # Prevent indefinite hanging
148
  await asyncio.wait_for(graph.ainvoke(initial_state), timeout=600.0)
149
  except Exception as e:
150
  err_msg = str(e)
151
  if isinstance(e, asyncio.TimeoutError):
152
  err_msg = "Pipeline execution timed out after 600 seconds."
153
-
154
  trace = traceback.format_exc()
155
  print(f"Graph execution failed for {job_id}:\n{trace}")
156
-
157
  await emit(job_id, "pipeline_error", {"error": err_msg, "traceback": trace})
158
- # Update compile run status
159
  db = get_client()
160
  if db:
161
  try:
162
- db.table("compile_runs").update({
163
- "status": "error",
164
- "completed_at": datetime.datetime.now(datetime.timezone.utc).isoformat(),
165
- "error_detail": err_msg,
166
- }).eq("id", job_id).execute()
 
 
 
 
167
  except Exception as db_e:
168
  print(f"Failed to update compile_runs with error status: {db_e}")
169
 
@@ -171,7 +182,6 @@ async def run_compilation_graph(job_id: str, company_id: str):
171
  @app.post("/compile")
172
  @app.post("/compile/run")
173
  async def compile_brain(req: CompileRequest, background_tasks: BackgroundTasks):
174
- # Verify source directory exists
175
  src_dir = _company_sources_dir(req.company_id)
176
  if not os.path.isdir(src_dir) or not os.listdir(src_dir):
177
  raise HTTPException(
@@ -184,11 +194,13 @@ async def compile_brain(req: CompileRequest, background_tasks: BackgroundTasks):
184
 
185
  if db:
186
  try:
187
- db.table("compile_runs").insert({
188
- "id": job_id,
189
- "company_id": req.company_id,
190
- "status": "running",
191
- }).execute()
 
 
192
  except Exception as e:
193
  print(f"Error creating run: {e}")
194
 
@@ -220,14 +232,14 @@ async def compile_status(job_id: str):
220
  # ─────────────────────────────────────────────
221
  @app.post("/agent/handle")
222
  async def agent_handle_endpoint(req: AgentHandleRequest):
223
- """Legacy endpoint kept for frontend compat."""
224
- result = await handle_agent_query(req.company_id, req.scenario, req.context, req.with_brain)
 
225
  return result
226
 
227
 
228
  @app.post("/agent/query")
229
  async def agent_query_endpoint(req: AgentQueryRequest):
230
- """New canonical endpoint."""
231
  result = await handle_agent_query(
232
  req.company_id,
233
  req.scenario_text,
@@ -242,13 +254,17 @@ async def agent_query_endpoint(req: AgentQueryRequest):
242
  # ─────────────────────────────────────────────
243
  @app.get("/skills")
244
  async def get_skills_legacy(company_id: str):
245
- """Legacy endpoint: returns raw brain_json."""
246
  db = get_client()
247
  if not db:
248
  raise HTTPException(status_code=500, detail="Database not connected")
249
- res = db.table("skills_files").select("brain_json").eq(
250
- "company_id", company_id
251
- ).order("compiled_at", desc=True).limit(1).execute()
 
 
 
 
 
252
  if not res.data:
253
  return {"skills": []}
254
  return res.data[0]["brain_json"]
@@ -256,14 +272,17 @@ async def get_skills_legacy(company_id: str):
256
 
257
  @app.get("/skills/{company_id}")
258
  async def get_skills(company_id: str):
259
- """Returns detailed skills with metadata."""
260
  db = get_client()
261
  if not db:
262
  raise HTTPException(status_code=500, detail="Database not connected")
263
 
264
- res = db.table("skills_files").select("*").eq(
265
- "company_id", company_id
266
- ).eq("is_current", True).execute()
 
 
 
 
267
 
268
  if not res.data:
269
  return {"skills": [], "version": None, "compiled_at": None}
@@ -281,30 +300,125 @@ async def get_skills(company_id: str):
281
 
282
  @app.get("/brain/versions/{company_id}")
283
  async def list_brain_versions(company_id: str):
284
- """Lists all brain versions for a company."""
285
  db = get_client()
286
  if not db:
287
  raise HTTPException(status_code=500, detail="Database not connected")
288
 
289
- res = db.table("skills_files").select(
290
- "id, version, compiled_at, is_current, source_hashes"
291
- ).eq("company_id", company_id).order("compiled_at", desc=True).execute()
 
 
 
 
292
 
293
  versions = []
294
  for row in res.data:
295
- brain_json = None
296
- # Get skill count from the full row
297
- full = db.table("skills_files").select("brain_json").eq("id", row["id"]).execute()
298
  skill_count = 0
299
  if full.data:
300
  skill_count = len(full.data[0]["brain_json"].get("skills", []))
301
- versions.append({
302
- "id": row["id"],
303
- "version": row["version"],
304
- "compiled_at": row["compiled_at"],
305
- "is_current": row["is_current"],
306
- "source_count": len(row.get("source_hashes", {})),
307
- "skill_count": skill_count,
308
- })
 
 
309
 
310
  return {"versions": versions, "company_id": company_id}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import json
8
  import hashlib
9
  import shutil
10
+ import asyncio
11
+ import traceback
12
+ import datetime
13
 
14
  from backend.graph.graph import build_compilation_graph
15
  from backend.sse import event_bus, emit
16
  from backend.agent.brain_agent import handle_agent_query
17
+ from backend.db.supabase import get_client, get_brain_by_version
18
  from backend.llm import check_vllm_health
19
  from backend.models.schemas import CompileRequest, AgentHandleRequest, AgentQueryRequest
20
 
21
+ app = FastAPI(title="Kernl API", version="2.1.0")
22
 
23
  app.add_middleware(
24
  CORSMiddleware,
 
55
 
56
  @app.post("/sources/upload")
57
  async def upload_source(company_id: str = Form(...), file: UploadFile = File(...)):
 
58
  dest_dir = _company_sources_dir(company_id)
59
  os.makedirs(dest_dir, exist_ok=True)
60
 
 
65
 
66
  file_hash = hashlib.sha256(content).hexdigest()
67
 
 
68
  db = get_client()
69
  if db:
70
  try:
71
+ db.table("source_files").insert(
72
+ {
73
+ "company_id": company_id,
74
+ "filename": file.filename,
75
+ "sha256": file_hash,
76
+ "storage_path": f"data/sources/{company_id}/{file.filename}",
77
+ }
78
+ ).execute()
79
  except Exception as e:
80
  print(f"[upload] DB record error: {e}")
81
 
 
84
 
85
  @app.get("/sources/{company_id}")
86
  async def list_sources(company_id: str):
 
87
  src_dir = _company_sources_dir(company_id)
88
  if not os.path.isdir(src_dir):
89
  return {"files": []}
 
93
  if os.path.isfile(fp):
94
  with open(fp, "rb") as f:
95
  content = f.read()
96
+ files.append(
97
+ {
98
+ "filename": fn,
99
+ "size_bytes": len(content),
100
+ "sha256": hashlib.sha256(content).hexdigest(),
101
+ }
102
+ )
103
  return {"files": files, "company_id": company_id}
104
 
105
 
106
  @app.delete("/sources/{company_id}/{filename}")
107
  async def delete_source(company_id: str, filename: str):
 
108
  filepath = os.path.join(_company_sources_dir(company_id), filename)
109
  if not os.path.isfile(filepath):
110
  raise HTTPException(status_code=404, detail=f"File not found: {filename}")
 
113
  db = get_client()
114
  if db:
115
  try:
116
+ db.table("source_files").delete().eq("company_id", company_id).eq(
117
+ "filename", filename
118
+ ).execute()
119
  except Exception as e:
120
  print(f"[delete] DB cleanup error: {e}")
121
 
 
125
  # ─────────────────────────────────────────────
126
  # Compilation pipeline
127
  # ─────────────────────────────────────────────
128
+
 
 
129
 
130
  async def run_compilation_graph(job_id: str, company_id: str):
131
  initial_state = {
132
  "job_id": job_id,
133
  "company_id": company_id,
134
  "source_files": [],
135
+ "structured_sops": [],
136
+ "normalized_events": [],
137
+ "resolved_cases": [],
138
+ "all_chunks": [],
139
+ "raw_decisions": [],
140
+ "workflow_steps": [],
141
+ "exception_rules": [],
142
+ "contradictions": [],
143
+ "draft_skills": [],
144
+ "skills_with_evidence": [],
145
+ "final_skills": [],
146
  "skills_file": {},
147
  "brain_version": "",
148
  "start_time": time.time(),
 
153
 
154
  await emit(job_id, "pipeline_start", {"company_id": company_id})
155
  try:
 
156
  await asyncio.wait_for(graph.ainvoke(initial_state), timeout=600.0)
157
  except Exception as e:
158
  err_msg = str(e)
159
  if isinstance(e, asyncio.TimeoutError):
160
  err_msg = "Pipeline execution timed out after 600 seconds."
161
+
162
  trace = traceback.format_exc()
163
  print(f"Graph execution failed for {job_id}:\n{trace}")
164
+
165
  await emit(job_id, "pipeline_error", {"error": err_msg, "traceback": trace})
 
166
  db = get_client()
167
  if db:
168
  try:
169
+ db.table("compile_runs").update(
170
+ {
171
+ "status": "error",
172
+ "completed_at": datetime.datetime.now(
173
+ datetime.timezone.utc
174
+ ).isoformat(),
175
+ "error_detail": err_msg,
176
+ }
177
+ ).eq("id", job_id).execute()
178
  except Exception as db_e:
179
  print(f"Failed to update compile_runs with error status: {db_e}")
180
 
 
182
  @app.post("/compile")
183
  @app.post("/compile/run")
184
  async def compile_brain(req: CompileRequest, background_tasks: BackgroundTasks):
 
185
  src_dir = _company_sources_dir(req.company_id)
186
  if not os.path.isdir(src_dir) or not os.listdir(src_dir):
187
  raise HTTPException(
 
194
 
195
  if db:
196
  try:
197
+ db.table("compile_runs").insert(
198
+ {
199
+ "id": job_id,
200
+ "company_id": req.company_id,
201
+ "status": "running",
202
+ }
203
+ ).execute()
204
  except Exception as e:
205
  print(f"Error creating run: {e}")
206
 
 
232
  # ─────────────────────────────────────────────
233
  @app.post("/agent/handle")
234
  async def agent_handle_endpoint(req: AgentHandleRequest):
235
+ result = await handle_agent_query(
236
+ req.company_id, req.scenario, req.context, req.with_brain
237
+ )
238
  return result
239
 
240
 
241
  @app.post("/agent/query")
242
  async def agent_query_endpoint(req: AgentQueryRequest):
 
243
  result = await handle_agent_query(
244
  req.company_id,
245
  req.scenario_text,
 
254
  # ─────────────────────────────────────────────
255
  @app.get("/skills")
256
  async def get_skills_legacy(company_id: str):
 
257
  db = get_client()
258
  if not db:
259
  raise HTTPException(status_code=500, detail="Database not connected")
260
+ res = (
261
+ db.table("skills_files")
262
+ .select("brain_json")
263
+ .eq("company_id", company_id)
264
+ .order("compiled_at", desc=True)
265
+ .limit(1)
266
+ .execute()
267
+ )
268
  if not res.data:
269
  return {"skills": []}
270
  return res.data[0]["brain_json"]
 
272
 
273
  @app.get("/skills/{company_id}")
274
  async def get_skills(company_id: str):
 
275
  db = get_client()
276
  if not db:
277
  raise HTTPException(status_code=500, detail="Database not connected")
278
 
279
+ res = (
280
+ db.table("skills_files")
281
+ .select("*")
282
+ .eq("company_id", company_id)
283
+ .eq("is_current", True)
284
+ .execute()
285
+ )
286
 
287
  if not res.data:
288
  return {"skills": [], "version": None, "compiled_at": None}
 
300
 
301
  @app.get("/brain/versions/{company_id}")
302
  async def list_brain_versions(company_id: str):
 
303
  db = get_client()
304
  if not db:
305
  raise HTTPException(status_code=500, detail="Database not connected")
306
 
307
+ res = (
308
+ db.table("skills_files")
309
+ .select("id, version, compiled_at, is_current, source_hashes")
310
+ .eq("company_id", company_id)
311
+ .order("compiled_at", desc=True)
312
+ .execute()
313
+ )
314
 
315
  versions = []
316
  for row in res.data:
317
+ full = (
318
+ db.table("skills_files").select("brain_json").eq("id", row["id"]).execute()
319
+ )
320
  skill_count = 0
321
  if full.data:
322
  skill_count = len(full.data[0]["brain_json"].get("skills", []))
323
+ versions.append(
324
+ {
325
+ "id": row["id"],
326
+ "version": row["version"],
327
+ "compiled_at": row["compiled_at"],
328
+ "is_current": row["is_current"],
329
+ "source_count": len(row.get("source_hashes", {})),
330
+ "skill_count": skill_count,
331
+ }
332
+ )
333
 
334
  return {"versions": versions, "company_id": company_id}
335
+
336
+
337
+ # ─────────────────────────────────────────────
338
+ # Semantic Diff Engine
339
+ # ─────────────────────────────────────────────
340
def _skills_by_id(brain) -> dict:
    """Index the skills of one brain row by id (positional key when id is absent)."""
    # NOTE(review): assumes the row is a dict with a "brain_json" dict — confirm
    # against get_brain_by_version.
    brain_json = brain.get("brain_json") or {}
    indexed = {}
    for i, skill in enumerate(brain_json.get("skills") or []):
        if not isinstance(skill, dict):
            continue  # malformed entry; nothing to compare
        sid = skill.get("id")
        # Keys are normalized to str so sorted() never sees mixed types.
        indexed[f"idx_{i}" if sid is None else str(sid)] = skill
    return indexed


def _as_text(value) -> str:
    """Render a skill field as text, treating a missing/None value as empty."""
    return "" if value is None else str(value)


def _as_confidence(value) -> float:
    """Convert a stored confidence to float, falling back to 0.0 when unusable."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


@app.get("/diff/{v1}/{v2}")
async def semantic_diff(v1: str, v2: str, company_id: str = "rivanly-inc"):
    """Compare the skills of two stored brain versions of one company.

    Skills are matched by their ``id``. The response lists skills present
    only in ``v2`` (added), only in ``v1`` (deleted), per-field changes of
    ``rule`` / ``rationale`` for shared skills (modified), and confidence
    changes larger than 0.01, plus a summary of counts.

    Args:
        v1: Version identifier of the baseline brain.
        v2: Version identifier of the brain to compare against the baseline.
        company_id: Company whose brain versions are looked up.

    Raises:
        HTTPException: 500 when no database client is available, 404 when
            either version cannot be found.
    """
    db = get_client()
    if not db:
        raise HTTPException(status_code=500, detail="Database not connected")

    brain_v1 = get_brain_by_version(company_id, v1)
    brain_v2 = get_brain_by_version(company_id, v2)

    if not brain_v1 or not brain_v2:
        raise HTTPException(
            status_code=404, detail="One or both brain versions not found"
        )

    skills_v1 = _skills_by_id(brain_v1)
    skills_v2 = _skills_by_id(brain_v2)

    ids_v1 = set(skills_v1)
    ids_v2 = set(skills_v2)

    added = [
        {"id": sid, "name": _as_text(skills_v2[sid].get("rule"))[:100]}
        for sid in sorted(ids_v2 - ids_v1)
    ]
    deleted = [
        {"id": sid, "name": _as_text(skills_v1[sid].get("rule"))[:100]}
        for sid in sorted(ids_v1 - ids_v2)
    ]

    modified = []
    confidence_shifts = []

    for sid in sorted(ids_v1 & ids_v2):
        s1, s2 = skills_v1[sid], skills_v2[sid]
        for field in ("rule", "rationale"):
            v1_val = _as_text(s1.get(field))
            v2_val = _as_text(s2.get(field))
            if v1_val != v2_val:
                modified.append(
                    {
                        "id": sid,
                        "field": field,
                        "old_value": v1_val[:200],
                        "new_value": v2_val[:200],
                    }
                )

        c1 = _as_confidence(s1.get("confidence", 0))
        c2 = _as_confidence(s2.get("confidence", 0))
        if abs(c1 - c2) > 0.01:
            confidence_shifts.append(
                {
                    "id": sid,
                    "old_confidence": c1,
                    "new_confidence": c2,
                    "reason": "Confidence recalculated based on source evidence and contradictions",
                }
            )

    return {
        "v1_version": v1,
        "v2_version": v2,
        "added": added,
        "deleted": deleted,
        "modified": modified,
        "confidence_shifts": confidence_shifts,
        "summary": {
            "v1_skills": len(skills_v1),
            "v2_skills": len(skills_v2),
            "added_count": len(added),
            "deleted_count": len(deleted),
            "modified_count": len(modified),
            "confidence_shift_count": len(confidence_shifts),
        },
    }
backend/models/schemas.py CHANGED
@@ -1,20 +1,59 @@
1
  from pydantic import BaseModel
2
  from typing import List, Optional, Dict, Any
3
 
 
4
  class CompileRequest(BaseModel):
5
  company_id: str
6
  force_recompile: bool = False
7
 
 
8
  class AgentHandleRequest(BaseModel):
9
  """Legacy schema — kept for frontend compatibility."""
 
10
  company_id: str
11
  scenario: str
12
  context: Optional[Dict[str, Any]] = None
13
  with_brain: bool = True
14
 
 
15
  class AgentQueryRequest(BaseModel):
16
  """New canonical schema for agent queries."""
 
17
  company_id: str
18
  scenario_text: str
19
  json_context: Optional[Dict[str, Any]] = None
20
  with_brain: bool = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from pydantic import BaseModel
2
  from typing import List, Optional, Dict, Any
3
 
4
+
5
  class CompileRequest(BaseModel):
6
  company_id: str
7
  force_recompile: bool = False
8
 
9
+
10
  class AgentHandleRequest(BaseModel):
11
  """Legacy schema — kept for frontend compatibility."""
12
+
13
  company_id: str
14
  scenario: str
15
  context: Optional[Dict[str, Any]] = None
16
  with_brain: bool = True
17
 
18
+
19
  class AgentQueryRequest(BaseModel):
20
  """New canonical schema for agent queries."""
21
+
22
  company_id: str
23
  scenario_text: str
24
  json_context: Optional[Dict[str, Any]] = None
25
  with_brain: bool = True
26
+
27
+
28
+ class DiffRequest(BaseModel):
29
+ version_v1: str
30
+ version_v2: str
31
+ company_id: str
32
+
33
+
34
+ class DiffItem(BaseModel):
35
+ id: str
36
+ name: str = ""
37
+
38
+
39
+ class DiffModified(BaseModel):
40
+ id: str
41
+ field: str
42
+ old_value: Any = None
43
+ new_value: Any = None
44
+
45
+
46
+ class DiffConfidenceShift(BaseModel):
47
+ id: str
48
+ old_confidence: float = 0.0
49
+ new_confidence: float = 0.0
50
+ reason: str = ""
51
+
52
+
53
class DiffResponse(BaseModel):
    """Payload describing the difference between two brain versions.

    The fields mirror the keys returned by the ``/diff/{v1}/{v2}`` endpoint,
    including the ``summary`` mapping of count names to integers.
    """

    v1_version: str
    v2_version: str
    added: List[DiffItem] = []
    deleted: List[DiffItem] = []
    modified: List[DiffModified] = []
    confidence_shifts: List[DiffConfidenceShift] = []
    # Counts such as "v1_skills", "added_count", "modified_count".
    summary: Dict[str, int] = {}
backend/test_compile.py CHANGED
@@ -5,15 +5,14 @@ import uuid
5
  import sys
6
  from dotenv import load_dotenv
7
 
8
- # Set backend in path
9
  sys.path.append(os.path.dirname(os.path.dirname(__file__)))
10
 
11
  from backend.graph.graph import build_compilation_graph
12
 
 
13
  async def run_compilation_test():
14
  load_dotenv()
15
-
16
- # Check vLLM
17
  vllm_url = os.getenv("VLLM_BASE_URL")
18
  if not vllm_url:
19
  print("VLLM_BASE_URL not set in .env. LLM calls will fail.")
@@ -22,68 +21,108 @@ async def run_compilation_test():
22
 
23
  company_id = "rivanly-inc"
24
  job_id = str(uuid.uuid4())
25
-
26
- # Read files
27
  source_files = []
28
- sources_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "sources")
 
 
29
  if os.path.exists(sources_dir):
30
  import hashlib
 
31
  for filename in os.listdir(sources_dir):
32
  filepath = os.path.join(sources_dir, filename)
33
- with open(filepath, "r", encoding="utf-8") as f:
34
- content = f.read()
35
-
36
- ftype = "unknown"
37
- if filename.endswith(".json"):
38
- if "slack" in filename: ftype = "slack_json"
39
- elif "tickets" in filename: ftype = "tickets_json"
40
- elif filename.endswith(".md"):
41
- ftype = "notion_md"
42
-
43
- source_files.append({
44
- "filename": filename,
45
- "content": content,
46
- "type": ftype,
47
- "sha256": hashlib.sha256(content.encode('utf-8')).hexdigest()
48
- })
 
 
 
49
  else:
50
  print(f"No sources dir found at {sources_dir}")
51
  return
52
 
53
- print(f"Found {len(source_files)} source files. Starting graph...")
54
-
 
 
55
  initial_state = {
56
  "job_id": job_id,
57
  "company_id": company_id,
58
- "source_files": source_files,
59
  "structured_sops": [],
60
  "normalized_events": [],
61
  "resolved_cases": [],
62
- "extracted_decisions": [],
63
- "extracted_workflows": [],
64
- "extracted_exceptions": [],
65
- "detected_contradictions": [],
66
- "skills_file": {}
 
 
 
 
 
 
 
67
  }
68
-
69
  graph = build_compilation_graph()
70
-
71
  try:
72
  final_state = await graph.ainvoke(initial_state)
73
  print("\n=== COMPILATION COMPLETE ===")
74
- print(f"Extracted Decisions: {len(final_state.get('extracted_decisions', []))}")
75
- print(f"Detected Contradictions: {len(final_state.get('detected_contradictions', []))}")
76
- for c in final_state.get('detected_contradictions', []):
77
- print(f" - Contradiction: {c}")
78
-
79
- skills_file = final_state.get('skills_file', {})
80
- skills = skills_file.get('skills', [])
81
- print(f"Generated Skills: {len(skills)}")
82
- for s in skills:
83
- print(f" - {s.get('id')} ({s.get('confidence')} conf)")
84
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  except Exception as e:
86
  print(f"Graph execution failed: {e}")
 
 
 
 
87
 
88
  if __name__ == "__main__":
89
  asyncio.run(run_compilation_test())
 
5
  import sys
6
  from dotenv import load_dotenv
7
 
 
8
  sys.path.append(os.path.dirname(os.path.dirname(__file__)))
9
 
10
  from backend.graph.graph import build_compilation_graph
11
 
12
+
13
  async def run_compilation_test():
14
  load_dotenv()
15
+
 
16
  vllm_url = os.getenv("VLLM_BASE_URL")
17
  if not vllm_url:
18
  print("VLLM_BASE_URL not set in .env. LLM calls will fail.")
 
21
 
22
  company_id = "rivanly-inc"
23
  job_id = str(uuid.uuid4())
24
+
 
25
  source_files = []
26
+ sources_dir = os.path.join(
27
+ os.path.dirname(os.path.dirname(__file__)), "data", "sources"
28
+ )
29
  if os.path.exists(sources_dir):
30
  import hashlib
31
+
32
  for filename in os.listdir(sources_dir):
33
  filepath = os.path.join(sources_dir, filename)
34
+ if os.path.isfile(filepath):
35
+ with open(filepath, "r", encoding="utf-8") as f:
36
+ content = f.read()
37
+ ftype = "unknown"
38
+ if filename.endswith(".json"):
39
+ if "slack" in filename:
40
+ ftype = "slack_json"
41
+ elif "tickets" in filename:
42
+ ftype = "tickets_json"
43
+ elif filename.endswith(".md"):
44
+ ftype = "notion_md"
45
+ source_files.append(
46
+ {
47
+ "filename": filename,
48
+ "content": content,
49
+ "type": ftype,
50
+ "sha256": hashlib.sha256(content.encode("utf-8")).hexdigest(),
51
+ }
52
+ )
53
  else:
54
  print(f"No sources dir found at {sources_dir}")
55
  return
56
 
57
+ print(
58
+ f"Found {len(source_files)} source files. Starting parallel multi-agent graph..."
59
+ )
60
+
61
  initial_state = {
62
  "job_id": job_id,
63
  "company_id": company_id,
64
+ "source_files": [], # load_sources reads from disk
65
  "structured_sops": [],
66
  "normalized_events": [],
67
  "resolved_cases": [],
68
+ "all_chunks": [],
69
+ "raw_decisions": [],
70
+ "workflow_steps": [],
71
+ "exception_rules": [],
72
+ "contradictions": [],
73
+ "draft_skills": [],
74
+ "skills_with_evidence": [],
75
+ "final_skills": [],
76
+ "skills_file": {},
77
+ "brain_version": "",
78
+ "start_time": __import__("time").time(),
79
+ "errors": [],
80
  }
81
+
82
  graph = build_compilation_graph()
83
+
84
  try:
85
  final_state = await graph.ainvoke(initial_state)
86
  print("\n=== COMPILATION COMPLETE ===")
87
+
88
+ raw_decisions = final_state.get("raw_decisions", [])
89
+ workflow_steps = final_state.get("workflow_steps", [])
90
+ exception_rules = final_state.get("exception_rules", [])
91
+ contradictions = final_state.get("contradictions", [])
92
+
93
+ print(f"Raw Decisions: {len(raw_decisions)}")
94
+ print(f"Workflow Steps: {len(workflow_steps)}")
95
+ print(f"Exception Rules: {len(exception_rules)}")
96
+ print(f"Contradictions: {len(contradictions)}")
97
+
98
+ for c in contradictions:
99
+ print(
100
+ f" - Contradiction: {c.get('claim_a', '')[:80]} vs {c.get('claim_b', '')[:80]}"
101
+ )
102
+
103
+ final_skills = final_state.get("final_skills", [])
104
+ print(f"\nFinal Skills: {len(final_skills)}")
105
+ for s in final_skills:
106
+ print(
107
+ f" - {s.get('id')} ({s.get('confidence')} conf) [{s.get('category')}]"
108
+ )
109
+ print(f" Rule: {s.get('rule', '')[:100]}")
110
+ ev = s.get("evidence", [])
111
+ if ev:
112
+ print(f" Evidence: {len(ev)} sources")
113
+
114
+ skills_file = final_state.get("skills_file", {})
115
+ if skills_file:
116
+ print(
117
+ f"\nBrain version: {skills_file.get('meta', {}).get('compiled_at', 'N/A')}"
118
+ )
119
+
120
  except Exception as e:
121
  print(f"Graph execution failed: {e}")
122
+ import traceback
123
+
124
+ traceback.print_exc()
125
+
126
 
127
  if __name__ == "__main__":
128
  asyncio.run(run_compilation_test())
data/sources/rivanly-inc/notion_refund_sop.md CHANGED
@@ -13,4 +13,4 @@ Our refund policy is designed to balance customer satisfaction with revenue rete
13
  - **Monthly Plans (New Customers):** If a customer on a monthly plan with a tenure of less than 3 months requests a refund over $500, escalate to the Founder.
14
 
15
  ## 3. Strict Time Limits
16
- **CRITICAL:** We offer absolutely no refunds after 30 days of purchase for any customer tier. If the purchase was more than 30 days ago, deny the refund.
 
13
  - **Monthly Plans (New Customers):** If a customer on a monthly plan with a tenure of less than 3 months requests a refund over $500, escalate to the Founder.
14
 
15
  ## 3. Strict Time Limits
16
+ **CRITICAL:** We offer absolutely no refunds after 60 days of purchase for any customer tier. If the purchase was more than 60 days ago, deny the refund.
frontend/src/app/compile/[jobId]/page.tsx CHANGED
@@ -12,14 +12,26 @@ interface LogEvent {
12
  const STAGE_LABELS: Record<string, string> = {
13
  pipeline_start: "🚀 Pipeline Started",
14
  LOADING_DOCS: "📂 Loading Documents",
15
- CHUNKING: "✂️ Chunking Documents",
16
- CHUNKING_DONE: " Chunking Complete",
17
- EMBEDDING: "🧠 Embedding & Clustering",
18
- EMBEDDING_DONE: " Clustering Complete",
19
- SYNTHESIZING_SKILLS: " Synthesizing Skills",
20
- QUALITY_CHECK: "🔍 Quality & Confidence Scoring",
21
- QUALITY_CHECK_DONE: "✅ Quality Check Complete",
22
- WRITING_DB: "💾 Writing to Database",
 
 
 
 
 
 
 
 
 
 
 
 
23
  DONE: "✅ Pipeline Complete",
24
  pipeline_complete: "🎉 Compilation Finished",
25
  pipeline_error: "❌ Pipeline Error",
 
12
  const STAGE_LABELS: Record<string, string> = {
13
  pipeline_start: "🚀 Pipeline Started",
14
  LOADING_DOCS: "📂 Loading Documents",
15
+ LOADING_DOCS_DONE: " Sources Loaded",
16
+ INGEST_NOTION: "📝 Ingesting SOPs",
17
+ INGEST_SLACK: "💬 Ingesting Slack Messages",
18
+ INGEST_TICKETS: "🎫 Ingesting Support Tickets",
19
+ INGEST_JOIN: "🔗 Merging All Chunks",
20
+ EXTRACT_DECISIONS: "⚖️ Extracting Rules & Policies",
21
+ EXTRACT_DECISIONS_DONE: "✅ Rules Extracted",
22
+ EXTRACT_WORKFLOWS: "🔁 Extracting Workflows",
23
+ EXTRACT_WORKFLOWS_DONE: "✅ Workflows Extracted",
24
+ EXTRACT_EXCEPTIONS: "⚠️ Extracting Exceptions & Edge Cases",
25
+ EXTRACT_EXCEPTIONS_DONE: "✅ Exceptions Extracted",
26
+ DETECT_CONTRADICTIONS: "🔄 Detecting Cross-Source Contradictions",
27
+ DETECT_CONTRADICTIONS_DONE: "✅ Contradictions Analyzed",
28
+ SYNTHESIZING_SKILLS: "⚡ Synthesizing Skills from All Extractions",
29
+ SYNTHESIZING_DONE: "✅ Skills Synthesized",
30
+ LINKING_EVIDENCE: "🔗 Linking Evidence to Skills",
31
+ LINKING_DONE: "✅ Evidence Linked",
32
+ SCORING_CONFIDENCE: "📊 Scoring Confidence",
33
+ SCORING_DONE: "✅ Confidence Scored",
34
+ WRITING_DB: "💾 Pre-embedding & Writing to Database",
35
  DONE: "✅ Pipeline Complete",
36
  pipeline_complete: "🎉 Compilation Finished",
37
  pipeline_error: "❌ Pipeline Error",
scripts/smoke_test.py CHANGED
@@ -7,6 +7,7 @@ Usage:
7
 
8
  Requires: backend running on http://localhost:8080
9
  """
 
10
  import requests
11
  import time
12
  import sys
@@ -51,7 +52,7 @@ def compile_and_wait():
51
  # Poll the compile stream for completion
52
  for attempt in range(60): # max 5 minutes
53
  time.sleep(5)
54
-
55
  # Check job status explicitly
56
  try:
57
  status_req = requests.get(f"{API}/compile/{job_id}/status")
@@ -59,7 +60,9 @@ def compile_and_wait():
59
  job_info = status_req.json()
60
  if job_info.get("status") == "error":
61
  print(f" [ERROR] Job failed: {job_info.get('error_detail')}")
62
- raise RuntimeError(f"Compilation job failed: {job_info.get('error_detail')}")
 
 
63
  if job_info.get("status") == "complete":
64
  # Fetch skills
65
  sk = requests.get(f"{API}/skills/{COMPANY}")
@@ -73,7 +76,7 @@ def compile_and_wait():
73
  raise
74
  pass
75
 
76
- print(f" Waiting... ({(attempt+1)*5}s)")
77
 
78
  # Timeout reached. Fetch final status.
79
  final_status = "Unknown"
@@ -87,7 +90,9 @@ def compile_and_wait():
87
  except Exception:
88
  pass
89
 
90
- raise TimeoutError(f"Compilation did not complete within 5 minutes. Final status: {final_status}, Error: {final_error}")
 
 
91
 
92
 
93
  def get_skills():
@@ -97,11 +102,14 @@ def get_skills():
97
 
98
 
99
  def query_agent(scenario: str, context: dict = None):
100
- r = requests.post(f"{API}/agent/query", json={
101
- "company_id": COMPANY,
102
- "scenario_text": scenario,
103
- "json_context": context or {},
104
- })
 
 
 
105
  assert r.status_code == 200, f"Agent query failed: {r.text}"
106
  return r.json()
107
 
@@ -116,7 +124,9 @@ def test_gibberish():
116
  if confidence < 0.4:
117
  print(" [PASS] Low confidence for gibberish")
118
  else:
119
- print(f" [WARN] Confidence {confidence} is higher than expected for gibberish")
 
 
120
 
121
 
122
  def test_dynamic_policy_change():
@@ -138,14 +148,18 @@ def test_dynamic_policy_change():
138
  print("\n Step B: Query agent about refunds (original policy)...")
139
  result_v1 = query_agent(
140
  "Customer requesting a refund after 45 days",
141
- {"plan": "annual", "days_since_purchase": 45, "tenure_months": 6}
142
  )
143
  print(f" v1 action: {result_v1.get('recommended_action')}")
144
  print(f" v1 rule: {result_v1.get('rule_applied', 'N/A')}")
145
 
146
  # Now modify the SOP - change the refund window
147
  print("\n Step C: Modifying SOP (changing refund window)...")
148
- modified_sop = original_sop.replace("30 day", "60 day").replace("30-day", "60-day").replace("30 days", "60 days")
 
 
 
 
149
  if modified_sop == original_sop:
150
  # Try alternative patterns
151
  modified_sop = original_sop.replace("30", "60")
@@ -166,7 +180,7 @@ def test_dynamic_policy_change():
166
  print("\n Step E: Query agent about refunds (modified policy)...")
167
  result_v2 = query_agent(
168
  "Customer requesting a refund after 45 days",
169
- {"plan": "annual", "days_since_purchase": 45, "tenure_months": 6}
170
  )
171
  print(f" v2 action: {result_v2.get('recommended_action')}")
172
  print(f" v2 rule: {result_v2.get('rule_applied', 'N/A')}")
@@ -178,11 +192,22 @@ def test_dynamic_policy_change():
178
  # Check if actions actually changed based on policy
179
  v1_action_lower = str(result_v1.get("recommended_action", "")).lower()
180
  v2_action_lower = str(result_v2.get("recommended_action", "")).lower()
181
-
182
  # Under 30 days limit (v1), 45 days should be denied/not allowed
183
  # Under 60 days limit (v2), 45 days should be approved/prorated
184
- policy_executed_correctly = ("deny" in v1_action_lower or "no refund" in v1_action_lower or "not eligible" in v1_action_lower or "cannot" in v1_action_lower) and ("approve" in v2_action_lower or "prorated" in v2_action_lower or "allow" in v2_action_lower)
185
- print(f" Policy execution behavior changed appropriately (Deny -> Approve): {policy_executed_correctly}")
 
 
 
 
 
 
 
 
 
 
 
186
 
187
  # Restore original SOP
188
  print("\n Step F: Restoring original SOP...")
@@ -197,11 +222,62 @@ def test_dynamic_policy_change():
197
  print(" [FAIL] Skills did NOT change - system may still be static")
198
 
199
  if policy_executed_correctly:
200
- print(" [PASS] Agent correctly executed the policy change (Denied at 45 days under 30-day SOP, Approved under 60-day SOP!)")
 
 
201
  elif v2_mentions_60:
202
  print(" [PASS] Agent response reflects the modified policy (60 days)")
203
  else:
204
- print(" [WARN] Agent response did not change behavior or mention the new policy")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
 
207
  def main():
@@ -213,7 +289,9 @@ def main():
213
  check_health()
214
  except Exception as e:
215
  print(f" [FATAL] API not reachable: {e}")
216
- print(" Make sure backend is running: python -m uvicorn backend.main:app --port 8080")
 
 
217
  sys.exit(1)
218
 
219
  # Test 1: Compile and get skills
@@ -240,6 +318,12 @@ def main():
240
  if os.path.exists(SOP_PATH):
241
  print(" Attempting to restore original SOP...")
242
 
 
 
 
 
 
 
243
  print("\n" + "=" * 60)
244
  print("SMOKE TEST COMPLETE")
245
  print("=" * 60)
 
7
 
8
  Requires: backend running on http://localhost:8080
9
  """
10
+
11
  import requests
12
  import time
13
  import sys
 
52
  # Poll the compile stream for completion
53
  for attempt in range(60): # max 5 minutes
54
  time.sleep(5)
55
+
56
  # Check job status explicitly
57
  try:
58
  status_req = requests.get(f"{API}/compile/{job_id}/status")
 
60
  job_info = status_req.json()
61
  if job_info.get("status") == "error":
62
  print(f" [ERROR] Job failed: {job_info.get('error_detail')}")
63
+ raise RuntimeError(
64
+ f"Compilation job failed: {job_info.get('error_detail')}"
65
+ )
66
  if job_info.get("status") == "complete":
67
  # Fetch skills
68
  sk = requests.get(f"{API}/skills/{COMPANY}")
 
76
  raise
77
  pass
78
 
79
+ print(f" Waiting... ({(attempt + 1) * 5}s)")
80
 
81
  # Timeout reached. Fetch final status.
82
  final_status = "Unknown"
 
90
  except Exception:
91
  pass
92
 
93
+ raise TimeoutError(
94
+ f"Compilation did not complete within 5 minutes. Final status: {final_status}, Error: {final_error}"
95
+ )
96
 
97
 
98
  def get_skills():
 
102
 
103
 
104
  def query_agent(scenario: str, context: dict = None):
105
+ r = requests.post(
106
+ f"{API}/agent/query",
107
+ json={
108
+ "company_id": COMPANY,
109
+ "scenario_text": scenario,
110
+ "json_context": context or {},
111
+ },
112
+ )
113
  assert r.status_code == 200, f"Agent query failed: {r.text}"
114
  return r.json()
115
 
 
124
  if confidence < 0.4:
125
  print(" [PASS] Low confidence for gibberish")
126
  else:
127
+ print(
128
+ f" [WARN] Confidence {confidence} is higher than expected for gibberish"
129
+ )
130
 
131
 
132
  def test_dynamic_policy_change():
 
148
  print("\n Step B: Query agent about refunds (original policy)...")
149
  result_v1 = query_agent(
150
  "Customer requesting a refund after 45 days",
151
+ {"plan": "annual", "days_since_purchase": 45, "tenure_months": 6},
152
  )
153
  print(f" v1 action: {result_v1.get('recommended_action')}")
154
  print(f" v1 rule: {result_v1.get('rule_applied', 'N/A')}")
155
 
156
  # Now modify the SOP - change the refund window
157
  print("\n Step C: Modifying SOP (changing refund window)...")
158
+ modified_sop = (
159
+ original_sop.replace("30 day", "60 day")
160
+ .replace("30-day", "60-day")
161
+ .replace("30 days", "60 days")
162
+ )
163
  if modified_sop == original_sop:
164
  # Try alternative patterns
165
  modified_sop = original_sop.replace("30", "60")
 
180
  print("\n Step E: Query agent about refunds (modified policy)...")
181
  result_v2 = query_agent(
182
  "Customer requesting a refund after 45 days",
183
+ {"plan": "annual", "days_since_purchase": 45, "tenure_months": 6},
184
  )
185
  print(f" v2 action: {result_v2.get('recommended_action')}")
186
  print(f" v2 rule: {result_v2.get('rule_applied', 'N/A')}")
 
192
  # Check if actions actually changed based on policy
193
  v1_action_lower = str(result_v1.get("recommended_action", "")).lower()
194
  v2_action_lower = str(result_v2.get("recommended_action", "")).lower()
195
+
196
  # Under 30 days limit (v1), 45 days should be denied/not allowed
197
  # Under 60 days limit (v2), 45 days should be approved/prorated
198
+ policy_executed_correctly = (
199
+ "deny" in v1_action_lower
200
+ or "no refund" in v1_action_lower
201
+ or "not eligible" in v1_action_lower
202
+ or "cannot" in v1_action_lower
203
+ ) and (
204
+ "approve" in v2_action_lower
205
+ or "prorated" in v2_action_lower
206
+ or "allow" in v2_action_lower
207
+ )
208
+ print(
209
+ f" Policy execution behavior changed appropriately (Deny -> Approve): {policy_executed_correctly}"
210
+ )
211
 
212
  # Restore original SOP
213
  print("\n Step F: Restoring original SOP...")
 
222
  print(" [FAIL] Skills did NOT change - system may still be static")
223
 
224
  if policy_executed_correctly:
225
+ print(
226
+ " [PASS] Agent correctly executed the policy change (Denied at 45 days under 30-day SOP, Approved under 60-day SOP!)"
227
+ )
228
  elif v2_mentions_60:
229
  print(" [PASS] Agent response reflects the modified policy (60 days)")
230
  else:
231
+ print(
232
+ " [WARN] Agent response did not change behavior or mention the new policy"
233
+ )
234
+
235
+
236
def test_semantic_diff():
    """Exercise the /diff/{v1}/{v2} endpoint on the first two listed versions.

    Prints SKIP when the version history cannot be fetched or holds fewer
    than two entries, FAIL when the diff endpoint does not answer with 200,
    and PASS/WARN depending on whether any change counter is positive.
    """
    print("\n5. Testing semantic diff engine...")

    # Get version history
    history = requests.get(f"{API}/brain/versions/{COMPANY}")
    if history.status_code != 200:
        print(" [SKIP] Could not fetch version history")
        return

    versions = history.json().get("versions", [])
    if len(versions) < 2:
        print(" [SKIP] Need at least 2 compiled versions for diff")
        return

    # NOTE(review): presumably the list is newest-first, so index 1 is the
    # older version and index 0 the newer one -- confirm against the API.
    v1 = versions[1]["version"]
    v2 = versions[0]["version"]
    print(f" Comparing {v1} → {v2}")

    diff_response = requests.get(
        f"{API}/diff/{v1}/{v2}", params={"company_id": COMPANY}
    )
    if diff_response.status_code != 200:
        print(
            f" [FAIL] Diff endpoint returned {diff_response.status_code}: {diff_response.text}"
        )
        return

    summary = diff_response.json().get("summary", {})
    added = summary.get("added_count", 0)
    deleted = summary.get("deleted_count", 0)
    modified = summary.get("modified_count", 0)
    shifts = summary.get("confidence_shift_count", 0)

    print(f" Added: {added}, Deleted: {deleted}, Modified: {modified}")
    print(f" Confidence shifts: {shifts}")
    print(
        f" V1 skills: {summary.get('v1_skills', 0)} → V2 skills: {summary.get('v2_skills', 0)}"
    )

    if any(count > 0 for count in (added, modified, deleted, shifts)):
        print(" [PASS] Semantic diff detected changes between versions")
        return

    print(
        " [WARN] Diff returned no changes — may indicate skills didn't change or diff has a bug"
    )
281
 
282
 
283
  def main():
 
289
  check_health()
290
  except Exception as e:
291
  print(f" [FATAL] API not reachable: {e}")
292
+ print(
293
+ " Make sure backend is running: python -m uvicorn backend.main:app --port 8080"
294
+ )
295
  sys.exit(1)
296
 
297
  # Test 1: Compile and get skills
 
318
  if os.path.exists(SOP_PATH):
319
  print(" Attempting to restore original SOP...")
320
 
321
+ # Test 4: Semantic diff
322
+ try:
323
+ test_semantic_diff()
324
+ except Exception as e:
325
+ print(f" [ERROR] Diff test failed: {e}")
326
+
327
  print("\n" + "=" * 60)
328
  print("SMOKE TEST COMPLETE")
329
  print("=" * 60)
scripts/stress_test.py ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Stress test: proves compiler resilience under adversarial conditions.
3
+ - Malformed markdown injection
4
+ - Contradictory policy data
5
+ - Semantic diff verification
6
+ - Multi-compile stability (three consecutive compilations)
7
+
8
+ Usage:
9
+ python scripts/stress_test.py
10
+
11
+ Requires: backend running on http://localhost:8080
12
+ """
13
+
14
+ import requests
15
+ import time
16
+ import sys
17
+ import os
18
+ import json
19
+
20
+ API = "http://localhost:8080"
21
+ COMPANY = "rivanly-inc"
22
+
23
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
24
+ TEST_DIR = os.path.join(BASE_DIR, "data", "sources", COMPANY)
25
+
26
+
27
def check_health():
    """Query the /health endpoint and print the reported component states.

    Returns True when the endpoint answers with HTTP 200; otherwise the
    status assertion fails with the response text.
    """
    print("1. Checking API health...")
    response = requests.get(f"{API}/health")
    assert response.status_code == 200, f"Health check failed: {response.text}"
    report = response.json()
    api_state = report["status"]
    vllm_state = report["vllm"]
    db_state = report["database"]
    print(f" API: {api_state}, vLLM: {vllm_state}, DB: {db_state}")
    return True
34
+
35
+
36
def compile_and_wait(label="Compile"):
    """Trigger a compilation and poll its status until it finishes.

    Args:
        label: Short tag used to prefix the progress output.

    Returns:
        The parsed ``/skills/{COMPANY}`` payload once the job reports
        ``complete`` and the skills request succeeds;
        ``{"status": "error", "error": ...}`` when the job reports ``error``;
        ``{"status": "timeout"}`` when the job has not finished after
        60 polls spaced 5 seconds apart.

    Raises:
        AssertionError: If the compile request is not answered with HTTP 200.
    """
    print(f" [{label}] Triggering compilation...")
    r = requests.post(f"{API}/compile", json={"company_id": COMPANY})
    assert r.status_code == 200, f"Compile failed: {r.text}"
    job_id = r.json()["job_id"]
    print(f" Job ID: {job_id}")

    for attempt in range(60):
        time.sleep(5)
        try:
            status_req = requests.get(f"{API}/compile/{job_id}/status")
            if status_req.status_code == 200:
                job_info = status_req.json()
                if job_info.get("status") == "error":
                    print(f" [FAIL] Job failed: {job_info.get('error_detail')}")
                    return {"status": "error", "error": job_info.get("error_detail")}
                if job_info.get("status") == "complete":
                    sk = requests.get(f"{API}/skills/{COMPANY}")
                    if sk.status_code == 200:
                        data = sk.json()
                        skills = data.get("skills", [])
                        print(
                            f" Compilation produced {len(skills)} skills (version: {data.get('version', 'N/A')})"
                        )
                        return data
        except Exception as exc:
            # Polling stays best-effort (the next tick retries), but the
            # failure is reported so a broken status endpoint is not mistaken
            # for a slow compilation.
            print(f" [WARN] Status poll failed, will retry: {exc}")
        print(f" Waiting... ({(attempt + 1) * 5}s)")

    return {"status": "timeout"}
67
+
68
+
69
def test_malformed_markdown():
    """Inject a malformed markdown source and check the pipeline still completes.

    The temporary file is removed even when compilation raises, so it cannot
    leak into later compilations. A job that reports ``error`` or never
    finishes (``timeout``) is reported as FAIL.
    """
    print("\n2. Malformed source resilience test...")

    malformed = """## Corrupted Table
| Header 1 | Header 2
| --- | ---
| broken row

## Nested
### Subsection with no body

||||
|--|-|

Unclosed bracket [[[[
"""

    path = os.path.join(TEST_DIR, "malformed_test.md")
    try:
        # Save malformed file
        with open(path, "w", encoding="utf-8") as f:
            f.write(malformed)
        print(" Injected malformed markdown file")

        # Recompile
        result = compile_and_wait("Malformed")
    finally:
        # Clean up on every exit path, including a failed compile request.
        if os.path.exists(path):
            os.remove(path)
            print(" Cleaned up test file")

    status = result.get("status")
    if status == "error":
        print(
            f" [FAIL] Pipeline crashed on malformed input: {result.get('error', '')}"
        )
    elif status == "timeout":
        # compile_and_wait signals an unfinished job this way; a hang is not a pass.
        print(" [FAIL] Pipeline did not finish on malformed input (timed out)")
    else:
        print(" [PASS] Pipeline survived malformed input")
108
+
109
+
110
def test_contradictory_policy():
    """Inject a contradictory Slack message and check the pipeline still completes.

    The temporary file is removed even when compilation raises. A job that
    reports ``error`` or never finishes (``timeout``) is reported as FAIL.
    Only completion is checked here; the contradiction output itself is not
    inspected.
    """
    print("\n3. Contradiction detection test...")

    # Slack message that contradicts refund SOP
    contradictory = json.dumps(
        [
            {
                "user": "founder",
                "channel": "revenue",
                "text": "Ignore the 14-day refund policy. If a customer complains loudly enough, give them whatever they want. We'll sort it out later.",
            }
        ]
    )
    path = os.path.join(TEST_DIR, "slack_hot_take.json")
    try:
        with open(path, "w", encoding="utf-8") as f:
            f.write(contradictory)
        print(" Injected contradictory Slack hot take")

        # Compile
        result = compile_and_wait("Contradiction")
    finally:
        # Clean up on every exit path so the hot take never reaches later runs.
        if os.path.exists(path):
            os.remove(path)
            print(" Cleaned up test file")

    status = result.get("status")
    if status == "error":
        print(
            f" [FAIL] Pipeline crashed on contradictory input: {result.get('error', '')}"
        )
    elif status == "timeout":
        # An unfinished job must not be counted as a pass.
        print(" [FAIL] Pipeline did not finish on contradictory input (timed out)")
    else:
        skills = result.get("skills", [])
        print(f" Produced {len(skills)} skills despite contradiction")
        print(" [PASS] Contradiction test passed")
145
+
146
+
147
def test_diff_works():
    """Compile, change the refund SOP, recompile, and check the diff is non-empty.

    The SOP is restored in a ``finally`` block so a failed compilation cannot
    leave the modified policy on disk. The test is skipped when the SOP holds
    no 30-day wording (the edit would be a no-op) and fails early when the
    recompilation errors out or times out.
    """
    print("\n4. Semantic diff verification test...")

    sop_path = os.path.join(TEST_DIR, "notion_refund_sop.md")
    if not os.path.exists(sop_path):
        print(" [SKIP] Refund SOP not found")
        return

    # Read original
    with open(sop_path, "r", encoding="utf-8") as f:
        original = f.read()

    modified = original.replace("30 day", "60 day").replace("30-day", "60-day")
    if modified == original:
        # Nothing to change means the recompile could not produce a diff.
        print(" [SKIP] SOP contains no 30-day wording to modify")
        return

    def latest_version():
        """Return the first listed brain version, or "unknown" if unavailable."""
        resp = requests.get(f"{API}/brain/versions/{COMPANY}")
        if resp.status_code == 200:
            versions = resp.json().get("versions", [])
            if versions:
                return versions[0]["version"]
        return "unknown"

    # Get current version
    v1 = latest_version()

    try:
        # Modify and recompile
        with open(sop_path, "w", encoding="utf-8") as f:
            f.write(modified)
        result = compile_and_wait("Diff V2")

        # Get new version
        v2 = latest_version()
    finally:
        # Restore original on every exit path.
        with open(sop_path, "w", encoding="utf-8") as f:
            f.write(original)
        print(" Restored original SOP")

    if result.get("status") in ("error", "timeout"):
        print(f" [FAIL] Recompilation did not complete: {result.get('status')}")
        return

    # Call diff endpoint
    if v1 == "unknown" or v2 == "unknown":
        print(" [SKIP] Could not determine versions for diff")
        return

    r = requests.get(f"{API}/diff/{v1}/{v2}", params={"company_id": COMPANY})
    if r.status_code != 200:
        print(f" [FAIL] Diff endpoint returned {r.status_code}")
        return

    diff = r.json()
    summary = diff.get("summary", {})
    total_changes = (
        summary.get("added_count", 0)
        + summary.get("deleted_count", 0)
        + summary.get("modified_count", 0)
        + summary.get("confidence_shift_count", 0)
    )
    print(f" Total changes detected: {total_changes}")
    print(
        f" V1: {summary.get('v1_skills')} skills, V2: {summary.get('v2_skills')} skills"
    )

    if total_changes > 0:
        print(" [PASS] Semantic diff correctly detected changes")
        for m in diff.get("modified", []):
            print(f" - {m['id']}: {m['field']} changed")
        for cs in diff.get("confidence_shifts", []):
            print(
                f" - {cs['id']}: {cs['old_confidence']} → {cs['new_confidence']}"
            )
    else:
        print(" [WARN] No changes detected — manual verification needed")
219
+
220
+
221
def test_multi_compile_stability():
    """Run 3 compiles in a row to verify stability.

    Returns:
        True when all three runs complete; False as soon as one run reports
        ``error`` or does not finish (``timeout``).
    """
    print("\n5. Multi-compile stability test...")
    for i in range(3):
        print(f"\n Run {i + 1}/3...")
        result = compile_and_wait(f"Stability Run {i + 1}")
        status = result.get("status")
        if status in ("error", "timeout"):
            # A timed-out run carries no skills and must not be counted as a
            # successful compilation.
            detail = result.get("error", "") if status == "error" else "timed out"
            print(f" [FAIL] Compilation {i + 1} failed: {detail}")
            return False
        skills = result.get("skills", [])
        print(f" Run {i + 1}: {len(skills)} skills produced")

    print(" [PASS] 3 consecutive compilations succeeded")
    return True
235
+
236
+
237
def main():
    """Run the health check, then each stress stage in order.

    The process exits with status 1 when the health check raises. Each later
    stage runs inside its own guard, so one failing stage is reported and
    does not stop the stages after it.
    """
    banner = "=" * 60
    print(banner)
    print("KERNL STRESS TEST — Proving compiler resilience")
    print(banner)

    try:
        check_health()
    except Exception as e:
        print(f" [FATAL] API not reachable: {e}")
        sys.exit(1)

    # (stage function, tag used in its error message), in execution order.
    stages = (
        (test_malformed_markdown, "Malformed markdown"),
        (test_contradictory_policy, "Contradiction"),
        (test_diff_works, "Diff"),
        (test_multi_compile_stability, "Stability"),
    )
    for stage, tag in stages:
        try:
            stage()
        except Exception as err:
            print(f" [ERROR] {tag} test failed: {err}")

    print("\n" + banner)
    print("STRESS TEST COMPLETE")
    print(banner)
275
+
276
+
277
+ if __name__ == "__main__":
278
+ main()