Spaces:

lablab-ai-amd-developer-hackathon
/

kernl-backend

Sleeping

ALPHA0008 committed 13 days ago

Commit

a688aff

1 Parent(s): f1c4fd6

refactor: replace sequential 3-node pipeline with parallel 13-node multi-agent architecture

- Replaces old load_and_chunk → cluster_evidence → quality_normalize
pipeline with 13-node LangGraph graph using Send API fan-out
- Adds 3 parallel ingestion nodes (ingest_notion, ingest_slack, ingest_tickets)
with ingest_join barrier
- Adds 4 parallel extraction nodes (decisions, workflows, exceptions,
contradictions) with conditional routing
- Adds synthesize_skills, link_evidence, score_confidence, write_brain
sequential compilation pipeline
- Implements brain_agent with pre-computed embedding similarity search
and LLM threshold reasoning
- Removes old deprecated nodes from git tracking
- Updates CLAUDE.md to reflect current 13-node architecture, accurate
API endpoints, schema, and SSE pattern
- Updates .gitignore for session files, nul artifacts, and log files

Files changed (30) hide show

.gitignore +11 -0
CLAUDE.md +157 -163
backend/agent/brain_agent.py +100 -49
backend/db/supabase.py +47 -9
backend/graph/graph.py +107 -13
backend/graph/nodes/cluster_evidence.py +0 -64
backend/graph/nodes/detect_contradictions.py +51 -0
backend/graph/nodes/extract_decisions.py +43 -0
backend/graph/nodes/extract_exceptions.py +49 -0
backend/graph/nodes/extract_workflows.py +46 -0
backend/graph/nodes/ingest_join.py +29 -0
backend/graph/nodes/ingest_notion.py +60 -0
backend/graph/nodes/ingest_slack.py +50 -0
backend/graph/nodes/ingest_tickets.py +59 -0
backend/graph/nodes/link_evidence.py +78 -0
backend/graph/nodes/load_and_chunk.py +0 -174
backend/graph/nodes/load_sources.py +70 -0
backend/graph/nodes/quality_normalize.py +0 -83
backend/graph/nodes/score_confidence.py +61 -0
backend/graph/nodes/synthesize_skills.py +93 -97
backend/graph/nodes/write_brain.py +97 -58
backend/graph/state.py +19 -5
backend/llm.py +108 -13
backend/main.py +181 -67
backend/models/schemas.py +39 -0
backend/test_compile.py +82 -43
data/sources/rivanly-inc/notion_refund_sop.md +1 -1
frontend/src/app/compile/[jobId]/page.tsx +20 -8
scripts/smoke_test.py +103 -19
scripts/stress_test.py +278 -0

.gitignore CHANGED Viewed

@@ -45,3 +45,14 @@ data/sources/*/
 brand_alchemy_company_brain.html
 company_brain_PRD_v4.md

 brand_alchemy_company_brain.html
 company_brain_PRD_v4.md
+# Claude Code session files (never commit these)
+session-ses*.md
+# Windows artifacts
+nul
+backend/nul
+# Logs
+backend_log.txt
+*.log

CLAUDE.md CHANGED Viewed

@@ -14,35 +14,49 @@ Company Brain is a multi-agent compilation pipeline that extracts operational de
 ## Monorepo Structure
 ```
-company-brain/
 ├── backend/              ← FastAPI + LangGraph pipeline (Python)
 │   ├── main.py           ← FastAPI app entry point
 │   ├── graph/
 │   │   ├── state.py      ← BrainState TypedDict
-│   │   ├── nodes/        ← one file per LangGraph node
-│   │   │   ├── ingest_slack.py
-│   │   │   ├── ingest_notion.py
-│   │   │   ├── ingest_tickets.py
-│   │   │   ├── ingest_join.py
-│   │   │   ├── extract_decisions.py
-│   │   │   ├── extract_workflows.py
-│   │   │   ├── extract_exceptions.py
-│   │   │   ├── detect_contradictions.py
-│   │   │   ├── synthesize_skills.py
-│   │   │   ├── link_evidence.py
-│   │   │   ├── score_confidence.py
-│   │   │   └── write_brain.py
-│   │   └── graph.py      ← graph assembly + compile
-│   ├── agents/
-│   │   └── brain_agent.py ← query-time brain agent
 │   ├── db/
-│   │   └── supabase.py   ← Supabase client + queries
 │   ├── models/
 │   │   └── schemas.py    ← Pydantic models for API
-│   └── requirements.txt
-├── frontend/             ← Next.js 14 + Tailwind (Harshit)
 ├── data/
-│   └── sources/          ← 8 synthetic source files
 │       ├── notion_refund_sop.md
 │       ├── notion_pricing_policy.md
 │       ├── notion_eng_runbook.md
@@ -51,8 +65,13 @@ company-brain/
 │       ├── slack_export_support.json
 │       ├── slack_export_ops.json
 │       └── zendesk_tickets.json
-└── CLAUDE.md             ← this file
 ```
 ---
@@ -102,33 +121,33 @@ result = response.choices[0].message.content
 ## BrainState — The Central Data Structure
 ```python
-from typing import TypedDict, Annotated
 import operator
 class BrainState(TypedDict):
     company_id: str
-    source_files: list[dict]          # [{filename, content, sha256, type}]
-    # Ingestion outputs (parallel, accumulated with operator.add)
-    normalized_events: Annotated[list[dict], operator.add]    # from Slack
-    structured_sops: Annotated[list[dict], operator.add]      # from Notion
-    resolved_cases: Annotated[list[dict], operator.add]       # from tickets
-    # Extraction outputs (parallel, accumulated with operator.add)
-    raw_decisions: Annotated[list[dict], operator.add]
-    workflow_steps: Annotated[list[dict], operator.add]
-    exception_rules: Annotated[list[dict], operator.add]
-    contradictions: Annotated[list[dict], operator.add]
-    # Compilation outputs (sequential)
-    draft_skills: list[dict]
-    skills_with_evidence: list[dict]
-    final_skills: list[dict]
-    # Metadata
     job_id: str
     brain_version: str
-    errors: Annotated[list[str], operator.add]
 ```
 **The `Annotated[list, operator.add]` pattern is critical.** It allows multiple parallel nodes to write to the same list field without overwriting each other. Do not change this.
@@ -195,41 +214,20 @@ USER = """Extract all {type} from this company data:
 ---
-## Skills File Schema (per skill)
 ```python
 {
-    "id": "handle_refund_request",          # snake_case
-    "name": "Handle Refund Request",         # human readable
-    "domain": "support",                     # support|revenue|product_eng|customer_success|hr|finance_ops
-    "version": "1.0",
-    "confidence": 0.91,                      # 0.0 - 1.0
-    "stale": False,
-    "review_required": False,                # True if confidence < 0.6
-    "last_updated": "2026-05-04T09:30:00Z",
-    "trigger": {
-        "phrases": ["refund", "money back"],
-        "conditions": ["customer mentions payment dissatisfaction"]
-    },
-    "decision_logic": [
-        {
-            "condition": "plan == 'annual' AND days_since_purchase <= 14",
-            "action": "approve_full_refund",
-            "note": "No-questions policy within 14 days.",
-            "evidence_sources": [
-                {
-                    "source": "notion_refund_sop.md",
-                    "excerpt": "Annual plan customers within 14 days...",
-                    "confidence": 0.95
-                }
-            ]
-        }
-    ],
-    "forbidden_actions": [
-        "Never process refunds for lifetime deal accounts"
     ],
-    "escalation_chain": ["support_agent", "support_lead", "account_manager", "founder"],
-    "sla": "respond_within_2h, resolve_within_24h"
 }
 ```
@@ -238,11 +236,11 @@ USER = """Extract all {type} from this company data:
 ## Confidence Scoring Formula
 ```python
-def score_confidence(skill: dict, all_sources: list[dict]) -> float:
     base = 0.5
     # More sources = higher confidence
-    source_count = len(skill["decision_logic"][0].get("evidence_sources", []))
     if source_count >= 3:
         base += 0.25
     elif source_count == 2:
@@ -250,90 +248,65 @@ def score_confidence(skill: dict, all_sources: list[dict]) -> float:
     elif source_count == 1:
         base += 0.05
-    # Recent sources = higher confidence
-    # (check source file last_modified if available)
-    base += 0.15  # assume recent for v0
     # No contradictions for this skill = higher confidence
-    # (passed in from contradiction detector)
-    has_contradiction = False  # check contradictions list
     if not has_contradiction:
         base += 0.10
-    return min(base, 1.0)
 ```
 ---
 ## Brain Agent Pattern
-```python
-from sentence_transformers import SentenceTransformer
-import numpy as np
-# Load once at startup
-embedder = SentenceTransformer('all-MiniLM-L6-v2')
-# Pre-compute skill embeddings (call after compile)
-skill_embeddings = {}  # {skill_id: np.array}
-def compute_skill_embeddings(skills: list[dict]):
-    global skill_embeddings
-    for skill in skills:
-        text = f"{skill['name']} {' '.join(skill['trigger']['phrases'])}"
-        skill_embeddings[skill['id']] = embedder.encode(text)
-def match_skill(query: str) -> tuple[str, float]:
-    query_emb = embedder.encode(query)
-    scores = {}
-    for skill_id, emb in skill_embeddings.items():
-        score = float(np.dot(query_emb, emb) /
-                     (np.linalg.norm(query_emb) * np.linalg.norm(emb)))
-        scores[skill_id] = score
-    best_id = max(scores, key=scores.get)
-    return best_id, scores[best_id]
-def skill_to_markdown(skill: dict) -> str:
-    """Convert skill JSON to markdown for prompt injection."""
-    lines = [f"## {skill['name']}", ""]
-    for logic in skill['decision_logic']:
-        lines.append(f"- IF {logic['condition']}: {logic['action']}")
-        if logic.get('note'):
-            lines.append(f"  Note: {logic['note']}")
-    lines.append("")
-    lines.append("FORBIDDEN: " + "; ".join(skill['forbidden_actions']))
-    lines.append("ESCALATE: " + " → ".join(skill['escalation_chain']))
-    return "\n".join(lines)
-```
 ---
-## FastAPI SSE Pattern
 ```python
-from fastapi import FastAPI
-from fastapi.responses import StreamingResponse
-import asyncio
-import json
-async def event_generator(job_id: str):
-    """Yields SSE events during compilation."""
-    async for event in compilation_events[job_id]:
-        yield f"event: {event['type']}\ndata: {json.dumps(event['data'])}\n\n"
-@app.get("/compile/stream")
-async def stream_compile(job_id: str):
-    return StreamingResponse(
-        event_generator(job_id),
-        media_type="text/event-stream",
-        headers={
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Access-Control-Allow-Origin": "*"  # CORS for frontend
-        }
-    )
 ```
 ---
 ## Supabase Tables
@@ -359,18 +332,20 @@ CREATE TABLE skills_files (
   is_current BOOLEAN DEFAULT false
 );
-CREATE UNIQUE INDEX idx_one_current_per_company
-  ON skills_files(company_id) WHERE is_current = true;
-CREATE TABLE compile_runs (
-  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
   company_id TEXT REFERENCES companies(id),
-  status TEXT CHECK (status IN ('started','running','complete','error')),
-  started_at TIMESTAMPTZ DEFAULT now(),
-  completed_at TIMESTAMPTZ,
-  duration_ms INTEGER,
-  result_version TEXT,
-  error_detail TEXT
 );
 CREATE TABLE source_files (
@@ -378,10 +353,23 @@ CREATE TABLE source_files (
   company_id TEXT REFERENCES companies(id),
   filename TEXT NOT NULL,
   sha256 TEXT NOT NULL,
-  content TEXT NOT NULL,
-  source_type TEXT CHECK (source_type IN ('slack_json','notion_md','tickets_json')),
   uploaded_at TIMESTAMPTZ DEFAULT now()
 );
 ```
 ---
@@ -401,14 +389,20 @@ COMPANY_ID=rivanly-inc
 ## API Endpoints — Full List
 ```
-POST /compile              → trigger pipeline, returns {job_id, stream_url}
-GET  /compile/stream       → SSE stream for job_id
-GET  /brain/status         → current brain version + stats
-GET  /skills               → all skills (lightweight)
-GET  /skills/{id}          → full skill detail
-POST /agent/handle         → brain agent query
-GET  /diff/{v1}/{v2}       → version diff
-POST /sources/upload       → upload source files
 ```
 ---
@@ -420,7 +414,7 @@ POST /sources/upload       → upload source files
 3. **Never read raw source files at query time** — brain agent reads skills file only
 4. **All LLM calls wrapped in try/except** — retry once on JSON parse failure, return `[]` if still failing
 5. **`skills_files.is_current` enforced by partial unique index** — only one current per company
-6. **`compile_runs` table is append-only** — never update rows, only insert
 7. **CORS headers on all endpoints** — frontend is on different domain
 8. **Temperature 0.1 on all extraction calls** — deterministic is better than creative here

 ## Monorepo Structure
 ```
+kernl/
 ├── backend/              ← FastAPI + LangGraph pipeline (Python)
 │   ├── main.py           ← FastAPI app entry point
+│   ├── llm.py            ← vLLM client, semaphore(4), embeddings, JSON self-repair
+│   ├── sse.py            ← Server-Sent Events bus for streaming
+│   ├── test_compile.py   ← Standalone graph test
 │   ├── graph/
 │   │   ├── state.py      ← BrainState TypedDict
+│   │   ├── graph.py      ← graph assembly + compile
+│   │   └── nodes/        ← one file per LangGraph node
+│   │       ├── load_sources.py
+│   │       ├── ingest_slack.py
+│   │       ├── ingest_notion.py
+│   │       ├── ingest_tickets.py
+│   │       ├── ingest_join.py
+│   │       ├── extract_decisions.py
+│   │       ├── extract_workflows.py
+│   │       ├── extract_exceptions.py
+│   │       ├── detect_contradictions.py
+│   │       ├── synthesize_skills.py
+│   │       ├── link_evidence.py
+│   │       ├── score_confidence.py
+│   │       └── write_brain.py
+│   ├── agent/
+│   │   └── brain_agent.py ← query-time brain agent (embedding + LLM reasoning)
 │   ├── db/
+│   │   ├── supabase.py   ← Supabase client + queries
+│   │   └── schema.sql    ← DB schema (5 tables)
 │   ├── models/
 │   │   └── schemas.py    ← Pydantic models for API
+│   ├── requirements.txt
+│   └── .env.example
+├── frontend/             ← Next.js 16.2.5 + Tailwind v4
+│   ├── src/app/
+│   │   ├── page.tsx          ← Dashboard
+│   │   ├── layout.tsx        ← Root layout
+│   │   ├── globals.css       ← Tailwind + custom theme
+│   │   ├── compile/[jobId]/page.tsx   ← Pipeline stream viewer
+│   │   ├── skills/[companyId]/page.tsx ← Skills viewer
+│   │   └── demo/[companyId]/page.tsx  ← Brain vs Generic A/B comparison
+│   └── ...
 ├── data/
+│   └── sources/rivanly-inc/  ← 8 synthetic source files
 │       ├── notion_refund_sop.md
 │       ├── notion_pricing_policy.md
 │       ├── notion_eng_runbook.md
 │       ├── slack_export_support.json
 │       ├── slack_export_ops.json
 │       └── zendesk_tickets.json
+├── scripts/
+│   ├── smoke_test.py     ← Dynamic policy change propagation test
+│   └── stress_test.py    ← Resilience test (malformed input, contradictions)
+├── CLAUDE.md             ← this file
+└── .gitignore
 ```
+**Note:** `backend/agents/` is empty — `brain_agent.py` lives in `backend/agent/` instead.
 ---
 ## BrainState — The Central Data Structure
 ```python
+from typing import TypedDict, Annotated, List, Dict, Any
 import operator
 class BrainState(TypedDict):
     company_id: str
     job_id: str
+    source_files: Annotated[List[Dict[str, Any]], operator.add]
+    structured_sops: Annotated[List[Dict[str, Any]], operator.add]
+    normalized_events: Annotated[List[Dict[str, Any]], operator.add]
+    resolved_cases: Annotated[List[Dict[str, Any]], operator.add]
+    all_chunks: List[Dict[str, Any]]
+    raw_decisions: Annotated[List[Dict[str, Any]], operator.add]
+    workflow_steps: Annotated[List[Dict[str, Any]], operator.add]
+    exception_rules: Annotated[List[Dict[str, Any]], operator.add]
+    contradictions: Annotated[List[Dict[str, Any]], operator.add]
+    draft_skills: List[Dict[str, Any]]
+    skills_with_evidence: List[Dict[str, Any]]
+    final_skills: List[Dict[str, Any]]
+    skills_file: Dict[str, Any]
     brain_version: str
+    start_time: float
+    errors: Annotated[List[str], operator.add]
 ```
 **The `Annotated[list, operator.add]` pattern is critical.** It allows multiple parallel nodes to write to the same list field without overwriting each other. Do not change this.
 ---
+## Skills File Schema (per skill — pipeline output)
 ```python
 {
+    "id": "handle_refund_request",        # snake_case
+    "category": "Customer Support",       # operational domain
+    "rule": "Approve full refund for annual plans within 14 days",  # actionable rule text
+    "rationale": "No-questions policy within 14 days for annual plans",
+    "evidence": [
+        "notion_refund_sop.md: Annual plan customers within 14 days..."
     ],
+    "source_files": ["notion_refund_sop.md"],
+    "confidence": 0.85,                    # 0.0 - 1.0 (scored by score_confidence node)
+    "embedding_vector": [...]              # pre-computed for semantic matching
 }
 ```
 ## Confidence Scoring Formula
 ```python
+def score_confidence(skill: dict, contradictions: list) -> float:
     base = 0.5
     # More sources = higher confidence
+    source_count = len(skill.get("evidence", []))
     if source_count >= 3:
         base += 0.25
     elif source_count == 2:
     elif source_count == 1:
         base += 0.05
+    # Recency bonus (assume recent for v0)
+    base += 0.15
     # No contradictions for this skill = higher confidence
+    skill_id = skill.get("id", "")
+    has_contradiction = any(
+        c.get("id", "").startswith(skill_id.split("_")[0])
+        or skill_id in str(c.get("domain", ""))
+        for c in contradictions
+    )
     if not has_contradiction:
         base += 0.10
+    return round(min(base, 1.0), 2)
 ```
 ---
 ## Brain Agent Pattern
+The brain agent at `backend/agent/brain_agent.py` uses:
+1. **Embedding similarity** — encodes the query with `all-MiniLM-L6-v2` and scores all skills via cosine similarity
+2. **Top-K retrieval** — fetches 5 best-matching skills
+3. **LLM reasoning** — injects retrieved skills into the prompt with the scenario and does arithmetic threshold analysis
+4. **JSON parsing** — extracts the response with a fallback for malformed JSON
+Key behavior:
+- Uses **pre-computed embeddings** (stored in DB by write_brain node) when every skill has an `embedding_vector`; otherwise computes skill embeddings on the fly
+- The LLM prompt has explicit step-by-step threshold comparison logic
+- Gibberish rejection: low embedding similarity → low confidence → meaningful fallback
+- A/B comparison: `with_brain=True/False` to compare against a generic baseline
 ---
+## SSE Event Bus Pattern
+`backend/sse.py` keeps one `asyncio.Queue` per `job_id` inside a `CompilationEventBus` singleton. Events are unnamed (no `event:` field), so the frontend receives them through `EventSource.onmessage`, which fires for unnamed events. Each payload is wrapped as `data: {"event": "<type>", "data": {<payload>}}\n\n`.
 ```python
+class CompilationEventBus:
+    def __init__(self):
+        self.queues: Dict[str, asyncio.Queue] = {}
+    async def emit_event(self, job_id: str, event_type: str, data: dict):
+        queue = self.get_queue(job_id)
+        await queue.put({"type": event_type, "data": data})
+    async def event_generator(self, job_id: str) -> AsyncGenerator[str, None]:
+        queue = self.get_queue(job_id)
+        while True:
+            event = await asyncio.wait_for(queue.get(), timeout=300)
+            payload = json.dumps({"event": event["type"], "data": event["data"]})
+            yield f"data: {payload}\n\n"
+            if event["type"] in ["pipeline_complete", "pipeline_error"]:
+                break
 ```
+The job's queue is cleaned up automatically in a `finally` block after completion or error.
 ---
 ## Supabase Tables
   is_current BOOLEAN DEFAULT false
 );
+CREATE UNIQUE INDEX idx_skills_files_current ON skills_files(company_id) WHERE is_current = true;
+CREATE TABLE skills (
+  id TEXT NOT NULL,
   company_id TEXT REFERENCES companies(id),
+  skills_file_id UUID REFERENCES skills_files(id),
+  name TEXT NOT NULL,
+  domain TEXT NOT NULL,
+  version TEXT NOT NULL,
+  confidence FLOAT NOT NULL,
+  stale BOOLEAN DEFAULT false,
+  review_required BOOLEAN DEFAULT false,
+  skill_json JSONB NOT NULL,
+  PRIMARY KEY (id, company_id, skills_file_id)
 );
 CREATE TABLE source_files (
   company_id TEXT REFERENCES companies(id),
   filename TEXT NOT NULL,
   sha256 TEXT NOT NULL,
+  storage_path TEXT NOT NULL,
   uploaded_at TIMESTAMPTZ DEFAULT now()
 );
+CREATE TABLE compile_runs (
+  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+  company_id TEXT REFERENCES companies(id),
+  status TEXT NOT NULL CHECK (status IN ('started','running','complete','error')),
+  started_at TIMESTAMPTZ DEFAULT now(),
+  completed_at TIMESTAMPTZ,
+  duration_ms INTEGER,
+  result_version TEXT,
+  error_detail TEXT
+);
+CREATE INDEX idx_skills_files_company ON skills_files(company_id, compiled_at DESC);
+CREATE INDEX idx_skills_company ON skills(company_id);
 ```
 ---
 ## API Endpoints — Full List
 ```
+POST   /compile                    → trigger pipeline, returns {job_id, status}
+POST   /compile/run                → alias for /compile
+GET    /compile/{job_id}/stream    → SSE stream for live compilation progress
+GET    /compile/{job_id}/status    → poll job status (started/running/complete/error)
+GET    /health                     → API health + vLLM + DB status
+POST   /sources/upload             → upload a source file
+GET    /sources/{company_id}       → list all source files
+DELETE /sources/{company_id}/{filename} → delete a source file
+POST   /agent/handle               → brain agent query (legacy schema)
+POST   /agent/query                → brain agent query (canonical schema)
+GET    /skills                     → get current brain JSON (legacy)
+GET    /skills/{company_id}        → get current brain with version + metadata
+GET    /brain/versions/{company_id}→ list all compiled versions
+GET    /diff/{v1}/{v2}             → semantic diff between two brain versions
 ```
 ---
 3. **Never read raw source files at query time** — brain agent reads skills file only
 4. **All LLM calls wrapped in try/except** — retry once on JSON parse failure, return `[]` if still failing
 5. **`skills_files.is_current` enforced by partial unique index** — only one current per company
+6. **`compile_runs` table is append-only** — never update existing rows; record each status change by inserting a new row
 7. **CORS headers on all endpoints** — frontend is on different domain
 8. **Temperature 0.1 on all extraction calls** — deterministic is better than creative here

backend/agent/brain_agent.py CHANGED Viewed

@@ -1,25 +1,27 @@
 import json
 from backend.db.supabase import get_client
-from backend.llm import llm_call, get_embedding, cosine_similarity
-async def handle_agent_query(company_id: str, scenario: str, context: dict = None, with_brain: bool = True) -> dict:
-    """
-    Real agent query handler.  No keyword routing, no hardcoded actions.
-    Everything flows through: retrieve skills -> build prompt -> call vLLM -> return raw result.
-    """
     if not with_brain:
         return await _baseline_query(scenario, context)
-    # --- WITH BRAIN ---
     db = get_client()
     if not db:
         return _error_response("Database connection failed.")
-    # 1. Fetch latest compiled skills
-    res = db.table("skills_files").select("brain_json").eq(
-        "company_id", company_id
-    ).order("compiled_at", desc=True).limit(1).execute()
     if not res.data:
         return _error_response("No compiled brain found. Please compile first.")
@@ -28,58 +30,108 @@ async def handle_agent_query(company_id: str, scenario: str, context: dict = Non
     if not skills:
         return _error_response("Brain is empty — no skills compiled.")
-    # 2. Embed the query and score every skill
     query_text = f"{scenario} {json.dumps(context or {})}"
     query_emb = get_embedding(query_text)
-    scored = []
-    for i, skill in enumerate(skills):
-        skill_text = f"{skill.get('category', '')} {skill.get('rule', '')} {skill.get('rationale', '')}"
-        skill_emb = get_embedding(skill_text)
-        score = cosine_similarity(query_emb, skill_emb)
-        scored.append({"skill": skill, "score": round(score, 4), "index": i})
     scored.sort(key=lambda x: x["score"], reverse=True)
     top_results = scored[:5]
     retrieval_scores = [s["score"] for s in top_results]
-    # 3. Build skills context for the LLM
     skills_context = ""
     for rank, s in enumerate(top_results):
         sk = s["skill"]
-        skills_context += f"\n--- Skill #{rank+1} (retrieval_score: {s['score']}) ---\n"
         skills_context += f"Category: {sk.get('category', 'Unknown')}\n"
         skills_context += f"Rule: {sk.get('rule', '')}\n"
         skills_context += f"Rationale: {sk.get('rationale', '')}\n"
-        skills_context += f"Evidence: {json.dumps(sk.get('evidence', []))}\n"
         skills_context += f"Compiled Confidence: {sk.get('confidence', 'unknown')}\n"
-    # 4. Prompt the LLM - no example confidence values to bias it
-    prompt = """You are the Kernl Brain Agent. You have access to this company's compiled operational skills (retrieved below, ranked by relevance).
-Your task:
-1. Read the scenario and optional JSON context carefully.
-2. Examine the retrieved skills and their retrieval_scores.
-3. Determine whether any skill clearly applies to this scenario.
-4. If a skill applies, state the specific recommended action from that skill's rule.
-5. If NO skill applies, or if the input is nonsensical/gibberish, say so honestly.
-CONFIDENCE SCORING - base it on real signals:
-- retrieval_score < 0.3 -> scenario is likely unrelated to any skill -> confidence < 0.2
-- retrieval_score 0.3-0.5 -> weak match -> confidence 0.2-0.5
-- retrieval_score 0.5-0.7 -> moderate match -> confidence 0.5-0.75
-- retrieval_score > 0.7 AND rule clearly addresses the scenario -> confidence 0.75-0.95
-- Never exceed 0.95 unless the match is exact and unambiguous.
-- Gibberish or nonsensical input -> confidence 0.0, recommended_action = "unable to determine"
-Respond with ONLY a JSON object (no markdown fences, no text outside the JSON):
 {
-  "recommended_action": "the specific action to take",
-  "rule_applied": "exact rule text from the best matching skill",
-  "evidence": ["evidence items from the skill"],
-  "skill_matched": "the category of the matched skill",
   "confidence": 0.0,
-  "reasoning": "explain why this skill applies and how you chose the confidence level"
 }"""
     user_content = f"--- Scenario ---\n{scenario}\n\n--- Additional Context ---\n{json.dumps(context or {})}\n\n--- Retrieved Skills (ranked by relevance) ---\n{skills_context}"
@@ -87,11 +139,11 @@ Respond with ONLY a JSON object (no markdown fences, no text outside the JSON):
     response_str = await llm_call(prompt, user_content)
     result = _parse_json(response_str)
     result["retrieval_scores"] = retrieval_scores
     return result
 async def _baseline_query(scenario: str, context: dict = None) -> dict:
-    """Without-brain baseline: LLM answers with zero company context."""
     prompt = """You are a generic AI assistant. You have NO company-specific knowledge or policies.
 Answer based only on general industry standards. Be honest about your lack of specific context.
 Respond with ONLY a JSON object:
@@ -110,7 +162,6 @@ Respond with ONLY a JSON object:
 def _parse_json(raw: str) -> dict:
-    """Parse LLM response as JSON, stripping markdown fences."""
     try:
         clean = raw.strip()
         if clean.startswith("```json"):
@@ -128,7 +179,7 @@ def _parse_json(raw: str) -> dict:
             "skill_matched": "none",
             "confidence": 0.0,
             "retrieval_scores": [],
-            "reasoning": f"JSON parse error: {e}. Raw: {raw[:500]}"
         }
@@ -140,5 +191,5 @@ def _error_response(msg: str) -> dict:
         "skill_matched": "none",
         "confidence": 0.0,
         "retrieval_scores": [],
-        "reasoning": msg
     }

 import json
+import numpy as np
 from backend.db.supabase import get_client
+from backend.llm import llm_call, get_embedding
+async def handle_agent_query(
+    company_id: str, scenario: str, context: dict = None, with_brain: bool = True
+) -> dict:
     if not with_brain:
         return await _baseline_query(scenario, context)
     db = get_client()
     if not db:
         return _error_response("Database connection failed.")
+    res = (
+        db.table("skills_files")
+        .select("brain_json")
+        .eq("company_id", company_id)
+        .order("compiled_at", desc=True)
+        .limit(1)
+        .execute()
+    )
     if not res.data:
         return _error_response("No compiled brain found. Please compile first.")
     if not skills:
         return _error_response("Brain is empty — no skills compiled.")
     query_text = f"{scenario} {json.dumps(context or {})}"
     query_emb = get_embedding(query_text)
+    cached = True
+    for s in skills:
+        if "embedding_vector" not in s:
+            cached = False
+            break
+    if cached:
+        skill_embs = np.array([s["embedding_vector"] for s in skills])
+        query_vec = np.array(query_emb)
+        norms = np.linalg.norm(skill_embs, axis=1) * np.linalg.norm(query_vec)
+        norms[norms == 0] = 1e-10
+        scores = np.dot(skill_embs, query_vec) / norms
+        top_indices = np.argsort(scores)[-5:][::-1]
+        scored = []
+        for idx in top_indices:
+            scored.append(
+                {
+                    "skill": skills[idx],
+                    "score": round(float(scores[idx]), 4),
+                    "index": int(idx),
+                }
+            )
+    else:
+        scored = []
+        for i, skill in enumerate(skills):
+            skill_text = f"{skill.get('category', '')} {skill.get('rule', '')} {skill.get('rationale', '')}"
+            skill_emb = get_embedding(skill_text)
+            score = float(
+                np.dot(query_emb, skill_emb)
+                / (np.linalg.norm(query_emb) * np.linalg.norm(skill_emb) + 1e-10)
+            )
+            scored.append({"skill": skill, "score": round(score, 4), "index": i})
     scored.sort(key=lambda x: x["score"], reverse=True)
     top_results = scored[:5]
     retrieval_scores = [s["score"] for s in top_results]
     skills_context = ""
     for rank, s in enumerate(top_results):
         sk = s["skill"]
+        skills_context += (
+            f"\n--- Skill #{rank + 1} (retrieval_score: {s['score']}) ---\n"
+        )
         skills_context += f"Category: {sk.get('category', 'Unknown')}\n"
         skills_context += f"Rule: {sk.get('rule', '')}\n"
         skills_context += f"Rationale: {sk.get('rationale', '')}\n"
+        evidence = sk.get("evidence", [])
+        if isinstance(evidence, list):
+            skills_context += f"Evidence: {json.dumps(evidence[:3])}\n"
         skills_context += f"Compiled Confidence: {sk.get('confidence', 'unknown')}\n"
+    prompt = """You are a logical policy reasoning engine. Your ONLY job is to compare scenario parameters against rule thresholds using pure arithmetic, then output the correct action.
+CRITICAL LANGUAGE INTERPRETATION RULES:
+- "No refunds after X days" means: refunds ARE allowed if the scenario is BEFORE X days. The word "after" creates a threshold at X. Below X = allowed. Above X = denied.
+- "Full refund within X days" means: refunds are allowed ONLY if scenario is WITHIN X days. Below X = allowed. Above X = denied.
+- "No refunds for X" (without a threshold) is an absolute ban.
+ALWAYS compute: does the scenario value fall on the ALLOWED side or the DENIED side of the threshold?
+Follow these exact steps:
+STEP 1: Extract numeric thresholds from the matched rule (e.g., "60 days" → 60).
+STEP 2: Extract the corresponding parameter from the scenario (e.g., days_since_purchase=45).
+STEP 3: COMPARE: Write the comparison explicitly (e.g., "45 < 60, so customer is BEFORE the threshold").
+STEP 4: DECIDE based solely on the comparison outcome.
+Example A:
+  Rule: "No refunds after 60 days. If purchase was more than 60 days ago, deny."
+  Scenario: days_since_purchase=45
+  STEP 1: threshold = 60 days
+  STEP 2: scenario = 45 days
+  STEP 3: 45 < 60, customer is BEFORE the threshold
+  STEP 4: Action = approve (customer qualifies under 60-day limit)
+Example B:
+  Rule: "Full refund only within 14 days of purchase"
+  Scenario: days_since_purchase=45
+  STEP 1: threshold = 14 days
+  STEP 2: scenario = 45 days
+  STEP 3: 45 > 14, customer is AFTER the threshold
+  STEP 4: Action = deny (outside the refund window)
+Your recommended_action MUST exactly match what the math says. Do not let the emotional tone of the rule ("absolutely no", "no exceptions") override the arithmetic threshold.
+confidence:
+- retrieval_score < 0.3 → 0.0-0.2 (unrelated)
+- 0.3-0.5 → 0.2-0.5 (weak)
+- 0.5-0.7 → 0.5-0.75 (moderate)
+- > 0.7 and correct match → 0.75-0.95 (strong)
+- gibberish → 0.0
+Respond with ONLY this JSON:
 {
+  "recommended_action": "action based on your math comparison",
+  "rule_applied": "exact rule text from best matching skill",
+  "evidence": ["evidence items"],
+  "skill_matched": "skill category",
   "confidence": 0.0,
+  "reasoning": "STEP 1: [threshold] STEP 2: [scenario value] STEP 3: [numeric comparison] STEP 4: [action]"
 }"""
     user_content = f"--- Scenario ---\n{scenario}\n\n--- Additional Context ---\n{json.dumps(context or {})}\n\n--- Retrieved Skills (ranked by relevance) ---\n{skills_context}"
     response_str = await llm_call(prompt, user_content)
     result = _parse_json(response_str)
     result["retrieval_scores"] = retrieval_scores
+    result["cached_embedding"] = cached
     return result
 async def _baseline_query(scenario: str, context: dict = None) -> dict:
     prompt = """You are a generic AI assistant. You have NO company-specific knowledge or policies.
 Answer based only on general industry standards. Be honest about your lack of specific context.
 Respond with ONLY a JSON object:
 def _parse_json(raw: str) -> dict:
     try:
         clean = raw.strip()
         if clean.startswith("```json"):
             "skill_matched": "none",
             "confidence": 0.0,
             "retrieval_scores": [],
+            "reasoning": f"JSON parse error: {e}. Raw: {raw[:500]}",
         }
         "skill_matched": "none",
         "confidence": 0.0,
         "retrieval_scores": [],
+        "reasoning": msg,
     }

backend/db/supabase.py CHANGED Viewed

@@ -14,50 +14,88 @@ else:
     # but actual DB calls will fail if not provided.
     supabase = None
 def get_client():
     return supabase
 def get_current_brain(company_id: str):
-    if not supabase: return None
-    res = supabase.table("skills_files").select("*").eq("company_id", company_id).eq("is_current", True).execute()
     if res.data:
         return res.data[0]
     return None
 def save_skills_file(data: dict):
-    if not supabase: return None
     res = supabase.table("skills_files").insert(data).execute()
     return res.data
 def save_compile_run(data: dict):
-    if not supabase: return None
     res = supabase.table("compile_runs").insert(data).execute()
     return res.data
 def update_compile_run(run_id: str, data: dict):
-    if not supabase: return None
     res = supabase.table("compile_runs").update(data).eq("id", run_id).execute()
     return res.data
 def get_source_hashes(company_id: str):
-    if not supabase: return {}
     # Get the latest current brain
     brain = get_current_brain(company_id)
     if brain:
         return brain.get("source_hashes", {})
     return {}
 def save_source_file(data: dict):
-    if not supabase: return None
     res = supabase.table("source_files").insert(data).execute()
     return res.data
 def get_skills_by_brain_id(brain_id: str):
-    if not supabase: return []
     res = supabase.table("skills").select("*").eq("skills_file_id", brain_id).execute()
     return res.data
 def insert_skills(data: list):
-    if not supabase: return None
     res = supabase.table("skills").insert(data).execute()
     return res.data

     # but actual DB calls will fail if not provided.
     supabase = None
 def get_client():
+    """Return the module-level `supabase` client object (may be None)."""
     return supabase
 def get_current_brain(company_id: str):
+    """Fetch the `skills_files` row flagged `is_current` for `company_id`.
+
+    Returns the first matching row, or None when the client is unset or
+    the query returns no rows.
+    """
+    if not supabase:
+        return None
+    res = (
+        supabase.table("skills_files")
+        .select("*")
+        .eq("company_id", company_id)
+        .eq("is_current", True)
+        .execute()
+    )
     if res.data:
+        # Only the first row is used; the query itself applies no ordering.
         return res.data[0]
     return None
 def save_skills_file(data: dict):
+    """Insert `data` into the `skills_files` table and return `res.data`.
+
+    Returns None without touching the database when the client is unset.
+    """
+    if not supabase:
+        return None
     res = supabase.table("skills_files").insert(data).execute()
     return res.data
 def save_compile_run(data: dict):
+    """Insert `data` into the `compile_runs` table and return `res.data`.
+
+    Returns None without touching the database when the client is unset.
+    """
+    if not supabase:
+        return None
     res = supabase.table("compile_runs").insert(data).execute()
     return res.data
 def update_compile_run(run_id: str, data: dict):
+    """Apply `data` to the `compile_runs` row whose `id` equals `run_id`.
+
+    Returns `res.data` from the update, or None when the client is unset.
+    """
+    if not supabase:
+        return None
     res = supabase.table("compile_runs").update(data).eq("id", run_id).execute()
     return res.data
 def get_source_hashes(company_id: str):
+    """Return the `source_hashes` value stored on the company's current brain.
+
+    Falls back to an empty dict when the client is unset or no current
+    brain exists.
+    """
+    if not supabase:
+        return {}
     # Get the latest current brain
     brain = get_current_brain(company_id)
     if brain:
+        # NOTE(review): the {} default only applies when the key is absent;
+        # a key stored as null would come back as None - confirm callers cope.
         return brain.get("source_hashes", {})
     return {}
 def save_source_file(data: dict):
+    """Insert `data` into the `source_files` table and return `res.data`.
+
+    Returns None without touching the database when the client is unset.
+    """
+    if not supabase:
+        return None
     res = supabase.table("source_files").insert(data).execute()
     return res.data
 def get_skills_by_brain_id(brain_id: str):
+    """Return all `skills` rows whose `skills_file_id` equals `brain_id`.
+
+    Returns an empty list when the client is unset.
+    """
+    if not supabase:
+        return []
     res = supabase.table("skills").select("*").eq("skills_file_id", brain_id).execute()
     return res.data
 def insert_skills(data: list):
+    """Insert the rows in `data` into the `skills` table and return `res.data`.
+
+    Returns None without touching the database when the client is unset.
+    """
+    if not supabase:
+        return None
     res = supabase.table("skills").insert(data).execute()
     return res.data
+def get_brain_by_version(company_id: str, version: str):
+    """Fetch the `skills_files` row for `company_id` with the given `version`.
+
+    Returns the first matching row, or None when the client is unset or
+    the query returns no rows.
+    """
+    if not supabase:
+        return None
+    res = (
+        supabase.table("skills_files")
+        .select("*")
+        .eq("company_id", company_id)
+        .eq("version", version)
+        .execute()
+    )
+    if res.data:
+        # Only the first row is used; the query itself applies no ordering.
+        return res.data[0]
+    return None

backend/graph/graph.py CHANGED Viewed

@@ -1,30 +1,124 @@
 from langgraph.graph import StateGraph, END
 from backend.graph.state import BrainState
-from backend.graph.nodes.load_and_chunk import load_and_chunk
-from backend.graph.nodes.cluster_evidence import cluster_evidence
 from backend.graph.nodes.synthesize_skills import synthesize_skills
-from backend.graph.nodes.quality_normalize import quality_normalize
 from backend.graph.nodes.write_brain import write_brain
 def build_compilation_graph() -> StateGraph:
     """
-    Linear 5-node pipeline:
-      load_and_chunk → cluster_evidence → synthesize_skills → quality_normalize → write_brain
     """
     workflow = StateGraph(BrainState)
-    workflow.add_node("load_and_chunk", load_and_chunk)
-    workflow.add_node("cluster_evidence", cluster_evidence)
     workflow.add_node("synthesize_skills", synthesize_skills)
-    workflow.add_node("quality_normalize", quality_normalize)
     workflow.add_node("write_brain", write_brain)
-    workflow.set_entry_point("load_and_chunk")
-    workflow.add_edge("load_and_chunk", "cluster_evidence")
-    workflow.add_edge("cluster_evidence", "synthesize_skills")
-    workflow.add_edge("synthesize_skills", "quality_normalize")
-    workflow.add_edge("quality_normalize", "write_brain")
     workflow.add_edge("write_brain", END)
     return workflow.compile()

 from langgraph.graph import StateGraph, END
+from langgraph.types import Send
 from backend.graph.state import BrainState
+from backend.graph.nodes.load_sources import load_sources
+from backend.graph.nodes.ingest_notion import ingest_notion
+from backend.graph.nodes.ingest_slack import ingest_slack
+from backend.graph.nodes.ingest_tickets import ingest_tickets
+from backend.graph.nodes.ingest_join import ingest_join
+from backend.graph.nodes.extract_decisions import extract_decisions
+from backend.graph.nodes.extract_workflows import extract_workflows
+from backend.graph.nodes.extract_exceptions import extract_exceptions
+from backend.graph.nodes.detect_contradictions import detect_contradictions
 from backend.graph.nodes.synthesize_skills import synthesize_skills
+from backend.graph.nodes.link_evidence import link_evidence
+from backend.graph.nodes.score_confidence import score_confidence
 from backend.graph.nodes.write_brain import write_brain
+def route_to_ingestion(state: BrainState) -> list[Send]:
+    """Fan-out: dispatch source files to type-specific ingestion nodes.
+
+    Builds one `Send` per source file. Each payload carries the run's
+    `company_id` and `job_id` plus a one-element `source_files` list, so
+    the receiving node sees only its own file.
+    """
+    sends = []
+    for f in state.get("source_files", []):
+        dt = f.get("doc_type", "unknown")
+        payload = {
+            "company_id": state["company_id"],
+            "job_id": state["job_id"],
+            "source_files": [f],
+        }
+        if dt == "notion_md":
+            sends.append(Send("ingest_notion", payload))
+        elif dt == "slack_json":
+            sends.append(Send("ingest_slack", payload))
+        elif dt == "tickets_json":
+            sends.append(Send("ingest_tickets", payload))
+        # Any other doc_type (including the "unknown" default) is skipped
+        # without a log line or error.
+    # NOTE(review): an empty list is returned when no file matches - confirm
+    # how the graph behaves when nothing is dispatched.
+    return sends
+def route_to_extraction(state: BrainState) -> list[Send]:
+    """Fan-out: dispatch all chunks to 4 parallel extraction agents.
+
+    Each target node receives its own shallow copy of the full state
+    (`dict(state)`); nested values such as chunk lists are shared.
+    """
+    return [
+        Send("extract_decisions", dict(state)),
+        Send("extract_workflows", dict(state)),
+        Send("extract_exceptions", dict(state)),
+        Send("detect_contradictions", dict(state)),
+    ]
 def build_compilation_graph() -> StateGraph:
     """
+    Parallel multi-agent graph:
+    load_sources
+      → route_to_ingestion (Send fan-out)
+      → [ingest_notion, ingest_slack, ingest_tickets] (parallel)
+      → ingest_join (barrier)
+      → route_to_extraction (Send fan-out)
+      → [extract_decisions, extract_workflows, extract_exceptions, detect_contradictions] (parallel)
+      → synthesize_skills → link_evidence → score_confidence → write_brain
+
+    Registers 13 nodes on a `StateGraph(BrainState)`, wires the edges
+    listed above and returns the result of `workflow.compile()`.
     """
     workflow = StateGraph(BrainState)
+    # --- Ingestion layer ---
+    workflow.add_node("load_sources", load_sources)
+    workflow.add_node("ingest_notion", ingest_notion)
+    workflow.add_node("ingest_slack", ingest_slack)
+    workflow.add_node("ingest_tickets", ingest_tickets)
+    workflow.add_node("ingest_join", ingest_join)
+    # --- Extraction layer ---
+    workflow.add_node("extract_decisions", extract_decisions)
+    workflow.add_node("extract_workflows", extract_workflows)
+    workflow.add_node("extract_exceptions", extract_exceptions)
+    workflow.add_node("detect_contradictions", detect_contradictions)
+    # --- Compilation layer ---
     workflow.add_node("synthesize_skills", synthesize_skills)
+    workflow.add_node("link_evidence", link_evidence)
+    workflow.add_node("score_confidence", score_confidence)
     workflow.add_node("write_brain", write_brain)
+    # --- Edges ---
+    workflow.set_entry_point("load_sources")
+    # load_sources fans out to 3 parallel ingest nodes
+    workflow.add_conditional_edges(
+        "load_sources",
+        route_to_ingestion,
+        [
+            "ingest_notion",
+            "ingest_slack",
+            "ingest_tickets",
+        ],
+    )
+    # All 3 ingest nodes converge at the barrier join
+    workflow.add_edge("ingest_notion", "ingest_join")
+    workflow.add_edge("ingest_slack", "ingest_join")
+    workflow.add_edge("ingest_tickets", "ingest_join")
+    # ingest_join fans out to 4 parallel extraction agents
+    workflow.add_conditional_edges(
+        "ingest_join",
+        route_to_extraction,
+        [
+            "extract_decisions",
+            "extract_workflows",
+            "extract_exceptions",
+            "detect_contradictions",
+        ],
+    )
+    # All 4 extraction agents converge at synthesize_skills
+    workflow.add_edge("extract_decisions", "synthesize_skills")
+    workflow.add_edge("extract_workflows", "synthesize_skills")
+    workflow.add_edge("extract_exceptions", "synthesize_skills")
+    workflow.add_edge("detect_contradictions", "synthesize_skills")
+    # Sequential compilation pipeline
+    workflow.add_edge("synthesize_skills", "link_evidence")
+    workflow.add_edge("link_evidence", "score_confidence")
+    workflow.add_edge("score_confidence", "write_brain")
     workflow.add_edge("write_brain", END)
+    # NOTE(review): this returns the compile() result, while the signature is
+    # annotated `-> StateGraph` - confirm the intended return annotation.
     return workflow.compile()

backend/graph/nodes/cluster_evidence.py DELETED Viewed

@@ -1,64 +0,0 @@
-"""
-Node 2: Embed all chunks and cluster them by domain using the LLM.
-Emits SSE stage: EMBEDDING
-"""
-import json
-from backend.graph.state import BrainState
-from backend.llm import llm_call, get_embeddings
-from backend.sse import emit
-async def cluster_evidence(state: BrainState) -> dict:
-    job_id = state["job_id"]
-    chunks = state.get("chunks", [])
-    print(f"[{job_id}] Node cluster_evidence started with {len(chunks)} chunks")
-    if not chunks:
-        await emit(job_id, "stage", {"name": "EMBEDDING", "detail": "No chunks to embed"})
-        return {"clusters": {"domains": {}}}
-    await emit(job_id, "stage", {"name": "EMBEDDING", "detail": f"Embedding {len(chunks)} chunks"})
-    # Build a numbered summary of each chunk for the LLM
-    summaries = []
-    for i, c in enumerate(chunks):
-        # Truncate long chunks for the categorization prompt
-        preview = c["text"][:300].replace("\n", " ")
-        summaries.append(f"[{i}] ({c['source_file']}) {preview}")
-    chunk_list_text = "\n".join(summaries)
-    prompt = """You are an operations analyst. Below is a numbered list of text chunks extracted from a company's internal documents (SOPs, Slack messages, support tickets).
-Categorize each chunk into an operational domain. Use clear domain names like:
-"Customer Support", "Engineering", "Sales", "Human Resources", "Finance", "Operations", etc.
-Return ONLY a valid JSON object mapping domain names to arrays of chunk indices.
-Example: {"Customer Support": [0, 3, 5], "Engineering": [1, 2], "Sales": [4]}
-Every chunk index must appear exactly once. Do not skip any."""
-    response_str = await llm_call(prompt, chunk_list_text)
-    try:
-        clean = response_str.strip()
-        if clean.startswith("```json"):
-            clean = clean[7:]
-        if clean.startswith("```"):
-            clean = clean[3:]
-        if clean.endswith("```"):
-            clean = clean[:-3]
-        domains = json.loads(clean.strip())
-    except Exception as e:
-        print(f"[cluster_evidence] Failed to parse LLM clustering: {e}")
-        # Fallback: put all chunks in one cluster
-        domains = {"General": list(range(len(chunks)))}
-    await emit(job_id, "stage", {
-        "name": "EMBEDDING_DONE",
-        "detail": f"Clustered into {len(domains)} domains: {list(domains.keys())}",
-    })
-    print(f"[{job_id}] Node cluster_evidence finished with {len(domains)} domains")
-    return {"clusters": {"domains": domains}}

backend/graph/nodes/detect_contradictions.py ADDED Viewed

	@@ -0,0 +1,51 @@

+from backend.graph.state import BrainState
+from backend.llm import safe_llm_json_call
+from backend.sse import emit
+SYSTEM = """You are a contradiction detection specialist. Your ONLY job is to find CONTRADICTIONS, CONFLICTS, and INCONSISTENCIES across company communications.
+Output ONLY a JSON array. No preamble. No explanation. No markdown.
+Each item must have exactly these fields:
+  - id: short snake_case identifier (e.g., "refund_window_conflict")
+  - domain: the operational domain this contradiction affects
+  - claim_a: what the first source says
+  - source_a: which source file claim_a comes from
+  - claim_b: what the second source says
+  - source_b: which source file claim_b comes from
+  - resolution: which claim takes precedence in practice (based on Slack/ticket behavior vs SOP policy)
+  - severity: "high", "medium", or "low"
+If you find no contradictions, output: []
+Example: [{"id": "refund_window_conflict", "domain": "Customer Support", "claim_a": "30-day refund window", "source_a": "notion_refund_sop.md", "claim_b": "45-day refund approved for loyal customer", "source_b": "slack_export_support.json", "resolution": "Observed behavior (Slack) shows exceptions beyond SOP — default to SOP, escalate exceptions", "severity": "medium"}]"""
+async def detect_contradictions(state: BrainState) -> dict:
+    """Graph node: ask the LLM for contradictions across all ingested chunks.
+
+    Reads `job_id` and `all_chunks` from the state, joins every chunk's
+    `text` into one prompt, calls `safe_llm_json_call` with the `SYSTEM`
+    prompt, and emits a stage event before and after the call.
+
+    Returns:
+        `{"contradictions": results}`, where `results` is whatever
+        `safe_llm_json_call` returned.
+    """
+    job_id = state["job_id"]
+    chunks = state.get("all_chunks", [])
+    print(f"[{job_id}] Node detect_contradictions: processing {len(chunks)} chunks")
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "DETECT_CONTRADICTIONS",
+            "detail": "Detecting cross-source contradictions...",
+        },
+    )
+    # All chunk texts are concatenated; no truncation is applied here.
+    chunk_text = "\n\n---\n\n".join([c.get("text", "") for c in chunks])
+    user = f"Detect contradictions and conflicting instructions across this company data:\n\n{chunk_text}"
+    results = await safe_llm_json_call(SYSTEM, user, max_tokens=2048)
+    # NOTE(review): len() assumes the helper always returns a sized
+    # collection, even on a parse failure - confirm in backend.llm.
+    print(f"[{job_id}] detect_contradictions: found {len(results)} contradictions")
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "DETECT_CONTRADICTIONS_DONE",
+            "detail": f"Found {len(results)} contradictions",
+        },
+    )
+    return {"contradictions": results}

backend/graph/nodes/extract_decisions.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from backend.graph.state import BrainState
+from backend.llm import safe_llm_json_call
+from backend.sse import emit
+SYSTEM = """You are a policy extraction specialist. Your ONLY job is to extract DECISIONS, RULES, and POLICIES from company communications.
+Output ONLY a JSON array. No preamble. No explanation. No markdown.
+Each item must have exactly these fields:
+  - id: short snake_case identifier (e.g., "refund_annual_14day")
+  - category: operational domain (e.g., "Customer Support", "Engineering", "Finance")
+  - rule: the precise, actionable rule text including thresholds, timeframes, approvals
+  - rationale: why this rule exists, based on the evidence
+  - evidence: array of specific quotes or references from the source text that support this rule
+  - source_files: array of filenames this rule came from
+If you find no decisions or rules, output: []
+Example: [{"id": "refund_annual_14day", "category": "Customer Support", "rule": "Annual plan customers within 14 days of purchase are eligible for full refund", "rationale": "No-questions policy for annual plans within 14 days", "evidence": ["notion_refund_sop.md: Annual plan customers within 14 days..."], "source_files": ["notion_refund_sop.md"]}]"""
+async def extract_decisions(state: BrainState) -> dict:
+    """Graph node: ask the LLM to extract decisions, rules and policies.
+
+    Reads `job_id` and `all_chunks` from the state, joins every chunk's
+    `text` into one prompt, calls `safe_llm_json_call` with the `SYSTEM`
+    prompt, and emits a stage event before and after the call.
+
+    Returns:
+        `{"raw_decisions": results}`, where `results` is whatever
+        `safe_llm_json_call` returned.
+    """
+    job_id = state["job_id"]
+    chunks = state.get("all_chunks", [])
+    print(f"[{job_id}] Node extract_decisions: processing {len(chunks)} chunks")
+    await emit(
+        job_id,
+        "stage",
+        {"name": "EXTRACT_DECISIONS", "detail": "Extracting rules and policies..."},
+    )
+    # All chunk texts are concatenated; no truncation is applied here.
+    chunk_text = "\n\n---\n\n".join([c.get("text", "") for c in chunks])
+    user = f"Extract all decisions, rules, and policies from this company data:\n\n{chunk_text}"
+    results = await safe_llm_json_call(SYSTEM, user, max_tokens=2048)
+    # NOTE(review): len() assumes the helper always returns a sized
+    # collection, even on a parse failure - confirm in backend.llm.
+    print(f"[{job_id}] extract_decisions: extracted {len(results)} rules")
+    await emit(
+        job_id,
+        "stage",
+        {"name": "EXTRACT_DECISIONS_DONE", "detail": f"Found {len(results)} rules"},
+    )
+    return {"raw_decisions": results}

backend/graph/nodes/extract_exceptions.py ADDED Viewed

	@@ -0,0 +1,49 @@

+from backend.graph.state import BrainState
+from backend.llm import safe_llm_json_call
+from backend.sse import emit
+SYSTEM = """You are an exception extraction specialist. Your ONLY job is to extract EXCEPTIONS, EDGE CASES, CONSTRAINTS, CONDITIONAL RULES, and FORBIDDEN ACTIONS from company communications.
+Output ONLY a JSON array. No preamble. No explanation. No markdown.
+Each item must have exactly these fields:
+  - id: short snake_case identifier (e.g., "no_ltd_refunds")
+  - category: operational domain
+  - condition: the specific condition that triggers this exception
+  - action: what happens when this exception applies
+  - rationale: why this exception exists
+  - source_files: array of filenames this came from
+If you find no exceptions, output: []
+Example: [{"id": "no_ltd_refunds", "category": "Customer Support", "condition": "Customer has a lifetime deal account", "action": "Never process refunds for lifetime deal accounts", "rationale": "Explicitly stated in refund SOP as forbidden action", "source_files": ["notion_refund_sop.md"]}]"""
+async def extract_exceptions(state: BrainState) -> dict:
+    """Graph node: ask the LLM to extract exceptions and edge cases.
+
+    Reads `job_id` and `all_chunks` from the state, joins every chunk's
+    `text` into one prompt, calls `safe_llm_json_call` with the `SYSTEM`
+    prompt, and emits a stage event before and after the call.
+
+    Returns:
+        `{"exception_rules": results}`, where `results` is whatever
+        `safe_llm_json_call` returned.
+    """
+    job_id = state["job_id"]
+    chunks = state.get("all_chunks", [])
+    print(f"[{job_id}] Node extract_exceptions: processing {len(chunks)} chunks")
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "EXTRACT_EXCEPTIONS",
+            "detail": "Extracting exceptions and edge cases...",
+        },
+    )
+    # All chunk texts are concatenated; no truncation is applied here.
+    chunk_text = "\n\n---\n\n".join([c.get("text", "") for c in chunks])
+    user = f"Extract all exceptions, edge cases, constraints, and forbidden actions from this company data:\n\n{chunk_text}"
+    results = await safe_llm_json_call(SYSTEM, user, max_tokens=2048)
+    # NOTE(review): len() assumes the helper always returns a sized
+    # collection, even on a parse failure - confirm in backend.llm.
+    print(f"[{job_id}] extract_exceptions: extracted {len(results)} exceptions")
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "EXTRACT_EXCEPTIONS_DONE",
+            "detail": f"Found {len(results)} exceptions",
+        },
+    )
+    return {"exception_rules": results}

backend/graph/nodes/extract_workflows.py ADDED Viewed

	@@ -0,0 +1,46 @@

+from backend.graph.state import BrainState
+from backend.llm import safe_llm_json_call
+from backend.sse import emit
+SYSTEM = """You are a workflow extraction specialist. Your ONLY job is to extract WORKFLOWS, PROCESSES, and SEQUENTIAL STEPS from company communications.
+Output ONLY a JSON array. No preamble. No explanation. No markdown.
+Each item must have exactly these fields:
+  - id: short snake_case identifier (e.g., "bug_triage_workflow")
+  - category: operational domain (e.g., "Engineering", "Customer Support")
+  - workflow_name: human-readable name for this workflow
+  - steps: array of step descriptions in order
+  - triggers: what initiates this workflow
+  - source_files: array of filenames this came from
+If you find no workflows, output: []
+Example: [{"id": "bug_triage_workflow", "category": "Engineering", "workflow_name": "Bug Triage", "steps": ["1. Identify severity (P0/P1/P2)", "2. Page on-call for P0", "3. 4hr SLA for P1"], "triggers": ["Bug report filed with severity label"], "source_files": ["notion_eng_runbook.md"]}]"""
+async def extract_workflows(state: BrainState) -> dict:
+    """Graph node: ask the LLM to extract workflows and step sequences.
+
+    Reads `job_id` and `all_chunks` from the state, joins every chunk's
+    `text` into one prompt, calls `safe_llm_json_call` with the `SYSTEM`
+    prompt, and emits a stage event before and after the call.
+
+    Returns:
+        `{"workflow_steps": results}`, where `results` is whatever
+        `safe_llm_json_call` returned.
+    """
+    job_id = state["job_id"]
+    chunks = state.get("all_chunks", [])
+    print(f"[{job_id}] Node extract_workflows: processing {len(chunks)} chunks")
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "EXTRACT_WORKFLOWS",
+            "detail": "Extracting workflows and processes...",
+        },
+    )
+    # All chunk texts are concatenated; no truncation is applied here.
+    chunk_text = "\n\n---\n\n".join([c.get("text", "") for c in chunks])
+    user = f"Extract all workflows, processes, and step-by-step procedures from this company data:\n\n{chunk_text}"
+    results = await safe_llm_json_call(SYSTEM, user, max_tokens=2048)
+    # NOTE(review): len() assumes the helper always returns a sized
+    # collection, even on a parse failure - confirm in backend.llm.
+    print(f"[{job_id}] extract_workflows: extracted {len(results)} workflows")
+    await emit(
+        job_id,
+        "stage",
+        {"name": "EXTRACT_WORKFLOWS_DONE", "detail": f"Found {len(results)} workflows"},
+    )
+    return {"workflow_steps": results}

backend/graph/nodes/ingest_join.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from backend.graph.state import BrainState
+from backend.sse import emit
+async def ingest_join(state: BrainState) -> dict:
+    """Graph node: merge the three ingestion outputs into one chunk list.
+
+    Concatenates `structured_sops`, `normalized_events` and
+    `resolved_cases` (each defaulting to an empty list) in that order,
+    logs the counts, and emits an `INGEST_JOIN` stage event.
+
+    Returns:
+        `{"all_chunks": all_chunks}` with the concatenated list.
+    """
+    job_id = state["job_id"]
+    structured_sops = state.get("structured_sops", [])
+    normalized_events = state.get("normalized_events", [])
+    resolved_cases = state.get("resolved_cases", [])
+    # Order is fixed: SOP chunks first, then Slack events, then ticket cases.
+    all_chunks = []
+    all_chunks.extend(structured_sops)
+    all_chunks.extend(normalized_events)
+    all_chunks.extend(resolved_cases)
+    print(
+        f"[{job_id}] Node ingest_join: merged {len(structured_sops)} SOPs + {len(normalized_events)} events + {len(resolved_cases)} tickets = {len(all_chunks)} chunks"
+    )
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "INGEST_JOIN",
+            "detail": f"Merged {len(all_chunks)} total chunks from all sources",
+        },
+    )
+    return {"all_chunks": all_chunks}

backend/graph/nodes/ingest_notion.py ADDED Viewed

	@@ -0,0 +1,60 @@

+from backend.graph.state import BrainState
+from backend.sse import emit
+async def ingest_notion(state: BrainState) -> dict:
+    """Graph node: chunk every `notion_md` source file in the state.
+
+    Filters `source_files` by `doc_type == "notion_md"`, runs each file
+    through `_chunk_markdown`, and emits an `INGEST_NOTION` stage event
+    with the file and chunk counts.
+
+    Returns:
+        `{"structured_sops": structured_sops}` with the collected chunks.
+    """
+    job_id = state["job_id"]
+    source_files = state.get("source_files", [])
+    notion_files = [f for f in source_files if f.get("doc_type") == "notion_md"]
+    print(f"[{job_id}] Node ingest_notion: {len(notion_files)} notion files")
+    structured_sops = []
+    for sf in notion_files:
+        chunks = _chunk_markdown(sf)
+        structured_sops.extend(chunks)
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "INGEST_NOTION",
+            "detail": f"Processed {len(notion_files)} SOP files into {len(structured_sops)} chunks",
+        },
+    )
+    print(f"[{job_id}] ingest_notion finished: {len(structured_sops)} chunks")
+    return {"structured_sops": structured_sops}
+def _chunk_markdown(sf: dict) -> list:
+    """Split a markdown source file into one chunk per `## ` section.
+
+    Lines before the first `## ` heading are collected under the header
+    "Introduction". Only lines starting with `## ` open a new section;
+    every other line (including `#` and `###` headings) is body text.
+
+    Args:
+        sf: Source-file dict; reads `sf["content"]` and `sf["filename"]`.
+
+    Returns:
+        A list of chunk dicts with `text`, `source_file`, `chunk_index`,
+        `doc_type` and `section_header`. Sections whose body is empty
+        after stripping are omitted but still consume a `chunk_index`.
+    """
+    heading_prefix = "## "
+    sections = []
+    current_header = "Introduction"
+    current_body = []
+    for line in sf["content"].split("\n"):
+        if line.startswith(heading_prefix):
+            if current_body:
+                sections.append((current_header, "\n".join(current_body).strip()))
+            # Remove only the "## " marker. lstrip("# ") strips a character
+            # set, so a title such as "#1 Priority" used to lose its own '#'.
+            current_header = line[len(heading_prefix):].strip()
+            current_body = []
+        else:
+            current_body.append(line)
+    if current_body:
+        sections.append((current_header, "\n".join(current_body).strip()))
+    return [
+        {
+            "text": f"[{header}] {body}",
+            "source_file": sf["filename"],
+            "chunk_index": i,
+            "doc_type": "notion_md",
+            "section_header": header,
+        }
+        for i, (header, body) in enumerate(sections)
+        if body
+    ]

backend/graph/nodes/ingest_slack.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import json
+from backend.graph.state import BrainState
+from backend.sse import emit
+async def ingest_slack(state: BrainState) -> dict:
+    """Graph node: chunk every `slack_json` source file in the state.
+
+    Filters `source_files` by `doc_type == "slack_json"`, runs each file
+    through `_chunk_slack`, and emits an `INGEST_SLACK` stage event with
+    the file and message counts.
+
+    Returns:
+        `{"normalized_events": normalized_events}` with the collected chunks.
+    """
+    job_id = state["job_id"]
+    source_files = state.get("source_files", [])
+    slack_files = [f for f in source_files if f.get("doc_type") == "slack_json"]
+    print(f"[{job_id}] Node ingest_slack: {len(slack_files)} slack files")
+    normalized_events = []
+    for sf in slack_files:
+        chunks = _chunk_slack(sf)
+        normalized_events.extend(chunks)
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "INGEST_SLACK",
+            "detail": f"Processed {len(slack_files)} Slack exports into {len(normalized_events)} messages",
+        },
+    )
+    print(f"[{job_id}] ingest_slack finished: {len(normalized_events)} messages")
+    return {"normalized_events": normalized_events}
+def _chunk_slack(sf: dict) -> list:
+    """Turn a Slack export file into one chunk per non-empty message.
+
+    Args:
+        sf: Source-file dict; reads `sf["content"]` (JSON text expected to
+            hold a list of message objects) and `sf["filename"]`.
+
+    Returns:
+        A list of chunk dicts with `text`, `source_file`, `chunk_index`
+        (the message's position in the export) and `doc_type`. Returns an
+        empty list when the content is not valid JSON or not a JSON list.
+    """
+    try:
+        messages = json.loads(sf["content"])
+    except (json.JSONDecodeError, TypeError):
+        return []
+    # Valid JSON can still be an object or scalar; iterating those would
+    # crash on msg.get(), so treat them like unparseable input.
+    if not isinstance(messages, list):
+        return []
+    chunks = []
+    for i, msg in enumerate(messages):
+        if not isinstance(msg, dict):
+            # Stray non-object entries are skipped; indices stay positional.
+            continue
+        text = msg.get("text", "")
+        if not text:
+            continue
+        user = msg.get("user", "unknown")
+        channel = msg.get("channel", "unknown")
+        chunks.append(
+            {
+                "text": f"[Slack #{channel} @{user}] {text}",
+                "source_file": sf["filename"],
+                "chunk_index": i,
+                "doc_type": "slack_json",
+            }
+        )
+    return chunks

backend/graph/nodes/ingest_tickets.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import json
+from backend.graph.state import BrainState
+from backend.sse import emit
+async def ingest_tickets(state: BrainState) -> dict:
+    """Graph node: chunk every `tickets_json` source file in the state.
+
+    Filters `source_files` by `doc_type == "tickets_json"`, runs each file
+    through `_chunk_tickets`, and emits an `INGEST_TICKETS` stage event
+    with the file and case counts.
+
+    Returns:
+        `{"resolved_cases": resolved_cases}` with the collected chunks.
+    """
+    job_id = state["job_id"]
+    source_files = state.get("source_files", [])
+    ticket_files = [f for f in source_files if f.get("doc_type") == "tickets_json"]
+    print(f"[{job_id}] Node ingest_tickets: {len(ticket_files)} ticket files")
+    resolved_cases = []
+    for sf in ticket_files:
+        chunks = _chunk_tickets(sf)
+        resolved_cases.extend(chunks)
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "INGEST_TICKETS",
+            "detail": f"Processed {len(ticket_files)} ticket files into {len(resolved_cases)} cases",
+        },
+    )
+    print(f"[{job_id}] ingest_tickets finished: {len(resolved_cases)} tickets")
+    return {"resolved_cases": resolved_cases}
+def _chunk_tickets(sf: dict) -> list:
+    """Turn a ticket export file into one chunk per ticket with content.
+
+    Each chunk's text joins the ticket's non-empty `subject`,
+    `description`, `resolution`, `priority` and `customer_plan` fields,
+    in that order, as labelled parts separated by " | ".
+
+    Args:
+        sf: Source-file dict; reads `sf["content"]` (JSON text expected to
+            hold a list of ticket objects) and `sf["filename"]`.
+
+    Returns:
+        A list of chunk dicts with `text`, `source_file`, `chunk_index`
+        (the ticket's position in the export) and `doc_type`. Returns an
+        empty list when the content is not valid JSON or not a JSON list.
+    """
+    try:
+        tickets = json.loads(sf["content"])
+    except (json.JSONDecodeError, TypeError):
+        return []
+    # Valid JSON can still be an object or scalar; iterating those would
+    # crash on tkt.get(), so treat them like unparseable input.
+    if not isinstance(tickets, list):
+        return []
+    # (ticket key, label) pairs in the order they appear in the chunk text.
+    labelled_fields = (
+        ("subject", "Subject"),
+        ("description", "Description"),
+        ("resolution", "Resolution"),
+        ("priority", "Priority"),
+        ("customer_plan", "Plan"),
+    )
+    chunks = []
+    for i, tkt in enumerate(tickets):
+        if not isinstance(tkt, dict):
+            # Stray non-object entries are skipped; indices stay positional.
+            continue
+        parts = [
+            f"{label}: {tkt[key]}" for key, label in labelled_fields if tkt.get(key)
+        ]
+        text = " | ".join(parts)
+        if not text:
+            continue
+        chunks.append(
+            {
+                "text": f"[Zendesk Ticket] {text}",
+                "source_file": sf["filename"],
+                "chunk_index": i,
+                "doc_type": "tickets_json",
+            }
+        )
+    return chunks

backend/graph/nodes/link_evidence.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import json
+from backend.graph.state import BrainState
+from backend.llm import llm_call
+from backend.sse import emit
+async def link_evidence(state: BrainState) -> dict:
+    """Graph node: ask the LLM to enrich each draft skill's evidence list.
+
+    Reads `job_id`, `draft_skills` and `all_chunks` from the state. Sends
+    the draft skills plus the text of the first 25 chunks to `llm_call`,
+    strips an optional markdown code fence from the reply, and parses it
+    as JSON.
+
+    The draft skills are passed through unchanged when the reply cannot
+    be parsed, is not a JSON object, or its "skills" value is missing,
+    not a list, or empty - so a bad reply never drops skills or crashes
+    the run.
+
+    Returns:
+        `{"skills_with_evidence": [...]}`; an empty list when there are no
+        draft skills (the LLM is not called in that case).
+    """
+    job_id = state["job_id"]
+    draft_skills = state.get("draft_skills", [])
+    chunks = state.get("all_chunks", [])
+    print(
+        f"[{job_id}] Node link_evidence: enriching {len(draft_skills)} skills with evidence"
+    )
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "LINKING_EVIDENCE",
+            "detail": f"Linking evidence for {len(draft_skills)} skills",
+        },
+    )
+    if not draft_skills:
+        return {"skills_with_evidence": []}
+    prompt = """You are an evidence linking specialist. Below are draft operational skills and the original source chunks they were extracted from.
+For each skill, find the most specific evidence excerpts from the source chunks that support it. Enrich each skill's evidence array with concrete quotes.
+Return ONLY a JSON object:
+{
+  "skills": [
+    {
+      "id": "skill_id",
+      "category": "...",
+      "rule": "...",
+      "rationale": "...",
+      "evidence": ["Exact quote from source that supports this rule"],
+      "source_files": ["filename.ext"]
+    }
+  ]
+}
+Keep all existing fields intact. Only add or improve the evidence array."""
+    skills_text = json.dumps({"skills": draft_skills}, indent=2)
+    # Only the first 25 chunks are included in the prompt.
+    chunks_text = "\n\n---\n\n".join([c.get("text", "") for c in chunks[:25]])
+    user_content = (
+        f"--- Skills ---\n{skills_text}\n\n--- Source Chunks ---\n{chunks_text}"
+    )
+    response_str = await llm_call(prompt, user_content, max_tokens=4096)
+    enriched = None
+    try:
+        clean = response_str.strip()
+        if clean.startswith("```json"):
+            clean = clean[7:]
+        elif clean.startswith("```"):
+            clean = clean[3:]
+        if clean.endswith("```"):
+            clean = clean[:-3]
+        data = json.loads(clean.strip())
+        if isinstance(data, dict):
+            enriched = data.get("skills")
+    except Exception as e:
+        print(f"[{job_id}] [link_evidence] Parse error: {e}")
+    # A null, non-list or empty "skills" value used to flow through to
+    # len(enriched) below (TypeError) or wipe out every draft skill.
+    if not isinstance(enriched, list) or not enriched:
+        print(f"[{job_id}] [link_evidence] No usable skills in LLM reply; keeping drafts")
+        enriched = draft_skills
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "LINKING_DONE",
+            "detail": f"Evidence linked for {len(enriched)} skills",
+        },
+    )
+    print(f"[{job_id}] link_evidence: done")
+    return {"skills_with_evidence": enriched}

backend/graph/nodes/load_and_chunk.py DELETED Viewed

@@ -1,174 +0,0 @@
-"""
-Node 1: Load source files from disk and chunk them.
-Emits SSE stages: LOADING_DOCS, CHUNKING
-"""
-import os
-import json
-import hashlib
-import time
-from backend.graph.state import BrainState
-from backend.sse import emit
-async def load_and_chunk(state: BrainState) -> dict:
-    company_id = state["company_id"]
-    job_id = state["job_id"]
-    print(f"[{job_id}] Node load_and_chunk started")
-    await emit(job_id, "stage", {"name": "LOADING_DOCS", "detail": f"Reading sources for {company_id}"})
-    # Read files from the company-specific directory
-    # __file__ is backend/graph/nodes/load_and_chunk.py
-    base = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
-    sources_dir = os.path.join(base, "data", "sources", company_id)
-    if not os.path.isdir(sources_dir):
-        await emit(job_id, "pipeline_error", {"error": f"No source directory found: data/sources/{company_id}/"})
-        print(f"[{job_id}] Node load_and_chunk failed (Missing dir: {sources_dir})")
-        return {"errors": [f"Missing directory: {sources_dir}"], "source_files": [], "chunks": []}
-    source_files = []
-    for filename in sorted(os.listdir(sources_dir)):
-        filepath = os.path.join(sources_dir, filename)
-        if not os.path.isfile(filepath):
-            continue
-        with open(filepath, "r", encoding="utf-8") as f:
-            content = f.read()
-        doc_type = _detect_type(filename)
-        source_files.append({
-            "filename": filename,
-            "content": content,
-            "sha256": hashlib.sha256(content.encode("utf-8")).hexdigest(),
-            "doc_type": doc_type,
-        })
-    await emit(job_id, "stage", {
-        "name": "CHUNKING",
-        "detail": f"Splitting {len(source_files)} files into chunks",
-    })
-    chunks = []
-    for sf in source_files:
-        if sf["doc_type"] == "notion_md":
-            chunks.extend(_chunk_markdown(sf))
-        elif sf["doc_type"] == "slack_json":
-            chunks.extend(_chunk_slack(sf))
-        elif sf["doc_type"] == "tickets_json":
-            chunks.extend(_chunk_tickets(sf))
-        else:
-            # Treat unknown as plain text
-            chunks.append({
-                "text": sf["content"],
-                "source_file": sf["filename"],
-                "chunk_index": 0,
-                "doc_type": sf["doc_type"],
-            })
-    await emit(job_id, "stage", {
-        "name": "CHUNKING_DONE",
-        "detail": f"Produced {len(chunks)} chunks from {len(source_files)} files",
-    })
-    print(f"[{job_id}] Node load_and_chunk finished (chunks: {len(chunks)})")
-    return {"source_files": source_files, "chunks": chunks}
-# --- Helpers ---
-def _detect_type(filename: str) -> str:
-    fn = filename.lower()
-    if fn.endswith(".json"):
-        if "slack" in fn:
-            return "slack_json"
-        if "ticket" in fn or "zendesk" in fn:
-            return "tickets_json"
-        return "json"
-    if fn.endswith(".md"):
-        return "notion_md"
-    return "unknown"
-def _chunk_markdown(sf: dict) -> list:
-    """Split a markdown file by ## headers. Each section is a chunk."""
-    content = sf["content"]
-    sections = []
-    current_header = "Introduction"
-    current_body = []
-    for line in content.split("\n"):
-        if line.startswith("## "):
-            if current_body:
-                sections.append((current_header, "\n".join(current_body).strip()))
-            current_header = line.lstrip("# ").strip()
-            current_body = []
-        else:
-            current_body.append(line)
-    if current_body:
-        sections.append((current_header, "\n".join(current_body).strip()))
-    chunks = []
-    for i, (header, body) in enumerate(sections):
-        if not body:
-            continue
-        chunks.append({
-            "text": f"[{header}] {body}",
-            "source_file": sf["filename"],
-            "chunk_index": i,
-            "doc_type": "notion_md",
-            "section_header": header,
-        })
-    return chunks
-def _chunk_slack(sf: dict) -> list:
-    """Each Slack message is one chunk."""
-    try:
-        messages = json.loads(sf["content"])
-    except json.JSONDecodeError:
-        return []
-    chunks = []
-    for i, msg in enumerate(messages):
-        text = msg.get("text", "")
-        if not text:
-            continue
-        user = msg.get("user", "unknown")
-        channel = msg.get("channel", "unknown")
-        chunks.append({
-            "text": f"[Slack #{channel} @{user}] {text}",
-            "source_file": sf["filename"],
-            "chunk_index": i,
-            "doc_type": "slack_json",
-        })
-    return chunks
-def _chunk_tickets(sf: dict) -> list:
-    """Each ticket is one chunk."""
-    try:
-        tickets = json.loads(sf["content"])
-    except json.JSONDecodeError:
-        return []
-    chunks = []
-    for i, tkt in enumerate(tickets):
-        parts = []
-        if tkt.get("subject"):
-            parts.append(f"Subject: {tkt['subject']}")
-        if tkt.get("description"):
-            parts.append(f"Description: {tkt['description']}")
-        if tkt.get("resolution"):
-            parts.append(f"Resolution: {tkt['resolution']}")
-        if tkt.get("priority"):
-            parts.append(f"Priority: {tkt['priority']}")
-        if tkt.get("customer_plan"):
-            parts.append(f"Plan: {tkt['customer_plan']}")
-        text = " | ".join(parts)
-        if not text:
-            continue
-        chunks.append({
-            "text": f"[Zendesk Ticket] {text}",
-            "source_file": sf["filename"],
-            "chunk_index": i,
-            "doc_type": "tickets_json",
-        })
-    return chunks

backend/graph/nodes/load_sources.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import os
+import hashlib
+from backend.graph.state import BrainState
+from backend.sse import emit
+def _detect_type(filename: str) -> str:
+    """Classify a source file by its name (case-insensitive).
+
+    ``.md`` files map to ``"notion_md"``. ``.json`` files map to
+    ``"slack_json"`` when the name contains "slack", to ``"tickets_json"``
+    when it contains "ticket" or "zendesk", and to ``"json"`` otherwise.
+    Every other name yields ``"unknown"``.
+    """
+    lowered = filename.lower()
+    if lowered.endswith(".md"):
+        return "notion_md"
+    if not lowered.endswith(".json"):
+        return "unknown"
+    # "slack" is tested first, so it wins when a name matches both markers.
+    if "slack" in lowered:
+        return "slack_json"
+    if any(marker in lowered for marker in ("ticket", "zendesk")):
+        return "tickets_json"
+    return "json"
+async def load_sources(state: BrainState) -> dict:
+    """Read every regular file in ``data/sources/<company_id>/`` into state.
+
+    Emits a ``LOADING_DOCS`` stage event before reading and a
+    ``LOADING_DOCS_DONE`` stage event afterwards. When the directory does
+    not exist, emits ``pipeline_error`` and returns an ``errors`` entry
+    together with an empty ``source_files`` list.
+
+    Files that cannot be opened or are not valid UTF-8 are logged and
+    skipped, so one stray binary file does not abort the whole load.
+
+    Returns:
+        ``{"source_files": [...]}``; each entry holds ``filename``,
+        ``content`` (decoded as UTF-8), ``sha256`` (hex digest of the
+        UTF-8 encoded content) and ``doc_type`` (from ``_detect_type``).
+        Entries are ordered by filename.
+    """
+    company_id = state["company_id"]
+    job_id = state["job_id"]
+    print(f"[{job_id}] Node load_sources started")
+    await emit(
+        job_id,
+        "stage",
+        {"name": "LOADING_DOCS", "detail": f"Reading sources for {company_id}"},
+    )
+    # Four dirname() hops up from this file's absolute path; the sources
+    # tree is looked up under that directory.
+    base = os.path.dirname(
+        os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+    )
+    sources_dir = os.path.join(base, "data", "sources", company_id)
+    if not os.path.isdir(sources_dir):
+        await emit(
+            job_id,
+            "pipeline_error",
+            {"error": f"No source directory: data/sources/{company_id}/"},
+        )
+        print(f"[{job_id}] load_sources failed — missing dir: {sources_dir}")
+        return {"errors": [f"Missing directory: {sources_dir}"], "source_files": []}
+    source_files = []
+    for filename in sorted(os.listdir(sources_dir)):
+        filepath = os.path.join(sources_dir, filename)
+        if not os.path.isfile(filepath):
+            continue
+        try:
+            with open(filepath, "r", encoding="utf-8") as f:
+                content = f.read()
+        except (OSError, UnicodeDecodeError) as e:
+            # Best effort: an unreadable or non-text file is reported and
+            # skipped instead of raising out of the node.
+            print(f"[{job_id}] load_sources: skipping {filename}: {e}")
+            continue
+        source_files.append(
+            {
+                "filename": filename,
+                "content": content,
+                "sha256": hashlib.sha256(content.encode("utf-8")).hexdigest(),
+                "doc_type": _detect_type(filename),
+            }
+        )
+    print(f"[{job_id}] load_sources finished: {len(source_files)} files")
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "LOADING_DOCS_DONE",
+            "detail": f"Loaded {len(source_files)} source files",
+        },
+    )
+    return {"source_files": source_files}

backend/graph/nodes/quality_normalize.py DELETED Viewed

@@ -1,83 +0,0 @@
-"""
-Node 4: De-duplicate skills, resolve conflicts, score confidence, enforce schema.
-Emits SSE stage: QUALITY_CHECK
-"""
-import json
-from backend.graph.state import BrainState
-from backend.llm import llm_call
-from backend.sse import emit
-async def quality_normalize(state: BrainState) -> dict:
-    job_id = state["job_id"]
-    raw_skills = state.get("raw_skills", [])
-    print(f"[{job_id}] Node quality_normalize started with {len(raw_skills)} raw skills")
-    if not raw_skills:
-        await emit(job_id, "stage", {"name": "QUALITY_CHECK", "detail": "No skills to normalize"})
-        print(f"[{job_id}] Node quality_normalize finished (0 skills)")
-        return {"skills_file": {"skills": []}}
-    await emit(job_id, "stage", {
-        "name": "QUALITY_CHECK",
-        "detail": f"Normalizing {len(raw_skills)} raw skills",
-    })
-    prompt = """You are a quality assurance agent for an operational skills file.
-Below is a raw list of skills extracted from company documents. Your job:
-1. DEDUPLICATE: merge skills that describe the same rule (keep the most complete version).
-2. RESOLVE CONFLICTS: if two skills contradict, keep both but note the conflict in the rationale. Prefer observed behavior (from Slack/tickets) over stated policy (from SOPs) when they conflict.
-3. SCORE CONFIDENCE (0.0 to 1.0) for each skill based on:
-   - 0.9–1.0: multiple confirming sources, clear unambiguous rule
-   - 0.7–0.89: single strong source or multiple weak sources
-   - 0.5–0.69: only one source, or some ambiguity
-   - 0.3–0.49: weak evidence or significant ambiguity
-   - < 0.3: speculative or poorly supported
-4. ENFORCE SCHEMA: every skill must have: id, category, rule, rationale, evidence (array), confidence (float).
-Return ONLY a JSON object:
-{
-  "skills": [
-    {
-      "id": "skill_slug",
-      "category": "Domain Name",
-      "rule": "The specific rule text",
-      "rationale": "Why this rule exists",
-      "evidence": ["source reference 1", "source reference 2"],
-      "confidence": 0.85
-    }
-  ]
-}"""
-    skills_text = json.dumps(raw_skills, indent=2)
-    print(f"[{job_id}] Requesting quality normalization...")
-    response_str = await llm_call(prompt, skills_text, max_tokens=8192)
-    print(f"[{job_id}] Received quality normalization response")
-    try:
-        clean = response_str.strip()
-        if clean.startswith("```json"):
-            clean = clean[7:]
-        if clean.startswith("```"):
-            clean = clean[3:]
-        if clean.endswith("```"):
-            clean = clean[:-3]
-        data = json.loads(clean.strip())
-        final_skills = data.get("skills", raw_skills)
-    except Exception as e:
-        print(f"[{job_id}] [quality_normalize] Parse error: {e}")
-        # Fallback: use raw skills with default confidence
-        final_skills = raw_skills
-        for sk in final_skills:
-            sk.setdefault("confidence", 0.5)
-    await emit(job_id, "stage", {
-        "name": "QUALITY_CHECK_DONE",
-        "detail": f"Final skills count: {len(final_skills)} (from {len(raw_skills)} raw)",
-    })
-    print(f"[{job_id}] Node quality_normalize finished (final skills: {len(final_skills)})")
-    return {"skills_file": {"skills": final_skills}}

backend/graph/nodes/score_confidence.py ADDED Viewed

	@@ -0,0 +1,61 @@

+from backend.graph.state import BrainState
+from backend.sse import emit
+def _score_confidence(skill: dict, contradictions: list) -> float:
+    """Compute a deterministic confidence score for one skill.
+
+    Starts at 0.5, adds 0.05 / 0.15 / 0.25 for one / two / three-or-more
+    evidence entries, adds a flat 0.15, and adds 0.10 when no contradiction
+    is matched to the skill. The result is capped at 1.0 and rounded to two
+    decimals. (The formula is attributed to CLAUDE.md.)
+
+    A contradiction is matched when its ``id`` starts with the first
+    underscore-separated token of the skill id, or when the skill id occurs
+    inside its ``domain``. A skill without an id matches nothing.
+    """
+    evidence = skill.get("evidence") or []
+    if isinstance(evidence, str):
+        # A bare string is one piece of evidence, not one per character.
+        evidence = [evidence]
+    source_count = len(evidence)
+    base = 0.5
+    if source_count >= 3:
+        base += 0.25
+    elif source_count == 2:
+        base += 0.15
+    elif source_count == 1:
+        base += 0.05
+    base += 0.15
+    skill_id = str(skill.get("id") or "")
+    prefix = skill_id.split("_")[0]
+    # Guard empty strings: "".startswith("") and "" in text are always
+    # true, which would mark every id-less skill as contradicted.
+    has_contradiction = bool(skill_id) and any(
+        (bool(prefix) and str(c.get("id") or "").startswith(prefix))
+        or skill_id in str(c.get("domain") or "")
+        for c in (contradictions or [])
+    )
+    if not has_contradiction:
+        base += 0.10
+    return round(min(base, 1.0), 2)
+async def score_confidence(state: BrainState) -> dict:
+    """Attach a ``confidence`` value to every evidence-linked skill.
+
+    Reads ``skills_with_evidence`` and ``contradictions`` from the state,
+    scores each skill with ``_score_confidence`` (the skill dicts are
+    updated in place), emits ``SCORING_CONFIDENCE`` and ``SCORING_DONE``
+    stage events, and returns the scored list under ``final_skills``.
+    """
+    job_id = state["job_id"]
+    contradictions = state.get("contradictions", [])
+    skills = state.get("skills_with_evidence", [])
+    print(f"[{job_id}] Node score_confidence: scoring {len(skills)} skills")
+    started = {"name": "SCORING_CONFIDENCE", "detail": f"Scoring {len(skills)} skills"}
+    await emit(job_id, "stage", started)
+    for entry in skills:
+        entry["confidence"] = _score_confidence(entry, contradictions)
+    final_skills = list(skills)
+    count = len(final_skills)
+    total = sum(entry.get("confidence", 0) for entry in final_skills)
+    # Divide by 1 for an empty list so the average is 0 instead of an error.
+    avg_conf = round(total / (count if count else 1), 2)
+    finished = {
+        "name": "SCORING_DONE",
+        "detail": f"Average confidence: {avg_conf} across {len(final_skills)} skills",
+    }
+    await emit(job_id, "stage", finished)
+    print(f"[{job_id}] score_confidence: avg confidence {avg_conf}")
+    return {"final_skills": final_skills}

backend/graph/nodes/synthesize_skills.py CHANGED Viewed

@@ -1,9 +1,4 @@
-"""
-Node 3: For each domain cluster, call vLLM to synthesize structured skills.
-Emits SSE stage: SYNTHESIZING_SKILLS
-"""
 import json
-import uuid
 from backend.graph.state import BrainState
 from backend.llm import llm_call
 from backend.sse import emit
@@ -11,101 +6,102 @@ from backend.sse import emit
 async def synthesize_skills(state: BrainState) -> dict:
     job_id = state["job_id"]
-    chunks = state.get("chunks", [])
-    clusters = state.get("clusters", {})
-    domains = clusters.get("domains", {})
-    print(f"[{job_id}] Node synthesize_skills started with {len(domains)} domains")
-    if not domains:
-        await emit(job_id, "stage", {"name": "SYNTHESIZING_SKILLS", "detail": "No clusters to synthesize"})
-        print(f"[{job_id}] Node synthesize_skills finished (0 domains)")
-        return {"raw_skills": []}
-    await emit(job_id, "stage", {
-        "name": "SYNTHESIZING_SKILLS",
-        "detail": f"Synthesizing skills for {len(domains)} domains",
-    })
-    all_skills = []
-    for domain_name, chunk_indices in domains.items():
-        # Gather the actual chunk texts for this domain
-        domain_chunks = []
-        for idx in chunk_indices:
-            if 0 <= idx < len(chunks):
-                domain_chunks.append(chunks[idx])
-        if not domain_chunks:
-            continue
-        chunk_text = "\n\n".join([c["text"] for c in domain_chunks])
-        source_files = list(set(c["source_file"] for c in domain_chunks))
-        prompt = f"""You are a Principal Operations Architect analyzing the "{domain_name}" domain.
-Below are real excerpts from a company's internal documents (SOPs, Slack messages, support tickets) related to {domain_name}.
-Your job: extract every distinct operational rule, policy, process, or decision pattern you can find.
-For EACH skill, provide:
-- id: a unique identifier (use a short slug like "refund_loyal_customer")
-- category: "{domain_name}"
-- rule: the specific, actionable rule or process (be precise — include thresholds, timeframes, approvals)
-- rationale: why this rule exists (based on the evidence)
-- evidence: array of specific quotes or references from the source chunks that support this rule
 - source_files: which files this came from
-Rules for quality:
-- Extract what the documents ACTUALLY say, not what you assume.
-- If there are contradictions (e.g., SOP says X but Slack shows Y), note BOTH and state which takes precedence in practice.
-- Do NOT invent rules that aren't supported by the text below.
-- Each rule should be specific enough that a human could follow it without additional context.
 Respond with ONLY a JSON object:
-{{
   "skills": [
-    {{
-      "id": "refund_loyal_customer",
-      "category": "{domain_name}",
-      "rule": "Approve refunds up to 45 days for customers with >2 years tenure",
-      "rationale": "Exception applied over standard 30-day limit for loyal customers",
-      "evidence": ["slack_export_support.json: Mike approved 45-day refund for Acme Corp"],
-      "source_files": ["slack_export_support.json", "notion_refund_sop.md"]
-    }}
   ]
-}}"""
-        print(f"[{job_id}] Requesting skills for domain '{domain_name}'...")
-        response_str = await llm_call(prompt, chunk_text)
-        print(f"[{job_id}] Received skills response for domain '{domain_name}'")
-        try:
-            clean = response_str.strip()
-            if clean.startswith("```json"):
-                clean = clean[7:]
-            if clean.startswith("```"):
-                clean = clean[3:]
-            if clean.endswith("```"):
-                clean = clean[:-3]
-            data = json.loads(clean.strip())
-            domain_skills = data.get("skills", [])
-        except Exception as e:
-            print(f"[{job_id}] [synthesize_skills] Parse error for {domain_name}: {e}")
-            domain_skills = []
-        # Ensure every skill has an id
-        for sk in domain_skills:
-            if not sk.get("id"):
-                sk["id"] = str(uuid.uuid4())[:8]
-            sk["category"] = domain_name  # ensure consistency
-        all_skills.extend(domain_skills)
-        await emit(job_id, "stage", {
-            "name": "SYNTHESIZING_SKILLS",
-            "detail": f"{domain_name}: extracted {len(domain_skills)} skills",
-        })
-    print(f"[{job_id}] Node synthesize_skills finished (extracted {len(all_skills)} skills overall)")
-    return {"raw_skills": all_skills}

 import json
 from backend.graph.state import BrainState
 from backend.llm import llm_call
 from backend.sse import emit
 async def synthesize_skills(state: BrainState) -> dict:
+    """Merge the extraction outputs into draft skills with one LLM call.
+
+    Reads ``raw_decisions``, ``workflow_steps``, ``exception_rules`` and
+    ``contradictions`` from the state, sends them as a single JSON document
+    to ``llm_call`` together with the merge prompt, and parses the reply as
+    ``{"skills": [...]}``. Emits ``SYNTHESIZING_SKILLS`` before the call and
+    ``SYNTHESIZING_DONE`` after it.
+
+    Returns:
+        ``{"draft_skills": [...]}``. The list is empty when nothing was
+        extracted or when the reply cannot be parsed as JSON.
+    """
     job_id = state["job_id"]
+    raw_decisions = state.get("raw_decisions", [])
+    workflow_steps = state.get("workflow_steps", [])
+    exception_rules = state.get("exception_rules", [])
+    contradictions = state.get("contradictions", [])
+    total_raw = (
+        len(raw_decisions)
+        + len(workflow_steps)
+        + len(exception_rules)
+        + len(contradictions)
+    )
+    print(
+        f"[{job_id}] Node synthesize_skills: merging {len(raw_decisions)} decisions + {len(workflow_steps)} workflows + {len(exception_rules)} exceptions + {len(contradictions)} contradictions"
+    )
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "SYNTHESIZING_SKILLS",
+            "detail": f"Merging {total_raw} extracted items into cohesive skills",
+        },
+    )
+    # Nothing was extracted, so return before making the LLM call.
+    if total_raw == 0:
+        print(f"[{job_id}] synthesize_skills: no extractions to merge")
+        return {"draft_skills": []}
+    prompt = """You are a Principal Operations Architect. Below are four sets of extractions from company data:
+1. DECISIONS & RULES: explicit policies and decision criteria
+2. WORKFLOWS: step-by-step processes and procedures
+3. EXCEPTIONS: edge cases, constraints, forbidden actions
+4. CONTRADICTIONS: conflicts between different sources
+Merge these into unified operational skills. For each skill:
+- id: short snake_case identifier
+- category: operational domain name
+- rule: the specific, actionable rule text (be precise — include thresholds, timeframes, approvals)
+- rationale: why this rule exists (based on evidence)
+- evidence: array of specific quotes or references from source data
 - source_files: which files this came from
+Quality rules:
+- Deduplicate: merge skills that describe the same rule (keep the most complete version)
+- Resolve conflicts: note contradictions in the rationale
+- Do NOT invent rules that aren't supported by the extractions
+- Each rule should be specific enough that a human could follow it
 Respond with ONLY a JSON object:
+{
   "skills": [
+    {
+      "id": "handle_refund_request",
+      "category": "Customer Support",
+      "rule": "Approve full refund for annual plans within 14 days",
+      "rationale": "No-questions policy within 14 days for annual plans",
+      "evidence": ["notion_refund_sop.md: Annual plan customers within 14 days..."],
+      "source_files": ["notion_refund_sop.md"]
+    }
   ]
+}"""
+    extractions_text = json.dumps(
+        {
+            "decisions_and_rules": raw_decisions,
+            "workflows_and_processes": workflow_steps,
+            "exceptions_and_edge_cases": exception_rules,
+            "contradictions": contradictions,
+        },
+        indent=2,
+    )
+    response_str = await llm_call(prompt, extractions_text, max_tokens=4096)
+    try:
+        # Strip a leading/trailing Markdown code fence before parsing.
+        clean = response_str.strip()
+        if clean.startswith("```json"):
+            clean = clean[7:]
+        elif clean.startswith("```"):
+            clean = clean[3:]
+        if clean.endswith("```"):
+            clean = clean[:-3]
+        data = json.loads(clean.strip())
+        draft = data.get("skills", [])
+    except Exception as e:
+        # Any failure here is logged and yields an empty draft list.
+        print(f"[{job_id}] [synthesize_skills] Parse error: {e}")
+        draft = []
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "SYNTHESIZING_DONE",
+            "detail": f"Synthesized {len(draft)} skills from {total_raw} extractions",
+        },
+    )
+    print(f"[{job_id}] synthesize_skills: produced {len(draft)} skills")
+    return {"draft_skills": draft}

backend/graph/nodes/write_brain.py CHANGED Viewed

@@ -1,33 +1,57 @@
-"""
-Node 5: Write the final skills file to the database.
-Emits SSE stage: WRITING_DB, then pipeline_complete.
-"""
 import time
 import json
 import uuid
 import datetime
 from backend.graph.state import BrainState
 from backend.db.supabase import get_client
 from backend.sse import emit
 async def write_brain(state: BrainState) -> dict:
     job_id = state.get("job_id")
     company_id = state.get("company_id")
-    skills_file = state.get("skills_file", {})
-    skills = skills_file.get("skills", [])
     start_time = state.get("start_time", time.time())
     duration_ms = int((time.time() - start_time) * 1000)
-    print(f"[{job_id}] Node write_brain started for {company_id}")
-    await emit(job_id, "stage", {"name": "WRITING_DB", "detail": f"Persisting {len(skills)} skills"})
     db = get_client()
     if not db:
         await emit(job_id, "pipeline_error", {"error": "Database connection failed"})
-        print(f"[{job_id}] Node write_brain failed (no DB client)")
-        return {"errors": ["DB connection failed in write_brain"]}
     try:
         now_iso = datetime.datetime.now(datetime.timezone.utc).isoformat()
@@ -38,59 +62,74 @@ async def write_brain(state: BrainState) -> dict:
             if "filename" in f and "sha256" in f:
                 source_hashes[f["filename"]] = f["sha256"]
-        # Mark previous brain as not current
-        db.table("skills_files").update(
-            {"is_current": False}
-        ).eq("company_id", company_id).eq("is_current", True).execute()
-        # Insert new brain
-        sf_res = db.table("skills_files").insert({
-            "company_id": company_id,
-            "version": version_str,
-            "brain_json": skills_file,
-            "source_hashes": source_hashes,
-            "is_current": True,
-        }).execute()
         sf_id = sf_res.data[0]["id"]
-        # Insert individual skills
-        for skill in skills:
-            db.table("skills").insert({
-                "id": skill.get("id", str(uuid.uuid4())[:8]),
-                "company_id": company_id,
-                "skills_file_id": sf_id,
-                "name": skill.get("rule", "Unknown")[:200],
-                "domain": skill.get("category", "general"),
-                "version": version_str,
-                "confidence": float(skill.get("confidence", 0.5)),
-                "skill_json": skill,
-            }).execute()
-        # Update compile run
-        db.table("compile_runs").update({
-            "status": "complete",
-            "completed_at": now_iso,
-            "duration_ms": duration_ms,
-            "result_version": version_str,
-        }).eq("id", job_id).execute()
     except Exception as e:
         print(f"[{job_id}] [write_brain] DB Error: {e}")
         await emit(job_id, "pipeline_error", {"error": str(e)})
-        return {"errors": [f"write_brain DB error: {e}"]}
-    await emit(job_id, "stage", {
-        "name": "DONE",
-        "detail": f"Brain {version_str} written: {len(skills)} skills, {len(source_hashes)} sources, {duration_ms}ms",
-    })
-    await emit(job_id, "pipeline_complete", {
-        "status": "success",
-        "version": version_str,
-        "skills_count": len(skills),
-        "source_count": len(source_hashes),
-        "duration_ms": duration_ms,
-    })
-    print(f"[{job_id}] Node write_brain finished successfully (version: {version_str})")
-    return {}

 import time
 import json
 import uuid
 import datetime
 from backend.graph.state import BrainState
 from backend.db.supabase import get_client
+from backend.llm import get_embedding
 from backend.sse import emit
 async def write_brain(state: BrainState) -> dict:
     job_id = state.get("job_id")
     company_id = state.get("company_id")
+    final_skills = state.get("final_skills", [])
     start_time = state.get("start_time", time.time())
     duration_ms = int((time.time() - start_time) * 1000)
+    print(
+        f"[{job_id}] Node write_brain: persisting {len(final_skills)} skills for {company_id}"
+    )
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "WRITING_DB",
+            "detail": f"Pre-embedding and persisting {len(final_skills)} skills",
+        },
+    )
+    skills_with_embeddings = []
+    for skill in final_skills:
+        skill_text = f"{skill.get('category', '')} {skill.get('rule', '')} {skill.get('rationale', '')}"
+        emb = get_embedding(skill_text)
+        skill["embedding_vector"] = emb
+        skills_with_embeddings.append(skill)
+    skills_file = {
+        "skills": skills_with_embeddings,
+        "meta": {
+            "company_id": company_id,
+            "compiled_at": datetime.datetime.now(datetime.timezone.utc).isoformat(),
+            "total_skills": len(skills_with_embeddings),
+            "duration_ms": duration_ms,
+        },
+    }
     db = get_client()
     if not db:
         await emit(job_id, "pipeline_error", {"error": "Database connection failed"})
+        print(f"[{job_id}] write_brain: no DB client")
+        return {
+            "errors": ["DB connection failed in write_brain"],
+            "skills_file": skills_file,
+        }
     try:
         now_iso = datetime.datetime.now(datetime.timezone.utc).isoformat()
             if "filename" in f and "sha256" in f:
                 source_hashes[f["filename"]] = f["sha256"]
+        db.table("skills_files").update({"is_current": False}).eq(
+            "company_id", company_id
+        ).eq("is_current", True).execute()
+        sf_res = (
+            db.table("skills_files")
+            .insert(
+                {
+                    "company_id": company_id,
+                    "version": version_str,
+                    "brain_json": skills_file,
+                    "source_hashes": source_hashes,
+                    "is_current": True,
+                }
+            )
+            .execute()
+        )
         sf_id = sf_res.data[0]["id"]
+        for skill in skills_with_embeddings:
+            skill_copy = {k: v for k, v in skill.items() if k != "embedding_vector"}
+            db.table("skills").insert(
+                {
+                    "id": skill.get("id", str(uuid.uuid4())[:8]),
+                    "company_id": company_id,
+                    "skills_file_id": sf_id,
+                    "name": skill.get("rule", "Unknown")[:200],
+                    "domain": skill.get("category", "general"),
+                    "version": version_str,
+                    "confidence": float(skill.get("confidence", 0.5)),
+                    "skill_json": skill_copy,
+                }
+            ).execute()
+        db.table("compile_runs").update(
+            {
+                "status": "complete",
+                "completed_at": now_iso,
+                "duration_ms": duration_ms,
+                "result_version": version_str,
+            }
+        ).eq("id", job_id).execute()
     except Exception as e:
         print(f"[{job_id}] [write_brain] DB Error: {e}")
         await emit(job_id, "pipeline_error", {"error": str(e)})
+        return {"errors": [f"write_brain DB error: {e}"], "skills_file": skills_file}
+    await emit(
+        job_id,
+        "stage",
+        {
+            "name": "DONE",
+            "detail": f"Brain {version_str} written: {len(skills_with_embeddings)} skills, {len(source_hashes)} sources, {duration_ms}ms",
+        },
+    )
+    await emit(
+        job_id,
+        "pipeline_complete",
+        {
+            "status": "success",
+            "version": version_str,
+            "skills_count": len(skills_with_embeddings),
+            "source_count": len(source_hashes),
+            "duration_ms": duration_ms,
+        },
+    )
+    print(f"[{job_id}] write_brain: done (version: {version_str})")
+    return {"skills_file": skills_file, "brain_version": version_str}

backend/graph/state.py CHANGED Viewed

@@ -1,14 +1,28 @@
 from typing import TypedDict, Annotated, List, Dict, Any
 import operator
 class BrainState(TypedDict):
     company_id: str
     job_id: str
-    source_files: List[Dict[str, Any]]   # [{filename, content, sha256, doc_type}]
-    chunks: List[Dict[str, Any]]         # [{text, source_file, chunk_index, doc_type}]
-    clusters: Dict[str, Any]             # {domains: {domain_name: [chunk_indices]}}
-    raw_skills: List[Dict[str, Any]]     # skills before quality pass
-    skills_file: Dict[str, Any]          # final {skills: [...]}
     brain_version: str
     start_time: float
     errors: Annotated[List[str], operator.add]

 from typing import TypedDict, Annotated, List, Dict, Any
 import operator
 class BrainState(TypedDict):
+    """Shared state passed between the nodes of the compile graph.
+
+    Fields annotated with ``operator.add`` carry a list reducer: assuming
+    the usual LangGraph semantics, updates returned by different nodes are
+    concatenated instead of overwriting each other (TODO confirm against
+    the graph definition).
+    """
     company_id: str
     job_id: str
+    # Entries written by load_sources: {filename, content, sha256, doc_type}.
+    source_files: Annotated[List[Dict[str, Any]], operator.add]
+    # NOTE(review): the next four fields are presumably filled by the
+    # ingestion nodes and their join step; producers are not visible here.
+    structured_sops: Annotated[List[Dict[str, Any]], operator.add]
+    normalized_events: Annotated[List[Dict[str, Any]], operator.add]
+    resolved_cases: Annotated[List[Dict[str, Any]], operator.add]
+    all_chunks: List[Dict[str, Any]]
+    # Extraction outputs; all four are read by synthesize_skills.
+    raw_decisions: Annotated[List[Dict[str, Any]], operator.add]
+    workflow_steps: Annotated[List[Dict[str, Any]], operator.add]
+    exception_rules: Annotated[List[Dict[str, Any]], operator.add]
+    contradictions: Annotated[List[Dict[str, Any]], operator.add]
+    # Returned by synthesize_skills.
+    draft_skills: List[Dict[str, Any]]
+    # Returned by link_evidence, read by score_confidence.
+    skills_with_evidence: List[Dict[str, Any]]
+    # Returned by score_confidence, read by write_brain.
+    final_skills: List[Dict[str, Any]]
+    # Returned by write_brain: {"skills": [...], "meta": {...}}.
+    skills_file: Dict[str, Any]
     brain_version: str
     start_time: float
     errors: Annotated[List[str], operator.add]

backend/llm.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import os
 import json
 import numpy as np
 from openai import AsyncOpenAI
 from dotenv import load_dotenv
@@ -11,26 +13,34 @@ MODEL_NAME = "RedHatAI/Qwen2.5-72B-Instruct-FP8-dynamic"
 llm = AsyncOpenAI(base_url=VLLM_BASE_URL, api_key="not-needed", timeout=120.0)
 # --- Embedding model (local, fast, centralized here) ---
 _embedding_model = None
 def _get_embedding_model():
     global _embedding_model
     if _embedding_model is None:
         from sentence_transformers import SentenceTransformer
         _embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
     return _embedding_model
 def get_embedding(text: str) -> list:
     """Return a single embedding vector as a Python list."""
     model = _get_embedding_model()
     return model.encode(text).tolist()
 def get_embeddings(texts: list) -> list:
     """Return a list of embedding vectors."""
     model = _get_embedding_model()
     return [v.tolist() for v in model.encode(texts)]
 def cosine_similarity(v1, v2) -> float:
     """Cosine similarity between two vectors."""
     a, b = np.array(v1), np.array(v2)
@@ -39,6 +49,7 @@ def cosine_similarity(v1, v2) -> float:
         return 0.0
     return float(np.dot(a, b) / denom)
 async def check_vllm_health() -> dict:
     """Ping the vLLM /v1/models endpoint. Returns status dict."""
     try:
@@ -48,18 +59,102 @@ async def check_vllm_health() -> dict:
     except Exception as e:
         return {"healthy": False, "error": str(e), "url": VLLM_BASE_URL}
-async def llm_call(system_prompt: str, user_content: str, temperature: float = 0.1, max_tokens: int = 4096) -> str:
-    """Single centralized LLM call through vLLM. Raises on failure."""
     try:
-        response = await llm.chat.completions.create(
-            model=MODEL_NAME,
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_content}
-            ],
-            temperature=temperature,
-            max_tokens=max_tokens
         )
-        return response.choices[0].message.content
-    except Exception as e:
-        raise RuntimeError(f"vLLM call failed ({VLLM_BASE_URL}): {e}")

 import os
 import json
+import re
+import asyncio
 import numpy as np
 from openai import AsyncOpenAI
 from dotenv import load_dotenv
 llm = AsyncOpenAI(base_url=VLLM_BASE_URL, api_key="not-needed", timeout=120.0)
+# --- Concurrency throttle for parallel extraction ---
+_semaphore = asyncio.Semaphore(4)
 # --- Embedding model (local, fast, centralized here) ---
 _embedding_model = None
 def _get_embedding_model():
     global _embedding_model
     if _embedding_model is None:
         from sentence_transformers import SentenceTransformer
         _embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
     return _embedding_model
 def get_embedding(text: str) -> list:
     """Return a single embedding vector as a Python list."""
     model = _get_embedding_model()
     return model.encode(text).tolist()
 def get_embeddings(texts: list) -> list:
     """Return a list of embedding vectors."""
     model = _get_embedding_model()
     return [v.tolist() for v in model.encode(texts)]
 def cosine_similarity(v1, v2) -> float:
     """Cosine similarity between two vectors."""
     a, b = np.array(v1), np.array(v2)
         return 0.0
     return float(np.dot(a, b) / denom)
 async def check_vllm_health() -> dict:
     """Ping the vLLM /v1/models endpoint. Returns status dict."""
     try:
     except Exception as e:
         return {"healthy": False, "error": str(e), "url": VLLM_BASE_URL}
+async def llm_call(
+    system_prompt: str,
+    user_content: str,
+    temperature: float = 0.1,
+    max_tokens: int = 4096,
+) -> str:
+    """Send one system+user chat completion to vLLM and return the reply text.
+
+    The call runs under the module-level ``_semaphore`` so that parallel
+    extraction nodes cannot issue more concurrent requests than it allows.
+
+    Args:
+        system_prompt: Content of the ``system`` message.
+        user_content: Content of the ``user`` message.
+        temperature: Sampling temperature forwarded to the API.
+        max_tokens: Completion token limit forwarded to the API.
+
+    Returns:
+        The first choice's message content; ``""`` when the server returns
+        no content, so callers can always treat the result as ``str``.
+
+    Raises:
+        RuntimeError: If the request fails or the response has no choices.
+            The original exception is chained as ``__cause__``.
+    """
+    async with _semaphore:
+        try:
+            response = await llm.chat.completions.create(
+                model=MODEL_NAME,
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_content},
+                ],
+                temperature=temperature,
+                max_tokens=max_tokens,
+            )
+            content = response.choices[0].message.content
+        except Exception as e:
+            raise RuntimeError(f"vLLM call failed ({VLLM_BASE_URL}): {e}") from e
+        # message.content is Optional in the OpenAI schema; normalise None to "".
+        return content or ""
+# ─────────────────────────────────────────────
+# JSON Self-Repair Utilities
+# ─────────────────────────────────────────────
+def _strip_fences(raw: str) -> str:
+    """Remove a surrounding markdown code fence from LLM output.
+
+    Handles a bare opening fence, a ``json`` tag in any letter case, and any
+    other language tag that is followed by a newline (e.g. "```javascript").
+    A trailing fence is removed whether or not an opening one was present.
+
+    Args:
+        raw: Raw model output.
+
+    Returns:
+        The text with fences and surrounding whitespace removed.
+    """
+    clean = raw.strip()
+    # First alternative keeps the old "```json[...]" (no newline) case working;
+    # the second only swallows a tag that ends its line, so "```true```" is safe.
+    clean = re.sub(
+        r"^```(?:json\b|[\w+-]*[ \t]*\r?\n)?",
+        "",
+        clean,
+        count=1,
+        flags=re.IGNORECASE,
+    )
+    if clean.endswith("```"):
+        clean = clean[:-3]
+    return clean.strip()
+def _repair_json(raw: str) -> str:
+    """Drop trailing commas that make otherwise valid JSON unparseable.
+
+    A comma is removed, together with the whitespace after it, when the next
+    non-whitespace character is ``}`` or ``]`` or when nothing follows it.
+    Text inside double-quoted strings is never modified, so a value such as
+    ``"a,}"`` survives intact. Bracket balancing is not attempted.
+
+    Args:
+        raw: Candidate JSON text.
+
+    Returns:
+        The text with trailing commas removed.
+    """
+    out = []
+    in_string = False
+    escaped = False
+    i, n = 0, len(raw)
+    while i < n:
+        ch = raw[i]
+        if in_string:
+            out.append(ch)
+            if escaped:
+                escaped = False
+            elif ch == "\\":
+                escaped = True
+            elif ch == '"':
+                in_string = False
+            i += 1
+            continue
+        if ch == '"':
+            in_string = True
+        elif ch == ",":
+            j = i + 1
+            while j < n and raw[j].isspace():
+                j += 1
+            if j == n or raw[j] in "}]":
+                # Trailing comma: skip it and the whitespace that follows.
+                i = j
+                continue
+        out.append(ch)
+        i += 1
+    return "".join(out)
+def _coerce_to_list(parsed) -> list:
+    """Normalise a parsed JSON value to a list (unwraps common dict envelopes)."""
+    if isinstance(parsed, list):
+        return parsed
+    if isinstance(parsed, dict):
+        # Some nodes return {"skills": [...]} — unwrap
+        for key in ("skills", "items", "results", "data"):
+            if isinstance(parsed.get(key), list):
+                return parsed[key]
+        return [parsed]
+    return []
+def _parse_json_list(raw) -> list:
+    """Parse LLM output into a list; raises json.JSONDecodeError if unparseable."""
+    cleaned = _strip_fences(raw or "")
+    try:
+        # Try the text as-is first so valid JSON is never altered by repair.
+        parsed = json.loads(cleaned)
+    except json.JSONDecodeError:
+        parsed = json.loads(_repair_json(cleaned))
+    return _coerce_to_list(parsed)
+async def safe_llm_json_call(
+    system_prompt: str,
+    user_content: str,
+    temperature: float = 0.1,
+    max_tokens: int = 4096,
+) -> list:
+    """
+    Call the LLM expecting a JSON array response.
+
+    The reply is stripped of code fences and parsed; a dict reply is unwrapped
+    via its "skills"/"items"/"results"/"data" list or wrapped in a one-item
+    list. If the first reply cannot be parsed, the call is retried once with a
+    stricter prompt that includes the failed output.
+
+    Returns:
+        The parsed list, or [] when the retry also fails for any reason.
+
+    Raises:
+        RuntimeError: If the initial ``llm_call`` itself fails. Only parse
+            failures and retry failures are absorbed.
+    """
+    raw = await llm_call(system_prompt, user_content, temperature, max_tokens)
+    try:
+        return _parse_json_list(raw)
+    except json.JSONDecodeError:
+        pass
+    # Retry once with a stricter prompt
+    retry_prompt = (
+        system_prompt
+        + "\n\nCRITICAL: Your previous response was not valid JSON. Return ONLY a valid JSON array. No markdown. No text outside the JSON."
+    )
+    retry_user = f"The raw string that failed to parse was:\n\n{raw}\n\n---\n\nPlease redo the extraction correctly:\n{user_content}"
+    try:
+        raw2 = await llm_call(retry_prompt, retry_user, temperature, max_tokens)
+        return _parse_json_list(raw2)
+    except Exception as e:
+        # Best-effort by design: report the failure instead of hiding it.
+        print(f"[llm] JSON retry failed, returning []: {e}")
+        return []

backend/main.py CHANGED Viewed

@@ -7,15 +7,18 @@ import time
 import json
 import hashlib
 import shutil
 from backend.graph.graph import build_compilation_graph
 from backend.sse import event_bus, emit
 from backend.agent.brain_agent import handle_agent_query
-from backend.db.supabase import get_client
 from backend.llm import check_vllm_health
 from backend.models.schemas import CompileRequest, AgentHandleRequest, AgentQueryRequest
-app = FastAPI(title="Kernl API", version="2.0.0")
 app.add_middleware(
     CORSMiddleware,
@@ -52,7 +55,6 @@ def _company_sources_dir(company_id: str) -> str:
 @app.post("/sources/upload")
 async def upload_source(company_id: str = Form(...), file: UploadFile = File(...)):
-    """Upload a source file for a company."""
     dest_dir = _company_sources_dir(company_id)
     os.makedirs(dest_dir, exist_ok=True)
@@ -63,16 +65,17 @@ async def upload_source(company_id: str = Form(...), file: UploadFile = File(...
     file_hash = hashlib.sha256(content).hexdigest()
-    # Record in DB
     db = get_client()
     if db:
         try:
-            db.table("source_files").insert({
-                "company_id": company_id,
-                "filename": file.filename,
-                "sha256": file_hash,
-                "storage_path": f"data/sources/{company_id}/{file.filename}",
-            }).execute()
         except Exception as e:
             print(f"[upload] DB record error: {e}")
@@ -81,7 +84,6 @@ async def upload_source(company_id: str = Form(...), file: UploadFile = File(...
 @app.get("/sources/{company_id}")
 async def list_sources(company_id: str):
-    """List all source files for a company."""
     src_dir = _company_sources_dir(company_id)
     if not os.path.isdir(src_dir):
         return {"files": []}
@@ -91,17 +93,18 @@ async def list_sources(company_id: str):
         if os.path.isfile(fp):
             with open(fp, "rb") as f:
                 content = f.read()
-            files.append({
-                "filename": fn,
-                "size_bytes": len(content),
-                "sha256": hashlib.sha256(content).hexdigest(),
-            })
     return {"files": files, "company_id": company_id}
 @app.delete("/sources/{company_id}/{filename}")
 async def delete_source(company_id: str, filename: str):
-    """Delete a source file."""
     filepath = os.path.join(_company_sources_dir(company_id), filename)
     if not os.path.isfile(filepath):
         raise HTTPException(status_code=404, detail=f"File not found: {filename}")
@@ -110,9 +113,9 @@ async def delete_source(company_id: str, filename: str):
     db = get_client()
     if db:
         try:
-            db.table("source_files").delete().eq(
-                "company_id", company_id
-            ).eq("filename", filename).execute()
         except Exception as e:
             print(f"[delete] DB cleanup error: {e}")
@@ -122,18 +125,24 @@ async def delete_source(company_id: str, filename: str):
 # ─────────────────────────────────────────────
 # Compilation pipeline
 # ─────────────────────────────────────────────
-import asyncio
-import traceback
-import datetime
 async def run_compilation_graph(job_id: str, company_id: str):
     initial_state = {
         "job_id": job_id,
         "company_id": company_id,
         "source_files": [],
-        "chunks": [],
-        "clusters": {},
-        "raw_skills": [],
         "skills_file": {},
         "brain_version": "",
         "start_time": time.time(),
@@ -144,26 +153,28 @@ async def run_compilation_graph(job_id: str, company_id: str):
     await emit(job_id, "pipeline_start", {"company_id": company_id})
     try:
-        # Prevent indefinite hanging
         await asyncio.wait_for(graph.ainvoke(initial_state), timeout=600.0)
     except Exception as e:
         err_msg = str(e)
         if isinstance(e, asyncio.TimeoutError):
             err_msg = "Pipeline execution timed out after 600 seconds."
         trace = traceback.format_exc()
         print(f"Graph execution failed for {job_id}:\n{trace}")
         await emit(job_id, "pipeline_error", {"error": err_msg, "traceback": trace})
-        # Update compile run status
         db = get_client()
         if db:
             try:
-                db.table("compile_runs").update({
-                    "status": "error",
-                    "completed_at": datetime.datetime.now(datetime.timezone.utc).isoformat(),
-                    "error_detail": err_msg,
-                }).eq("id", job_id).execute()
             except Exception as db_e:
                 print(f"Failed to update compile_runs with error status: {db_e}")
@@ -171,7 +182,6 @@ async def run_compilation_graph(job_id: str, company_id: str):
 @app.post("/compile")
 @app.post("/compile/run")
 async def compile_brain(req: CompileRequest, background_tasks: BackgroundTasks):
-    # Verify source directory exists
     src_dir = _company_sources_dir(req.company_id)
     if not os.path.isdir(src_dir) or not os.listdir(src_dir):
         raise HTTPException(
@@ -184,11 +194,13 @@ async def compile_brain(req: CompileRequest, background_tasks: BackgroundTasks):
     if db:
         try:
-            db.table("compile_runs").insert({
-                "id": job_id,
-                "company_id": req.company_id,
-                "status": "running",
-            }).execute()
         except Exception as e:
             print(f"Error creating run: {e}")
@@ -220,14 +232,14 @@ async def compile_status(job_id: str):
 # ─────────────────────────────────────────────
 @app.post("/agent/handle")
 async def agent_handle_endpoint(req: AgentHandleRequest):
-    """Legacy endpoint — kept for frontend compat."""
-    result = await handle_agent_query(req.company_id, req.scenario, req.context, req.with_brain)
     return result
 @app.post("/agent/query")
 async def agent_query_endpoint(req: AgentQueryRequest):
-    """New canonical endpoint."""
     result = await handle_agent_query(
         req.company_id,
         req.scenario_text,
@@ -242,13 +254,17 @@ async def agent_query_endpoint(req: AgentQueryRequest):
 # ─────────────────────────────────────────────
 @app.get("/skills")
 async def get_skills_legacy(company_id: str):
-    """Legacy endpoint: returns raw brain_json."""
     db = get_client()
     if not db:
         raise HTTPException(status_code=500, detail="Database not connected")
-    res = db.table("skills_files").select("brain_json").eq(
-        "company_id", company_id
-    ).order("compiled_at", desc=True).limit(1).execute()
     if not res.data:
         return {"skills": []}
     return res.data[0]["brain_json"]
@@ -256,14 +272,17 @@ async def get_skills_legacy(company_id: str):
 @app.get("/skills/{company_id}")
 async def get_skills(company_id: str):
-    """Returns detailed skills with metadata."""
     db = get_client()
     if not db:
         raise HTTPException(status_code=500, detail="Database not connected")
-    res = db.table("skills_files").select("*").eq(
-        "company_id", company_id
-    ).eq("is_current", True).execute()
     if not res.data:
         return {"skills": [], "version": None, "compiled_at": None}
@@ -281,30 +300,125 @@ async def get_skills(company_id: str):
 @app.get("/brain/versions/{company_id}")
 async def list_brain_versions(company_id: str):
-    """Lists all brain versions for a company."""
     db = get_client()
     if not db:
         raise HTTPException(status_code=500, detail="Database not connected")
-    res = db.table("skills_files").select(
-        "id, version, compiled_at, is_current, source_hashes"
-    ).eq("company_id", company_id).order("compiled_at", desc=True).execute()
     versions = []
     for row in res.data:
-        brain_json = None
-        # Get skill count from the full row
-        full = db.table("skills_files").select("brain_json").eq("id", row["id"]).execute()
         skill_count = 0
         if full.data:
             skill_count = len(full.data[0]["brain_json"].get("skills", []))
-        versions.append({
-            "id": row["id"],
-            "version": row["version"],
-            "compiled_at": row["compiled_at"],
-            "is_current": row["is_current"],
-            "source_count": len(row.get("source_hashes", {})),
-            "skill_count": skill_count,
-        })
     return {"versions": versions, "company_id": company_id}

 import json
 import hashlib
 import shutil
+import asyncio
+import traceback
+import datetime
 from backend.graph.graph import build_compilation_graph
 from backend.sse import event_bus, emit
 from backend.agent.brain_agent import handle_agent_query
+from backend.db.supabase import get_client, get_brain_by_version
 from backend.llm import check_vllm_health
 from backend.models.schemas import CompileRequest, AgentHandleRequest, AgentQueryRequest
+app = FastAPI(title="Kernl API", version="2.1.0")
 app.add_middleware(
     CORSMiddleware,
 @app.post("/sources/upload")
 async def upload_source(company_id: str = Form(...), file: UploadFile = File(...)):
     dest_dir = _company_sources_dir(company_id)
     os.makedirs(dest_dir, exist_ok=True)
     file_hash = hashlib.sha256(content).hexdigest()
     db = get_client()
     if db:
         try:
+            db.table("source_files").insert(
+                {
+                    "company_id": company_id,
+                    "filename": file.filename,
+                    "sha256": file_hash,
+                    "storage_path": f"data/sources/{company_id}/{file.filename}",
+                }
+            ).execute()
         except Exception as e:
             print(f"[upload] DB record error: {e}")
 @app.get("/sources/{company_id}")
 async def list_sources(company_id: str):
     src_dir = _company_sources_dir(company_id)
     if not os.path.isdir(src_dir):
         return {"files": []}
         if os.path.isfile(fp):
             with open(fp, "rb") as f:
                 content = f.read()
+            files.append(
+                {
+                    "filename": fn,
+                    "size_bytes": len(content),
+                    "sha256": hashlib.sha256(content).hexdigest(),
+                }
+            )
     return {"files": files, "company_id": company_id}
 @app.delete("/sources/{company_id}/{filename}")
 async def delete_source(company_id: str, filename: str):
     filepath = os.path.join(_company_sources_dir(company_id), filename)
     if not os.path.isfile(filepath):
         raise HTTPException(status_code=404, detail=f"File not found: {filename}")
     db = get_client()
     if db:
         try:
+            db.table("source_files").delete().eq("company_id", company_id).eq(
+                "filename", filename
+            ).execute()
         except Exception as e:
             print(f"[delete] DB cleanup error: {e}")
 # ─────────────────────────────────────────────
 # Compilation pipeline
 # ─────────────────────────────────────────────
 async def run_compilation_graph(job_id: str, company_id: str):
     initial_state = {
         "job_id": job_id,
         "company_id": company_id,
         "source_files": [],
+        "structured_sops": [],
+        "normalized_events": [],
+        "resolved_cases": [],
+        "all_chunks": [],
+        "raw_decisions": [],
+        "workflow_steps": [],
+        "exception_rules": [],
+        "contradictions": [],
+        "draft_skills": [],
+        "skills_with_evidence": [],
+        "final_skills": [],
         "skills_file": {},
         "brain_version": "",
         "start_time": time.time(),
     await emit(job_id, "pipeline_start", {"company_id": company_id})
     try:
         await asyncio.wait_for(graph.ainvoke(initial_state), timeout=600.0)
     except Exception as e:
         err_msg = str(e)
         if isinstance(e, asyncio.TimeoutError):
             err_msg = "Pipeline execution timed out after 600 seconds."
         trace = traceback.format_exc()
         print(f"Graph execution failed for {job_id}:\n{trace}")
         await emit(job_id, "pipeline_error", {"error": err_msg, "traceback": trace})
         db = get_client()
         if db:
             try:
+                db.table("compile_runs").update(
+                    {
+                        "status": "error",
+                        "completed_at": datetime.datetime.now(
+                            datetime.timezone.utc
+                        ).isoformat(),
+                        "error_detail": err_msg,
+                    }
+                ).eq("id", job_id).execute()
             except Exception as db_e:
                 print(f"Failed to update compile_runs with error status: {db_e}")
 @app.post("/compile")
 @app.post("/compile/run")
 async def compile_brain(req: CompileRequest, background_tasks: BackgroundTasks):
     src_dir = _company_sources_dir(req.company_id)
     if not os.path.isdir(src_dir) or not os.listdir(src_dir):
         raise HTTPException(
     if db:
         try:
+            db.table("compile_runs").insert(
+                {
+                    "id": job_id,
+                    "company_id": req.company_id,
+                    "status": "running",
+                }
+            ).execute()
         except Exception as e:
             print(f"Error creating run: {e}")
 # ─────────────────────────────────────────────
 @app.post("/agent/handle")
 async def agent_handle_endpoint(req: AgentHandleRequest):
+    result = await handle_agent_query(
+        req.company_id, req.scenario, req.context, req.with_brain
+    )
     return result
 @app.post("/agent/query")
 async def agent_query_endpoint(req: AgentQueryRequest):
     result = await handle_agent_query(
         req.company_id,
         req.scenario_text,
 # ─────────────────────────────────────────────
 @app.get("/skills")
 async def get_skills_legacy(company_id: str):
     db = get_client()
     if not db:
         raise HTTPException(status_code=500, detail="Database not connected")
+    res = (
+        db.table("skills_files")
+        .select("brain_json")
+        .eq("company_id", company_id)
+        .order("compiled_at", desc=True)
+        .limit(1)
+        .execute()
+    )
     if not res.data:
         return {"skills": []}
     return res.data[0]["brain_json"]
 @app.get("/skills/{company_id}")
 async def get_skills(company_id: str):
     db = get_client()
     if not db:
         raise HTTPException(status_code=500, detail="Database not connected")
+    res = (
+        db.table("skills_files")
+        .select("*")
+        .eq("company_id", company_id)
+        .eq("is_current", True)
+        .execute()
+    )
     if not res.data:
         return {"skills": [], "version": None, "compiled_at": None}
 @app.get("/brain/versions/{company_id}")
 async def list_brain_versions(company_id: str):
+    """List every stored brain version for a company, newest first.
+
+    Returns:
+        ``{"versions": [...], "company_id": ...}`` where each entry carries
+        id, version, compiled_at, is_current, source_count and skill_count.
+
+    Raises:
+        HTTPException: 500 when no database client is available.
+    """
     db = get_client()
     if not db:
         raise HTTPException(status_code=500, detail="Database not connected")
+    # brain_json comes back in the same query, so skill counts need no
+    # additional per-row lookup.
+    res = (
+        db.table("skills_files")
+        .select("id, version, compiled_at, is_current, source_hashes, brain_json")
+        .eq("company_id", company_id)
+        .order("compiled_at", desc=True)
+        .execute()
+    )
     versions = []
+    for row in res.data or []:
+        # Either JSON column may be stored as null.
+        brain = row.get("brain_json") or {}
+        versions.append(
+            {
+                "id": row["id"],
+                "version": row["version"],
+                "compiled_at": row["compiled_at"],
+                "is_current": row["is_current"],
+                "source_count": len(row.get("source_hashes") or {}),
+                "skill_count": len(brain.get("skills") or []),
+            }
+        )
     return {"versions": versions, "company_id": company_id}
+# ─────────────────────────────────────────────
+# Semantic Diff Engine
+# ─────────────────────────────────────────────
+def _diff_confidence(value) -> float:
+    """Coerce a stored confidence to float; None or non-numeric values count as 0.0."""
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return 0.0
+def _diff_skill_index(brain) -> dict:
+    """Index a brain row's skills by string id, falling back to ``idx_<position>``."""
+    # brain_json or its "skills" list may be stored as null.
+    skills = (brain["brain_json"] or {}).get("skills") or []
+    index = {}
+    for i, skill in enumerate(skills):
+        if not isinstance(skill, dict):
+            continue
+        sid = skill.get("id")
+        # Keys are normalised to str so sorted() never compares mixed types.
+        key = f"idx_{i}" if sid is None or sid == "" else str(sid)
+        index[key] = skill
+    return index
+@app.get("/diff/{v1}/{v2}")
+async def semantic_diff(v1: str, v2: str, company_id: str = "rivanly-inc"):
+    """Compare the skills of two brain versions of one company.
+
+    Skills are matched by id. The result lists skills present only in ``v2``
+    (added), only in ``v1`` (deleted), per-field changes to "rule" and
+    "rationale" (modified, values truncated to 200 characters), and
+    confidence changes larger than 0.01.
+
+    Args:
+        v1: Version identifier of the baseline brain.
+        v2: Version identifier of the brain to compare against it.
+        company_id: Company whose brains are compared.
+
+    Raises:
+        HTTPException: 500 when no database client is available; 404 when
+            either version cannot be found.
+    """
+    db = get_client()
+    if not db:
+        raise HTTPException(status_code=500, detail="Database not connected")
+    brain_v1 = get_brain_by_version(company_id, v1)
+    brain_v2 = get_brain_by_version(company_id, v2)
+    if not brain_v1 or not brain_v2:
+        raise HTTPException(
+            status_code=404, detail="One or both brain versions not found"
+        )
+    skills_v1 = _diff_skill_index(brain_v1)
+    skills_v2 = _diff_skill_index(brain_v2)
+    ids_v1 = set(skills_v1)
+    ids_v2 = set(skills_v2)
+    # "name" is the first 100 characters of the rule text; a null rule gives "".
+    added = [
+        {"id": sid, "name": str(skills_v2[sid].get("rule") or "")[:100]}
+        for sid in sorted(ids_v2 - ids_v1)
+    ]
+    deleted = [
+        {"id": sid, "name": str(skills_v1[sid].get("rule") or "")[:100]}
+        for sid in sorted(ids_v1 - ids_v2)
+    ]
+    modified = []
+    confidence_shifts = []
+    for sid in sorted(ids_v1 & ids_v2):
+        s1, s2 = skills_v1[sid], skills_v2[sid]
+        for field in ("rule", "rationale"):
+            v1_val = str(s1.get(field, ""))
+            v2_val = str(s2.get(field, ""))
+            if v1_val != v2_val:
+                modified.append(
+                    {
+                        "id": sid,
+                        "field": field,
+                        "old_value": v1_val[:200],
+                        "new_value": v2_val[:200],
+                    }
+                )
+        c1 = _diff_confidence(s1.get("confidence", 0))
+        c2 = _diff_confidence(s2.get("confidence", 0))
+        if abs(c1 - c2) > 0.01:
+            confidence_shifts.append(
+                {
+                    "id": sid,
+                    "old_confidence": c1,
+                    "new_confidence": c2,
+                    "reason": "Confidence recalculated based on source evidence and contradictions",
+                }
+            )
+    return {
+        "v1_version": v1,
+        "v2_version": v2,
+        "added": added,
+        "deleted": deleted,
+        "modified": modified,
+        "confidence_shifts": confidence_shifts,
+        "summary": {
+            "v1_skills": len(skills_v1),
+            "v2_skills": len(skills_v2),
+            "added_count": len(added),
+            "deleted_count": len(deleted),
+            "modified_count": len(modified),
+            "confidence_shift_count": len(confidence_shifts),
+        },
+    }

backend/models/schemas.py CHANGED Viewed

@@ -1,20 +1,59 @@
 from pydantic import BaseModel
 from typing import List, Optional, Dict, Any
 class CompileRequest(BaseModel):
     company_id: str
     force_recompile: bool = False
 class AgentHandleRequest(BaseModel):
     """Legacy schema — kept for frontend compatibility."""
     company_id: str
     scenario: str
     context: Optional[Dict[str, Any]] = None
     with_brain: bool = True
 class AgentQueryRequest(BaseModel):
     """New canonical schema for agent queries."""
     company_id: str
     scenario_text: str
     json_context: Optional[Dict[str, Any]] = None
     with_brain: bool = True

 from pydantic import BaseModel
 from typing import List, Optional, Dict, Any
 class CompileRequest(BaseModel):
     company_id: str
     force_recompile: bool = False
 class AgentHandleRequest(BaseModel):
     """Legacy schema — kept for frontend compatibility."""
     company_id: str
     scenario: str
     context: Optional[Dict[str, Any]] = None
     with_brain: bool = True
 class AgentQueryRequest(BaseModel):
     """New canonical schema for agent queries."""
     company_id: str
     scenario_text: str
     json_context: Optional[Dict[str, Any]] = None
     with_brain: bool = True
+class DiffRequest(BaseModel):
+    """Body schema naming two brain versions of one company to compare.
+
+    NOTE(review): the visible GET /diff/{v1}/{v2} route takes these values as
+    path/query parameters and does not use this model — confirm intended use.
+    """
+    version_v1: str
+    version_v2: str
+    company_id: str
+class DiffItem(BaseModel):
+    """One entry identified by ``id`` with an optional display ``name``."""
+    id: str
+    # Defaults to an empty string when no name is supplied.
+    name: str = ""
+class DiffModified(BaseModel):
+    """A change to one named field of an item: its old and new values."""
+    id: str
+    # Name of the field whose value changed.
+    field: str
+    old_value: Any = None
+    new_value: Any = None
+class DiffConfidenceShift(BaseModel):
+    """A confidence change for one item, with a free-text reason."""
+    id: str
+    old_confidence: float = 0.0
+    new_confidence: float = 0.0
+    reason: str = ""
+class DiffResponse(BaseModel):
+    """Result of comparing two brain versions.
+
+    Groups the differences into added, deleted and modified entries plus
+    confidence shifts. ``summary`` holds integer counts keyed by name and
+    defaults to empty, so existing constructors keep working.
+    """
+    v1_version: str
+    v2_version: str
+    added: List[DiffItem] = []
+    deleted: List[DiffItem] = []
+    modified: List[DiffModified] = []
+    confidence_shifts: List[DiffConfidenceShift] = []
+    # Mirrors the "summary" object returned by the /diff endpoint.
+    summary: Dict[str, int] = {}

backend/test_compile.py CHANGED Viewed

@@ -5,15 +5,14 @@ import uuid
 import sys
 from dotenv import load_dotenv
-# Set backend in path
 sys.path.append(os.path.dirname(os.path.dirname(__file__)))
 from backend.graph.graph import build_compilation_graph
 async def run_compilation_test():
     load_dotenv()
-    # Check vLLM
     vllm_url = os.getenv("VLLM_BASE_URL")
     if not vllm_url:
         print("VLLM_BASE_URL not set in .env. LLM calls will fail.")
@@ -22,68 +21,108 @@ async def run_compilation_test():
     company_id = "rivanly-inc"
     job_id = str(uuid.uuid4())
-    # Read files
     source_files = []
-    sources_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "sources")
     if os.path.exists(sources_dir):
         import hashlib
         for filename in os.listdir(sources_dir):
             filepath = os.path.join(sources_dir, filename)
-            with open(filepath, "r", encoding="utf-8") as f:
-                content = f.read()
-            ftype = "unknown"
-            if filename.endswith(".json"):
-                if "slack" in filename: ftype = "slack_json"
-                elif "tickets" in filename: ftype = "tickets_json"
-            elif filename.endswith(".md"):
-                ftype = "notion_md"
-            source_files.append({
-                "filename": filename,
-                "content": content,
-                "type": ftype,
-                "sha256": hashlib.sha256(content.encode('utf-8')).hexdigest()
-            })
     else:
         print(f"No sources dir found at {sources_dir}")
         return
-    print(f"Found {len(source_files)} source files. Starting graph...")
     initial_state = {
         "job_id": job_id,
         "company_id": company_id,
-        "source_files": source_files,
         "structured_sops": [],
         "normalized_events": [],
         "resolved_cases": [],
-        "extracted_decisions": [],
-        "extracted_workflows": [],
-        "extracted_exceptions": [],
-        "detected_contradictions": [],
-        "skills_file": {}
     }
     graph = build_compilation_graph()
     try:
         final_state = await graph.ainvoke(initial_state)
         print("\n=== COMPILATION COMPLETE ===")
-        print(f"Extracted Decisions: {len(final_state.get('extracted_decisions', []))}")
-        print(f"Detected Contradictions: {len(final_state.get('detected_contradictions', []))}")
-        for c in final_state.get('detected_contradictions', []):
-            print(f"  - Contradiction: {c}")
-        skills_file = final_state.get('skills_file', {})
-        skills = skills_file.get('skills', [])
-        print(f"Generated Skills: {len(skills)}")
-        for s in skills:
-            print(f"  - {s.get('id')} ({s.get('confidence')} conf)")
     except Exception as e:
         print(f"Graph execution failed: {e}")
 if __name__ == "__main__":
     asyncio.run(run_compilation_test())

 import sys
 from dotenv import load_dotenv
 sys.path.append(os.path.dirname(os.path.dirname(__file__)))
 from backend.graph.graph import build_compilation_graph
 async def run_compilation_test():
     load_dotenv()
     vllm_url = os.getenv("VLLM_BASE_URL")
     if not vllm_url:
         print("VLLM_BASE_URL not set in .env. LLM calls will fail.")
     company_id = "rivanly-inc"
     job_id = str(uuid.uuid4())
     source_files = []
+    sources_dir = os.path.join(
+        os.path.dirname(os.path.dirname(__file__)), "data", "sources"
+    )
     if os.path.exists(sources_dir):
         import hashlib
         for filename in os.listdir(sources_dir):
             filepath = os.path.join(sources_dir, filename)
+            if os.path.isfile(filepath):
+                with open(filepath, "r", encoding="utf-8") as f:
+                    content = f.read()
+                ftype = "unknown"
+                if filename.endswith(".json"):
+                    if "slack" in filename:
+                        ftype = "slack_json"
+                    elif "tickets" in filename:
+                        ftype = "tickets_json"
+                elif filename.endswith(".md"):
+                    ftype = "notion_md"
+                source_files.append(
+                    {
+                        "filename": filename,
+                        "content": content,
+                        "type": ftype,
+                        "sha256": hashlib.sha256(content.encode("utf-8")).hexdigest(),
+                    }
+                )
     else:
         print(f"No sources dir found at {sources_dir}")
         return
+    print(
+        f"Found {len(source_files)} source files. Starting parallel multi-agent graph..."
+    )
     initial_state = {
         "job_id": job_id,
         "company_id": company_id,
+        "source_files": [],  # load_sources reads from disk
         "structured_sops": [],
         "normalized_events": [],
         "resolved_cases": [],
+        "all_chunks": [],
+        "raw_decisions": [],
+        "workflow_steps": [],
+        "exception_rules": [],
+        "contradictions": [],
+        "draft_skills": [],
+        "skills_with_evidence": [],
+        "final_skills": [],
+        "skills_file": {},
+        "brain_version": "",
+        "start_time": __import__("time").time(),
+        "errors": [],
     }
     graph = build_compilation_graph()
     try:
         final_state = await graph.ainvoke(initial_state)
         print("\n=== COMPILATION COMPLETE ===")
+        raw_decisions = final_state.get("raw_decisions", [])
+        workflow_steps = final_state.get("workflow_steps", [])
+        exception_rules = final_state.get("exception_rules", [])
+        contradictions = final_state.get("contradictions", [])
+        print(f"Raw Decisions: {len(raw_decisions)}")
+        print(f"Workflow Steps: {len(workflow_steps)}")
+        print(f"Exception Rules: {len(exception_rules)}")
+        print(f"Contradictions: {len(contradictions)}")
+        for c in contradictions:
+            print(
+                f"  - Contradiction: {c.get('claim_a', '')[:80]} vs {c.get('claim_b', '')[:80]}"
+            )
+        final_skills = final_state.get("final_skills", [])
+        print(f"\nFinal Skills: {len(final_skills)}")
+        for s in final_skills:
+            print(
+                f"  - {s.get('id')} ({s.get('confidence')} conf) [{s.get('category')}]"
+            )
+            print(f"    Rule: {s.get('rule', '')[:100]}")
+            ev = s.get("evidence", [])
+            if ev:
+                print(f"    Evidence: {len(ev)} sources")
+        skills_file = final_state.get("skills_file", {})
+        if skills_file:
+            print(
+                f"\nBrain version: {skills_file.get('meta', {}).get('compiled_at', 'N/A')}"
+            )
     except Exception as e:
         print(f"Graph execution failed: {e}")
+        import traceback
+        traceback.print_exc()
 if __name__ == "__main__":
     asyncio.run(run_compilation_test())

data/sources/rivanly-inc/notion_refund_sop.md CHANGED Viewed

@@ -13,4 +13,4 @@ Our refund policy is designed to balance customer satisfaction with revenue rete
 - **Monthly Plans (New Customers):** If a customer on a monthly plan with a tenure of less than 3 months requests a refund over $500, escalate to the Founder.
 ## 3. Strict Time Limits
-**CRITICAL:** We offer absolutely no refunds after 30 days of purchase for any customer tier. If the purchase was more than 30 days ago, deny the refund.

 - **Monthly Plans (New Customers):** If a customer on a monthly plan with a tenure of less than 3 months requests a refund over $500, escalate to the Founder.
 ## 3. Strict Time Limits
+**CRITICAL:** We offer absolutely no refunds after 60 days of purchase for any customer tier. If the purchase was more than 60 days ago, deny the refund.

frontend/src/app/compile/[jobId]/page.tsx CHANGED Viewed

@@ -12,14 +12,26 @@ interface LogEvent {
 const STAGE_LABELS: Record<string, string> = {
   pipeline_start: "🚀 Pipeline Started",
   LOADING_DOCS: "📂 Loading Documents",
-  CHUNKING: "✂️ Chunking Documents",
-  CHUNKING_DONE: "✅ Chunking Complete",
-  EMBEDDING: "🧠 Embedding & Clustering",
-  EMBEDDING_DONE: "✅ Clustering Complete",
-  SYNTHESIZING_SKILLS: "⚡ Synthesizing Skills",
-  QUALITY_CHECK: "🔍 Quality & Confidence Scoring",
-  QUALITY_CHECK_DONE: "✅ Quality Check Complete",
-  WRITING_DB: "💾 Writing to Database",
   DONE: "✅ Pipeline Complete",
   pipeline_complete: "🎉 Compilation Finished",
   pipeline_error: "❌ Pipeline Error",

 const STAGE_LABELS: Record<string, string> = {
   pipeline_start: "🚀 Pipeline Started",
   LOADING_DOCS: "📂 Loading Documents",
+  LOADING_DOCS_DONE: "✅ Sources Loaded",
+  INGEST_NOTION: "📝 Ingesting SOPs",
+  INGEST_SLACK: "💬 Ingesting Slack Messages",
+  INGEST_TICKETS: "🎫 Ingesting Support Tickets",
+  INGEST_JOIN: "🔗 Merging All Chunks",
+  EXTRACT_DECISIONS: "⚖️ Extracting Rules & Policies",
+  EXTRACT_DECISIONS_DONE: "✅ Rules Extracted",
+  EXTRACT_WORKFLOWS: "🔁 Extracting Workflows",
+  EXTRACT_WORKFLOWS_DONE: "✅ Workflows Extracted",
+  EXTRACT_EXCEPTIONS: "⚠️ Extracting Exceptions & Edge Cases",
+  EXTRACT_EXCEPTIONS_DONE: "✅ Exceptions Extracted",
+  DETECT_CONTRADICTIONS: "🔄 Detecting Cross-Source Contradictions",
+  DETECT_CONTRADICTIONS_DONE: "✅ Contradictions Analyzed",
+  SYNTHESIZING_SKILLS: "⚡ Synthesizing Skills from All Extractions",
+  SYNTHESIZING_DONE: "✅ Skills Synthesized",
+  LINKING_EVIDENCE: "🔗 Linking Evidence to Skills",
+  LINKING_DONE: "✅ Evidence Linked",
+  SCORING_CONFIDENCE: "📊 Scoring Confidence",
+  SCORING_DONE: "✅ Confidence Scored",
+  WRITING_DB: "💾 Pre-embedding & Writing to Database",
   DONE: "✅ Pipeline Complete",
   pipeline_complete: "🎉 Compilation Finished",
   pipeline_error: "❌ Pipeline Error",

scripts/smoke_test.py CHANGED Viewed

@@ -7,6 +7,7 @@ Usage:
 Requires: backend running on http://localhost:8080
 """
 import requests
 import time
 import sys
@@ -51,7 +52,7 @@ def compile_and_wait():
     # Poll the compile stream for completion
     for attempt in range(60):  # max 5 minutes
         time.sleep(5)
         # Check job status explicitly
         try:
             status_req = requests.get(f"{API}/compile/{job_id}/status")
@@ -59,7 +60,9 @@ def compile_and_wait():
                 job_info = status_req.json()
                 if job_info.get("status") == "error":
                     print(f"   [ERROR] Job failed: {job_info.get('error_detail')}")
-                    raise RuntimeError(f"Compilation job failed: {job_info.get('error_detail')}")
                 if job_info.get("status") == "complete":
                     # Fetch skills
                     sk = requests.get(f"{API}/skills/{COMPANY}")
@@ -73,7 +76,7 @@ def compile_and_wait():
                 raise
             pass
-        print(f"   Waiting... ({(attempt+1)*5}s)")
     # Timeout reached. Fetch final status.
     final_status = "Unknown"
@@ -87,7 +90,9 @@ def compile_and_wait():
     except Exception:
         pass
-    raise TimeoutError(f"Compilation did not complete within 5 minutes. Final status: {final_status}, Error: {final_error}")
 def get_skills():
@@ -97,11 +102,14 @@ def get_skills():
 def query_agent(scenario: str, context: dict = None):
-    r = requests.post(f"{API}/agent/query", json={
-        "company_id": COMPANY,
-        "scenario_text": scenario,
-        "json_context": context or {},
-    })
     assert r.status_code == 200, f"Agent query failed: {r.text}"
     return r.json()
@@ -116,7 +124,9 @@ def test_gibberish():
     if confidence < 0.4:
         print("   [PASS] Low confidence for gibberish")
     else:
-        print(f"   [WARN] Confidence {confidence} is higher than expected for gibberish")
 def test_dynamic_policy_change():
@@ -138,14 +148,18 @@ def test_dynamic_policy_change():
     print("\n   Step B: Query agent about refunds (original policy)...")
     result_v1 = query_agent(
         "Customer requesting a refund after 45 days",
-        {"plan": "annual", "days_since_purchase": 45, "tenure_months": 6}
     )
     print(f"   v1 action: {result_v1.get('recommended_action')}")
     print(f"   v1 rule: {result_v1.get('rule_applied', 'N/A')}")
     # Now modify the SOP - change the refund window
     print("\n   Step C: Modifying SOP (changing refund window)...")
-    modified_sop = original_sop.replace("30 day", "60 day").replace("30-day", "60-day").replace("30 days", "60 days")
     if modified_sop == original_sop:
         # Try alternative patterns
         modified_sop = original_sop.replace("30", "60")
@@ -166,7 +180,7 @@ def test_dynamic_policy_change():
     print("\n   Step E: Query agent about refunds (modified policy)...")
     result_v2 = query_agent(
         "Customer requesting a refund after 45 days",
-        {"plan": "annual", "days_since_purchase": 45, "tenure_months": 6}
     )
     print(f"   v2 action: {result_v2.get('recommended_action')}")
     print(f"   v2 rule: {result_v2.get('rule_applied', 'N/A')}")
@@ -178,11 +192,22 @@ def test_dynamic_policy_change():
     # Check if actions actually changed based on policy
     v1_action_lower = str(result_v1.get("recommended_action", "")).lower()
     v2_action_lower = str(result_v2.get("recommended_action", "")).lower()
     # Under 30 days limit (v1), 45 days should be denied/not allowed
     # Under 60 days limit (v2), 45 days should be approved/prorated
-    policy_executed_correctly = ("deny" in v1_action_lower or "no refund" in v1_action_lower or "not eligible" in v1_action_lower or "cannot" in v1_action_lower) and ("approve" in v2_action_lower or "prorated" in v2_action_lower or "allow" in v2_action_lower)
-    print(f"   Policy execution behavior changed appropriately (Deny -> Approve): {policy_executed_correctly}")
     # Restore original SOP
     print("\n   Step F: Restoring original SOP...")
@@ -197,11 +222,62 @@ def test_dynamic_policy_change():
         print("   [FAIL] Skills did NOT change - system may still be static")
     if policy_executed_correctly:
-        print("   [PASS] Agent correctly executed the policy change (Denied at 45 days under 30-day SOP, Approved under 60-day SOP!)")
     elif v2_mentions_60:
         print("   [PASS] Agent response reflects the modified policy (60 days)")
     else:
-        print("   [WARN] Agent response did not change behavior or mention the new policy")
 def main():
@@ -213,7 +289,9 @@ def main():
         check_health()
     except Exception as e:
         print(f"   [FATAL] API not reachable: {e}")
-        print("   Make sure backend is running: python -m uvicorn backend.main:app --port 8080")
         sys.exit(1)
     # Test 1: Compile and get skills
@@ -240,6 +318,12 @@ def main():
         if os.path.exists(SOP_PATH):
             print("   Attempting to restore original SOP...")
     print("\n" + "=" * 60)
     print("SMOKE TEST COMPLETE")
     print("=" * 60)

 Requires: backend running on http://localhost:8080
 """
 import requests
 import time
 import sys
     # Poll the compile stream for completion
     for attempt in range(60):  # max 5 minutes
         time.sleep(5)
         # Check job status explicitly
         try:
             status_req = requests.get(f"{API}/compile/{job_id}/status")
                 job_info = status_req.json()
                 if job_info.get("status") == "error":
                     print(f"   [ERROR] Job failed: {job_info.get('error_detail')}")
+                    raise RuntimeError(
+                        f"Compilation job failed: {job_info.get('error_detail')}"
+                    )
                 if job_info.get("status") == "complete":
                     # Fetch skills
                     sk = requests.get(f"{API}/skills/{COMPANY}")
                 raise
             pass
+        print(f"   Waiting... ({(attempt + 1) * 5}s)")
     # Timeout reached. Fetch final status.
     final_status = "Unknown"
     except Exception:
         pass
+    raise TimeoutError(
+        f"Compilation did not complete within 5 minutes. Final status: {final_status}, Error: {final_error}"
+    )
 def get_skills():
 def query_agent(scenario: str, context: dict = None):
+    r = requests.post(
+        f"{API}/agent/query",
+        json={
+            "company_id": COMPANY,
+            "scenario_text": scenario,
+            "json_context": context or {},
+        },
+    )
     assert r.status_code == 200, f"Agent query failed: {r.text}"
     return r.json()
     if confidence < 0.4:
         print("   [PASS] Low confidence for gibberish")
     else:
+        print(
+            f"   [WARN] Confidence {confidence} is higher than expected for gibberish"
+        )
 def test_dynamic_policy_change():
     print("\n   Step B: Query agent about refunds (original policy)...")
     result_v1 = query_agent(
         "Customer requesting a refund after 45 days",
+        {"plan": "annual", "days_since_purchase": 45, "tenure_months": 6},
     )
     print(f"   v1 action: {result_v1.get('recommended_action')}")
     print(f"   v1 rule: {result_v1.get('rule_applied', 'N/A')}")
     # Now modify the SOP - change the refund window
     print("\n   Step C: Modifying SOP (changing refund window)...")
+    modified_sop = (
+        original_sop.replace("30 day", "60 day")
+        .replace("30-day", "60-day")
+        .replace("30 days", "60 days")
+    )
     if modified_sop == original_sop:
         # Try alternative patterns
         modified_sop = original_sop.replace("30", "60")
     print("\n   Step E: Query agent about refunds (modified policy)...")
     result_v2 = query_agent(
         "Customer requesting a refund after 45 days",
+        {"plan": "annual", "days_since_purchase": 45, "tenure_months": 6},
     )
     print(f"   v2 action: {result_v2.get('recommended_action')}")
     print(f"   v2 rule: {result_v2.get('rule_applied', 'N/A')}")
     # Check if actions actually changed based on policy
     v1_action_lower = str(result_v1.get("recommended_action", "")).lower()
     v2_action_lower = str(result_v2.get("recommended_action", "")).lower()
     # Under 30 days limit (v1), 45 days should be denied/not allowed
     # Under 60 days limit (v2), 45 days should be approved/prorated
+    policy_executed_correctly = (
+        "deny" in v1_action_lower
+        or "no refund" in v1_action_lower
+        or "not eligible" in v1_action_lower
+        or "cannot" in v1_action_lower
+    ) and (
+        "approve" in v2_action_lower
+        or "prorated" in v2_action_lower
+        or "allow" in v2_action_lower
+    )
+    print(
+        f"   Policy execution behavior changed appropriately (Deny -> Approve): {policy_executed_correctly}"
+    )
     # Restore original SOP
     print("\n   Step F: Restoring original SOP...")
         print("   [FAIL] Skills did NOT change - system may still be static")
     if policy_executed_correctly:
+        print(
+            "   [PASS] Agent correctly executed the policy change (Denied at 45 days under 30-day SOP, Approved under 60-day SOP!)"
+        )
     elif v2_mentions_60:
         print("   [PASS] Agent response reflects the modified policy (60 days)")
     else:
+        print(
+            "   [WARN] Agent response did not change behavior or mention the new policy"
+        )
+def test_semantic_diff():
+    """Diff the two most recently listed brain versions via /diff/{v1}/{v2}.
+    Skips when the version history cannot be fetched or holds fewer than two
+    entries; otherwise prints the diff summary and whether any change was found.
+    """
+    print("\n5. Testing semantic diff engine...")
+    # Get version history
+    history = requests.get(f"{API}/brain/versions/{COMPANY}")
+    if history.status_code != 200:
+        print("   [SKIP] Could not fetch version history")
+        return
+    versions = history.json().get("versions", [])
+    if len(versions) < 2:
+        print("   [SKIP] Need at least 2 compiled versions for diff")
+        return
+    # The second entry is used as the "from" side, the first as the "to" side.
+    v2 = versions[0]["version"]
+    v1 = versions[1]["version"]
+    print(f"   Comparing {v1} → {v2}")
+    response = requests.get(f"{API}/diff/{v1}/{v2}", params={"company_id": COMPANY})
+    if response.status_code != 200:
+        print(
+            f"   [FAIL] Diff endpoint returned {response.status_code}: {response.text}"
+        )
+        return
+    summary = response.json().get("summary", {})
+    added = summary.get("added_count", 0)
+    deleted = summary.get("deleted_count", 0)
+    modified = summary.get("modified_count", 0)
+    shifts = summary.get("confidence_shift_count", 0)
+    print(f"   Added: {added}, Deleted: {deleted}, Modified: {modified}")
+    print(f"   Confidence shifts: {shifts}")
+    print(
+        f"   V1 skills: {summary.get('v1_skills', 0)} → V2 skills: {summary.get('v2_skills', 0)}"
+    )
+    # Any non-zero counter means the diff engine saw a difference.
+    if any(count > 0 for count in (added, modified, deleted, shifts)):
+        print("   [PASS] Semantic diff detected changes between versions")
+        return
+    print(
+        "   [WARN] Diff returned no changes — may indicate skills didn't change or diff has a bug"
+    )
 def main():
         check_health()
     except Exception as e:
         print(f"   [FATAL] API not reachable: {e}")
+        print(
+            "   Make sure backend is running: python -m uvicorn backend.main:app --port 8080"
+        )
         sys.exit(1)
     # Test 1: Compile and get skills
         if os.path.exists(SOP_PATH):
             print("   Attempting to restore original SOP...")
+    # Test 4: Semantic diff
+    try:
+        test_semantic_diff()
+    except Exception as e:
+        print(f"   [ERROR] Diff test failed: {e}")
     print("\n" + "=" * 60)
     print("SMOKE TEST COMPLETE")
     print("=" * 60)

scripts/stress_test.py ADDED Viewed

	@@ -0,0 +1,278 @@

+"""
+Stress test: proves compiler resilience under adversarial conditions.
+- Malformed markdown injection
+- Contradictory policy data
+- Semantic diff verification
+- Multi-compile stability (three consecutive compilations)
+Usage:
+    python scripts/stress_test.py
+Requires: backend running on http://localhost:8080
+"""
+import requests
+import time
+import sys
+import os
+import json
+# Base URL of the backend under test.
+API = "http://localhost:8080"
+# Company id sent with every compile request and used in the source path below.
+COMPANY = "rivanly-inc"
+# Parent of the directory that contains this script.
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+# Directory holding COMPANY's source files; tests inject and modify files here.
+TEST_DIR = os.path.join(BASE_DIR, "data", "sources", COMPANY)
+def check_health():
+    """Check that ``GET /health`` answers with HTTP 200 and print its status fields.
+    Returns:
+        True when the health check succeeds.
+    Raises:
+        AssertionError: If the endpoint responds with a non-200 status.
+        requests.RequestException: If the backend is unreachable or does not
+            answer within the timeout.
+    """
+    print("1. Checking API health...")
+    # requests has no default timeout; bound the call so an unresponsive
+    # backend fails fast instead of hanging the whole script.
+    r = requests.get(f"{API}/health", timeout=10)
+    # Raised explicitly (not via `assert`) so the check survives `python -O`.
+    if r.status_code != 200:
+        raise AssertionError(f"Health check failed: {r.text}")
+    data = r.json()
+    print(f"   API: {data['status']}, vLLM: {data['vllm']}, DB: {data['database']}")
+    return True
+def compile_and_wait(label="Compile", poll_interval=5, max_attempts=60):
+    """Trigger a compilation for COMPANY and poll its job until it finishes.
+    Args:
+        label: Tag printed in the trigger message so runs can be told apart.
+        poll_interval: Seconds to sleep before each status poll.
+        max_attempts: Number of status polls before giving up.
+    Returns:
+        The JSON body of ``GET /skills/{COMPANY}`` when the job completes and
+        the skills fetch returns 200; ``{"status": "error", "error": ...}``
+        when the job reports an error; ``{"status": "timeout"}`` when neither
+        happened within ``max_attempts`` polls.
+    Raises:
+        AssertionError: If ``POST /compile`` does not return HTTP 200.
+        requests.RequestException: If the trigger request itself fails.
+    """
+    print(f"   [{label}] Triggering compilation...")
+    # Every request is bounded: requests has no default timeout, and a hung
+    # backend would otherwise block this loop forever.
+    r = requests.post(f"{API}/compile", json={"company_id": COMPANY}, timeout=30)
+    # Raised explicitly (not via `assert`) so the check survives `python -O`.
+    if r.status_code != 200:
+        raise AssertionError(f"Compile failed: {r.text}")
+    job_id = r.json()["job_id"]
+    print(f"   Job ID: {job_id}")
+    for attempt in range(max_attempts):
+        time.sleep(poll_interval)
+        try:
+            status_req = requests.get(f"{API}/compile/{job_id}/status", timeout=30)
+            if status_req.status_code == 200:
+                job_info = status_req.json()
+                if job_info.get("status") == "error":
+                    print(f"   [FAIL] Job failed: {job_info.get('error_detail')}")
+                    return {"status": "error", "error": job_info.get("error_detail")}
+                if job_info.get("status") == "complete":
+                    sk = requests.get(f"{API}/skills/{COMPANY}", timeout=30)
+                    if sk.status_code == 200:
+                        data = sk.json()
+                        skills = data.get("skills", [])
+                        print(
+                            f"   Compilation produced {len(skills)} skills (version: {data.get('version', 'N/A')})"
+                        )
+                        return data
+        except Exception as e:
+            # Polling stays best-effort (a transient failure must not abort
+            # the wait), but the cause is reported instead of being dropped.
+            print(f"   [WARN] Status poll failed, retrying: {e}")
+        print(f"   Waiting... ({(attempt + 1) * poll_interval}s)")
+    return {"status": "timeout"}
+def test_malformed_markdown():
+    """Inject malformed markdown and verify the pipeline doesn't crash.
+    Writes ``malformed_test.md`` into TEST_DIR, recompiles, and removes the
+    file again in a ``finally`` block so a failed or interrupted run cannot
+    leave corrupt input among the real sources. A polling timeout is reported
+    as a failure rather than as survival.
+    """
+    print("\n2. Malformed source resilience test...")
+    malformed = """## Corrupted Table
+| Header 1 | Header 2
+| --- | ---
+| broken row
+## Nested
+### Subsection with no body
+||||
+|--|-|
+Unclosed bracket [[[[
+"""
+    path = os.path.join(TEST_DIR, "malformed_test.md")
+    try:
+        # Save malformed file
+        with open(path, "w", encoding="utf-8") as f:
+            f.write(malformed)
+        print("   Injected malformed markdown file")
+        # Recompile
+        result = compile_and_wait("Malformed")
+    finally:
+        # Clean up even when compile_and_wait raises.
+        if os.path.exists(path):
+            os.remove(path)
+        print("   Cleaned up test file")
+    status = result.get("status")
+    if status == "error":
+        print(
+            f"   [FAIL] Pipeline crashed on malformed input: {result.get('error', '')}"
+        )
+    elif status == "timeout":
+        # The job never reached "complete", so survival was not demonstrated.
+        print("   [FAIL] Compilation did not finish before the polling timeout")
+    else:
+        print("   [PASS] Pipeline survived malformed input")
+def test_contradictory_policy():
+    """Inject a Slack message that contradicts the refund SOP and recompile.
+    NOTE(review): this only checks that compilation finishes and reports how
+    many skills were produced; it does not inspect whether a contradiction was
+    actually detected.
+    The injected ``slack_hot_take.json`` is removed in a ``finally`` block so
+    it cannot linger among the real sources if compilation raises. A polling
+    timeout is reported as a failure.
+    """
+    print("\n3. Contradiction detection test...")
+    # Slack message that contradicts refund SOP
+    contradictory = json.dumps(
+        [
+            {
+                "user": "founder",
+                "channel": "revenue",
+                "text": "Ignore the 14-day refund policy. If a customer complains loudly enough, give them whatever they want. We'll sort it out later.",
+            }
+        ]
+    )
+    path = os.path.join(TEST_DIR, "slack_hot_take.json")
+    try:
+        with open(path, "w", encoding="utf-8") as f:
+            f.write(contradictory)
+        print("   Injected contradictory Slack hot take")
+        # Compile
+        result = compile_and_wait("Contradiction")
+    finally:
+        # Clean up even when compile_and_wait raises.
+        if os.path.exists(path):
+            os.remove(path)
+        print("   Cleaned up test file")
+    status = result.get("status")
+    if status == "error":
+        print(
+            f"   [FAIL] Pipeline crashed on contradictory input: {result.get('error', '')}"
+        )
+    elif status == "timeout":
+        # The job never reached "complete", so nothing was demonstrated.
+        print("   [FAIL] Compilation did not finish before the polling timeout")
+    else:
+        skills = result.get("skills", [])
+        print(f"   Produced {len(skills)} skills despite contradiction")
+        print("   [PASS] Contradiction test passed")
+def _latest_brain_version():
+    """Return the first version listed for COMPANY, or "unknown" if unavailable."""
+    r = requests.get(f"{API}/brain/versions/{COMPANY}", timeout=30)
+    if r.status_code == 200:
+        versions = r.json().get("versions", [])
+        if versions:
+            # Presumably the list is newest-first — verify against the backend.
+            return versions[0]["version"]
+    return "unknown"
+def test_diff_works():
+    """Compile, change a file, recompile, verify diff is non-empty.
+    The refund SOP is rewritten with a 60-day window, recompiled, and then
+    restored in a ``finally`` block so the repository's source file is never
+    left modified, even if compilation raises. The test is skipped when the
+    SOP contains no 30-day text to change or when no new version appears,
+    because a diff in either case would not reflect the edit.
+    """
+    print("\n4. Semantic diff verification test...")
+    sop_path = os.path.join(TEST_DIR, "notion_refund_sop.md")
+    if not os.path.exists(sop_path):
+        print("   [SKIP] Refund SOP not found")
+        return
+    # Read original
+    with open(sop_path, "r", encoding="utf-8") as f:
+        original = f.read()
+    modified = original.replace("30 day", "60 day").replace("30-day", "60-day")
+    if modified == original:
+        # Nothing would change on disk, so any reported diff would be noise.
+        print("   [SKIP] Refund SOP contains no 30-day window to modify")
+        return
+    # Get current version before touching the file.
+    v1 = _latest_brain_version()
+    try:
+        # Modify and recompile
+        with open(sop_path, "w", encoding="utf-8") as f:
+            f.write(modified)
+        compile_and_wait("Diff V2")
+        # Get new version
+        v2 = _latest_brain_version()
+    finally:
+        # Restore original no matter how the block above exits.
+        with open(sop_path, "w", encoding="utf-8") as f:
+            f.write(original)
+        print("   Restored original SOP")
+    if v1 == "unknown" or v2 == "unknown":
+        print("   [SKIP] Could not determine versions for diff")
+        return
+    if v1 == v2:
+        # Recompilation failed or timed out; diffing a version with itself
+        # says nothing about the diff engine.
+        print("   [SKIP] Recompilation did not produce a new version")
+        return
+    # Call diff endpoint
+    r = requests.get(
+        f"{API}/diff/{v1}/{v2}", params={"company_id": COMPANY}, timeout=30
+    )
+    if r.status_code != 200:
+        print(f"   [FAIL] Diff endpoint returned {r.status_code}")
+        return
+    diff = r.json()
+    summary = diff.get("summary", {})
+    total_changes = (
+        summary.get("added_count", 0)
+        + summary.get("deleted_count", 0)
+        + summary.get("modified_count", 0)
+        + summary.get("confidence_shift_count", 0)
+    )
+    print(f"   Total changes detected: {total_changes}")
+    print(
+        f"   V1: {summary.get('v1_skills')} skills, V2: {summary.get('v2_skills')} skills"
+    )
+    if total_changes > 0:
+        print("   [PASS] Semantic diff correctly detected changes")
+        for m in diff.get("modified", []):
+            print(f"     - {m['id']}: {m['field']} changed")
+        for cs in diff.get("confidence_shifts", []):
+            print(
+                f"     - {cs['id']}: {cs['old_confidence']} → {cs['new_confidence']}"
+            )
+    else:
+        print("   [WARN] No changes detected — manual verification needed")
+def test_multi_compile_stability(runs=3):
+    """Run several compilations back to back to verify stability.
+    Args:
+        runs: Number of consecutive compilations to perform.
+    Returns:
+        True when every run completes; False as soon as one run reports an
+        error or does not finish before the polling timeout.
+    """
+    print("\n5. Multi-compile stability test...")
+    for run in range(1, runs + 1):
+        print(f"\n   Run {run}/{runs}...")
+        result = compile_and_wait(f"Stability Run {run}")
+        status = result.get("status")
+        if status == "error":
+            print(f"   [FAIL] Compilation {run} failed: {result.get('error', '')}")
+            return False
+        if status == "timeout":
+            # A run that never completed must not be counted as a success.
+            print(f"   [FAIL] Compilation {run} did not finish before the polling timeout")
+            return False
+        skills = result.get("skills", [])
+        print(f"   Run {run}: {len(skills)} skills produced")
+    print(f"   [PASS] {runs} consecutive compilations succeeded")
+    return True
+def main():
+    """Run the health check, then each stress test in order.
+    Exits with status 1 when the health check raises. An exception from an
+    individual test is printed and the remaining tests still run.
+    """
+    rule = "=" * 60
+    print(rule)
+    print("KERNL STRESS TEST — Proving compiler resilience")
+    print(rule)
+    try:
+        check_health()
+    except Exception as e:
+        print(f"   [FATAL] API not reachable: {e}")
+        sys.exit(1)
+    # (name used in the error message, test function), in execution order:
+    # malformed input, contradictory input, semantic diff, multi-compile stability.
+    stages = (
+        ("Malformed markdown", test_malformed_markdown),
+        ("Contradiction", test_contradictory_policy),
+        ("Diff", test_diff_works),
+        ("Stability", test_multi_compile_stability),
+    )
+    for title, run_stage in stages:
+        try:
+            run_stage()
+        except Exception as err:
+            print(f"   [ERROR] {title} test failed: {err}")
+    print("\n" + rule)
+    print("STRESS TEST COMPLETE")
+    print(rule)
+# Run the stress tests only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()