kernl-backend / backend /graph /nodes /extract_exceptions.py
ALPHA0008's picture
feat: dashboard UI overhaul + auth flow + auto-company-load
5f7dc7e
from backend.graph.state import BrainState
from backend.llm import safe_llm_json_call
from backend.sse import emit
MAX_CHUNK_CHARS = 12000
def _cap_chunks(chunks: list[dict]) -> str:
parts = []
chars = 0
for c in chunks:
text = c.get("text", "")
if chars + len(text) > MAX_CHUNK_CHARS:
break
parts.append(text)
chars += len(text)
return "\n\n---\n\n".join(parts)
# System prompt passed as the first argument to safe_llm_json_call() by
# extract_exceptions(). It restricts the model to emitting a bare JSON array
# (or [] when nothing is found) whose items carry exactly the fields
# id, category, condition, action, rationale and source_files, and ends with
# one worked example of such an array.
SYSTEM = """You are an exception extraction specialist. Your ONLY job is to extract EXCEPTIONS, EDGE CASES, CONSTRAINTS, CONDITIONAL RULES, and FORBIDDEN ACTIONS from company communications.
Output ONLY a JSON array. No preamble. No explanation. No markdown.
Each item must have exactly these fields:
- id: short snake_case identifier (e.g., "no_ltd_refunds")
- category: operational domain
- condition: the specific condition that triggers this exception
- action: what happens when this exception applies
- rationale: why this exception exists
- source_files: array of filenames this came from
If you find no exceptions, output: []
Example: [{"id": "no_ltd_refunds", "category": "Customer Support", "condition": "Customer has a lifetime deal account", "action": "Never process refunds for lifetime deal accounts", "rationale": "Explicitly stated in refund SOP as forbidden action", "source_files": ["notion_refund_sop.md"]}]"""
async def extract_exceptions(state: BrainState) -> dict:
    """Run the exception-extraction step for one job.

    Reads ``job_id`` (required) and ``all_chunks`` (optional, defaults to an
    empty list) from *state*, emits a ``stage`` event before and after the
    LLM call, and sends the capped chunk text to ``safe_llm_json_call``
    together with the ``SYSTEM`` prompt.

    Returns:
        A dict with the single key ``"exception_rules"`` holding whatever
        the LLM call returned.
    """
    job = state["job_id"]
    all_chunks = state.get("all_chunks", [])
    print(f"[{job}] Node extract_exceptions: processing {len(all_chunks)} chunks")

    # Announce the start of the stage before any LLM work happens.
    started = {
        "name": "EXTRACT_EXCEPTIONS",
        "detail": "Extracting exceptions and edge cases...",
    }
    await emit(job, "stage", started)

    # Build the user message from the size-capped chunk text.
    capped_text = _cap_chunks(all_chunks)
    prompt = (
        "Extract all exceptions, edge cases, constraints, and forbidden actions "
        f"from this company data:\n\n{capped_text}"
    )
    extracted = await safe_llm_json_call(SYSTEM, prompt, max_tokens=2048)

    found = len(extracted)
    print(f"[{job}] extract_exceptions: extracted {found} exceptions")

    # Report completion along with how many items came back.
    finished = {
        "name": "EXTRACT_EXCEPTIONS_DONE",
        "detail": f"Found {found} exceptions",
    }
    await emit(job, "stage", finished)

    return {"exception_rules": extracted}