File size: 2,304 Bytes
a688aff
 
 
 
5f7dc7e
 
 
 
 
 
 
 
 
 
 
 
 
 
a688aff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f7dc7e
a688aff
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from backend.graph.state import BrainState
from backend.llm import safe_llm_json_call
from backend.sse import emit

# Upper bound on the total characters of chunk text placed in one LLM prompt.
# The separators inserted between chunks are not counted against this budget.
MAX_CHUNK_CHARS = 12000


def _cap_chunks(chunks: list[dict]) -> str:
    """Join chunk texts, in order, until the character budget is used up.

    Chunks are taken whole and in their given order. Collection stops at the
    first chunk that would push the running total past ``MAX_CHUNK_CHARS``;
    later chunks are not considered even if they would fit.

    Args:
        chunks: Dicts that may carry a ``"text"`` entry. A missing or ``None``
            text is treated as an empty string.

    Returns:
        The selected texts joined with ``"\\n\\n---\\n\\n"``. If no text has
        been collected yet and the current chunk alone exceeds the budget, a
        prefix of that chunk truncated to ``MAX_CHUNK_CHARS`` is returned
        rather than an empty string.
    """
    parts: list[str] = []
    used = 0
    for chunk in chunks:
        # ``or ""`` also covers an explicit ``"text": None`` entry, which
        # would otherwise make ``len()`` raise TypeError.
        text = chunk.get("text") or ""
        if used + len(text) > MAX_CHUNK_CHARS:
            if used == 0:
                # Nothing usable collected so far: send a truncated prefix
                # instead of prompting the model with no data at all.
                parts.append(text[:MAX_CHUNK_CHARS])
            break
        parts.append(text)
        used += len(text)
    return "\n\n---\n\n".join(parts)


# Prompt passed as the first argument to safe_llm_json_call in
# extract_exceptions. It tells the model to reply with nothing but a JSON
# array whose items carry the six fields listed below (or [] when nothing is
# found). This text is sent verbatim at runtime, so any edit changes behavior.
SYSTEM = """You are an exception extraction specialist. Your ONLY job is to extract EXCEPTIONS, EDGE CASES, CONSTRAINTS, CONDITIONAL RULES, and FORBIDDEN ACTIONS from company communications.

Output ONLY a JSON array. No preamble. No explanation. No markdown.
Each item must have exactly these fields:
  - id: short snake_case identifier (e.g., "no_ltd_refunds")
  - category: operational domain
  - condition: the specific condition that triggers this exception
  - action: what happens when this exception applies
  - rationale: why this exception exists
  - source_files: array of filenames this came from

If you find no exceptions, output: []
Example: [{"id": "no_ltd_refunds", "category": "Customer Support", "condition": "Customer has a lifetime deal account", "action": "Never process refunds for lifetime deal accounts", "rationale": "Explicitly stated in refund SOP as forbidden action", "source_files": ["notion_refund_sop.md"]}]"""


async def extract_exceptions(state: BrainState) -> dict:
    """Pipeline node: ask the LLM for exception rules in the collected chunks.

    Reads ``job_id`` (required) and ``all_chunks`` (optional, defaults to an
    empty list) from ``state``. Emits a ``stage`` event before and after the
    LLM call, and logs progress with ``print``.

    Returns:
        A dict with the single key ``exception_rules`` holding whatever
        ``safe_llm_json_call`` returned.
    """
    job = state["job_id"]
    source_chunks = state.get("all_chunks", [])

    print(f"[{job}] Node extract_exceptions: processing {len(source_chunks)} chunks")
    started = {
        "name": "EXTRACT_EXCEPTIONS",
        "detail": "Extracting exceptions and edge cases...",
    }
    await emit(job, "stage", started)

    # Only the capped, joined chunk text goes into the prompt.
    prompt = (
        "Extract all exceptions, edge cases, constraints, and forbidden actions "
        "from this company data:\n\n" + _cap_chunks(source_chunks)
    )
    rules = await safe_llm_json_call(SYSTEM, prompt, max_tokens=2048)

    print(f"[{job}] extract_exceptions: extracted {len(rules)} exceptions")
    finished = {
        "name": "EXTRACT_EXCEPTIONS_DONE",
        "detail": f"Found {len(rules)} exceptions",
    }
    await emit(job, "stage", finished)

    return {"exception_rules": rules}