kernl-backend / backend /graph /nodes /link_evidence.py
ALPHA0008's picture
refactor: replace sequential 3-node pipeline with parallel 13-node multi-agent architecture
a688aff
import json
from backend.graph.state import BrainState
from backend.llm import llm_call
from backend.sse import emit
# Upper bound on how many source chunks are inlined into the LLM request.
_MAX_EVIDENCE_CHUNKS = 25

# Completion budget passed to llm_call for the enriched-skills JSON.
_LINK_EVIDENCE_MAX_TOKENS = 4096

# System prompt for the evidence-linking call. This text is sent to the model
# verbatim, so it is runtime data rather than documentation.
_LINK_EVIDENCE_PROMPT = """You are an evidence linking specialist. Below are draft operational skills and the original source chunks they were extracted from.
For each skill, find the most specific evidence excerpts from the source chunks that support it. Enrich each skill's evidence array with concrete quotes.
Return ONLY a JSON object:
{
"skills": [
{
"id": "skill_id",
"category": "...",
"rule": "...",
"rationale": "...",
"evidence": ["Exact quote from source that supports this rule"],
"source_files": ["filename.ext"]
}
]
}
Keep all existing fields intact. Only add or improve the evidence array."""


def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if any."""
    clean = text.strip()
    if clean.startswith("```json"):
        clean = clean[7:]
    elif clean.startswith("```"):
        clean = clean[3:]
    if clean.endswith("```"):
        clean = clean[:-3]
    return clean.strip()


def _parse_enriched_skills(response_str: str, fallback: list) -> list:
    """Extract the ``skills`` list from the model's JSON reply.

    Args:
        response_str: Raw text returned by the model, optionally wrapped in
            a Markdown code fence.
        fallback: Value returned when the reply is a JSON object that has no
            ``"skills"`` key.

    Returns:
        The list stored under ``"skills"``, or *fallback* if the key is absent.

    Raises:
        ValueError: If the reply is not valid JSON, is not a JSON object, or
            its ``"skills"`` value is not a list.
    """
    data = json.loads(_strip_code_fence(response_str))
    if not isinstance(data, dict):
        raise ValueError(
            f"expected a JSON object, got {type(data).__name__}"
        )
    skills = data.get("skills", fallback)
    # A well-formed reply can still carry `"skills": null` or a scalar; reject
    # it here so the caller falls back instead of crashing later on len().
    if not isinstance(skills, list):
        raise ValueError(
            f'expected "skills" to be a list, got {type(skills).__name__}'
        )
    return skills


async def link_evidence(state: BrainState) -> dict:
    """Ask the LLM to attach supporting source quotes to each draft skill.

    Reads ``job_id``, ``draft_skills`` and ``all_chunks`` from *state*, emits
    a ``LINKING_EVIDENCE`` stage event, and sends the draft skills together
    with the text of the first ``_MAX_EVIDENCE_CHUNKS`` chunks to the model.
    The reply is parsed as JSON; when it cannot be parsed or does not have
    the expected shape, the unmodified draft skills are used instead so the
    node never fails because of a malformed model reply.

    Args:
        state: Graph state; must contain ``job_id``. ``draft_skills`` and
            ``all_chunks`` default to empty lists when missing.
            NOTE(review): chunks are assumed to be dicts with an optional
            string ``"text"`` entry - confirm against the producer node.

    Returns:
        A dict with the single key ``"skills_with_evidence"`` holding the
        enriched skills (or the original drafts on fallback, or an empty
        list when there are no draft skills).
    """
    job_id = state["job_id"]
    draft_skills = state.get("draft_skills", [])
    chunks = state.get("all_chunks", [])

    print(
        f"[{job_id}] Node link_evidence: enriching {len(draft_skills)} skills with evidence"
    )
    await emit(
        job_id,
        "stage",
        {
            "name": "LINKING_EVIDENCE",
            "detail": f"Linking evidence for {len(draft_skills)} skills",
        },
    )

    # Nothing to enrich: skip the LLM round trip entirely.
    if not draft_skills:
        return {"skills_with_evidence": []}

    skills_text = json.dumps({"skills": draft_skills}, indent=2)
    chunks_text = "\n\n---\n\n".join(
        c.get("text", "") for c in chunks[:_MAX_EVIDENCE_CHUNKS]
    )
    user_content = (
        f"--- Skills ---\n{skills_text}\n\n--- Source Chunks ---\n{chunks_text}"
    )

    response_str = await llm_call(
        _LINK_EVIDENCE_PROMPT,
        user_content,
        max_tokens=_LINK_EVIDENCE_MAX_TOKENS,
    )

    # Deliberately best-effort: any failure to interpret the reply (invalid
    # JSON, wrong shape, non-string response) degrades to the draft skills
    # rather than aborting the job.
    try:
        enriched = _parse_enriched_skills(response_str, draft_skills)
    except Exception as e:
        print(f"[{job_id}] [link_evidence] Parse error: {e}")
        enriched = draft_skills

    await emit(
        job_id,
        "stage",
        {
            "name": "LINKING_DONE",
            "detail": f"Evidence linked for {len(enriched)} skills",
        },
    )
    print(f"[{job_id}] link_evidence: done")
    return {"skills_with_evidence": enriched}