Spaces:

lablab-ai-amd-developer-hackathon
/

kernl-backend

Sleeping

App Files Files Community

kernl-backend / backend /graph /nodes /link_evidence.py

ALPHA0008

refactor: replace sequential 3-node pipeline with parallel 13-node multi-agent architecture

a688aff 14 days ago

raw

history blame contribute delete

2.37 kB

	import json
	from backend.graph.state import BrainState
	from backend.llm import llm_call
	from backend.sse import emit


	# Only the first chunks are sent to the model (presumably to bound prompt
	# size -- TODO confirm the intended budget with the pipeline owner).
	_MAX_SOURCE_CHUNKS = 25
	# Value passed as ``max_tokens`` to ``llm_call`` for the enriched reply.
	_MAX_RESPONSE_TOKENS = 4096

	_LINK_EVIDENCE_PROMPT = """You are an evidence linking specialist. Below are draft operational skills and the original source chunks they were extracted from.

	For each skill, find the most specific evidence excerpts from the source chunks that support it. Enrich each skill's evidence array with concrete quotes.

	Return ONLY a JSON object:
	{
	"skills": [
	{
	"id": "skill_id",
	"category": "...",
	"rule": "...",
	"rationale": "...",
	"evidence": ["Exact quote from source that supports this rule"],
	"source_files": ["filename.ext"]
	}
	]
	}

	Keep all existing fields intact. Only add or improve the evidence array."""


	def _strip_code_fence(text: str) -> str:
	    """Return *text* without a leading/trailing Markdown code fence."""
	    cleaned = text.strip()
	    if cleaned.startswith("```json"):
	        cleaned = cleaned[7:]
	    elif cleaned.startswith("```"):
	        cleaned = cleaned[3:]
	    if cleaned.endswith("```"):
	        cleaned = cleaned[:-3]
	    return cleaned.strip()


	def _parse_enriched_skills(response_str, draft_skills: list) -> list:
	    """Extract the ``"skills"`` list from the model reply.

	    Args:
	        response_str: Raw reply returned by ``llm_call``.
	        draft_skills: Skills to use when the reply is a JSON object that has
	            no ``"skills"`` key.

	    Returns:
	        The list stored under ``"skills"``, or *draft_skills* when that key
	        is absent.

	    Raises:
	        ValueError: If the reply is not a string, is not valid JSON
	            (``json.JSONDecodeError`` is a ``ValueError``), is not a JSON
	            object, or its ``"skills"`` value is not a list of objects.
	    """
	    if not isinstance(response_str, str):
	        raise ValueError(
	            f"expected a string reply, got {type(response_str).__name__}"
	        )

	    data = json.loads(_strip_code_fence(response_str))
	    if not isinstance(data, dict):
	        raise ValueError("top-level JSON value is not an object")
	    if "skills" not in data:
	        return draft_skills

	    skills = data["skills"]
	    # Without this check a reply such as {"skills": null} would parse fine
	    # and only blow up later, outside the caller's error handling.
	    if not isinstance(skills, list) or not all(
	        isinstance(skill, dict) for skill in skills
	    ):
	        raise ValueError('"skills" is not a list of objects')
	    return skills


	async def link_evidence(state: BrainState) -> dict:
	    """Ask the LLM to enrich each draft skill's evidence with source quotes.

	    Reads ``job_id`` (required), ``draft_skills`` and ``all_chunks`` from
	    *state*; a missing or ``None`` value for the latter two is treated as an
	    empty list. Emits a ``LINKING_EVIDENCE`` stage event, and, when there are
	    draft skills to process, a ``LINKING_DONE`` stage event afterwards.

	    Args:
	        state: Graph state for the current job.

	    Returns:
	        ``{"skills_with_evidence": [...]}``. The list is the model's enriched
	        skills, or the unchanged draft skills when the reply cannot be
	        parsed into a list of skill objects. It is empty when there are no
	        draft skills.
	    """
	    job_id = state["job_id"]
	    draft_skills = state.get("draft_skills") or []
	    chunks = state.get("all_chunks") or []

	    print(
	        f"[{job_id}] Node link_evidence: enriching {len(draft_skills)} skills with evidence"
	    )
	    await emit(
	        job_id,
	        "stage",
	        {
	            "name": "LINKING_EVIDENCE",
	            "detail": f"Linking evidence for {len(draft_skills)} skills",
	        },
	    )

	    if not draft_skills:
	        return {"skills_with_evidence": []}

	    skills_text = json.dumps({"skills": draft_skills}, indent=2)
	    # Assumes each chunk is a mapping with an optional "text" entry; a
	    # missing or None text contributes an empty segment instead of crashing.
	    chunks_text = "\n\n---\n\n".join(
	        chunk.get("text") or "" for chunk in chunks[:_MAX_SOURCE_CHUNKS]
	    )
	    user_content = (
	        f"--- Skills ---\n{skills_text}\n\n--- Source Chunks ---\n{chunks_text}"
	    )

	    response_str = await llm_call(
	        _LINK_EVIDENCE_PROMPT, user_content, max_tokens=_MAX_RESPONSE_TOKENS
	    )

	    try:
	        enriched = _parse_enriched_skills(response_str, draft_skills)
	    except (ValueError, TypeError, RecursionError) as exc:
	        # Best effort: an unusable reply must not lose the draft skills.
	        # RecursionError covers pathologically nested JSON in the reply.
	        print(f"[{job_id}] [link_evidence] Parse error: {exc}")
	        enriched = draft_skills

	    await emit(
	        job_id,
	        "stage",
	        {
	            "name": "LINKING_DONE",
	            "detail": f"Evidence linked for {len(enriched)} skills",
	        },
	    )
	    print(f"[{job_id}] link_evidence: done")
	    return {"skills_with_evidence": enriched}