from typing import Dict, List

# Static instruction block placed at the top of every prompt.
# NOTE(review): the example texts "'compares to '" and "compare our method to
# and other baselines" read as if a placeholder naming the target paper was
# lost at some point — confirm the intended wording. They are kept as-is here.
_INSTRUCTION_LINES = (
    "You are extracting how a citing paper uses or extends a target paper.",
    "Read the paragraph(s) below and write ONE concise contribution claim.",
    "Focus only on what the citing paper actually does with the target paper.",
    "",
    "Rules:",
    "- If the citing paper explicitly uses/adopts/evaluates on the target paper's method/data/benchmark, label USES.",
    "- If it explicitly extends/modifies/adapts/builds upon the target paper, label EXTENDS.",
    "- If the paragraph is only descriptive/background or only compares/mentions the target paper, return label NOT_CONFIRMED and empty fields.",
    "- Do not output comparison-only claims (e.g., 'compares to '); those are NOT_CONFIRMED.",
    "- Output paper_claim: one concise, paper-specific contribution claim.",
    "- Output cluster_title: concise natural-language cluster summary (6-14 words), generic across papers.",
    "- Also output cluster_key in this exact format: RELATION|artifact|purpose",
    "- cluster_key must be generic and reusable across papers.",
    "- artifact and purpose must be short snake_case phrases (e.g., dataset, evaluation_protocol, evaluation).",
    "- cluster_key RELATION must exactly match label.",
    "- Avoid overly specific keys (no paper names, no model/version numbers, no citation keys).",
    "- Prefer stable generic keys such as: USES|dataset|evaluation, EXTENDS|dataset|dataset_creation, USES|evaluation_protocol|evaluation.",
    "- If label is NOT_CONFIRMED, paper_claim, cluster_title, cluster_key, and evidence_span must be empty.",
    "- The evidence_span must be a verbatim substring from the provided contexts.",
    "- The TARGET_PAPER abstract/TLDR is for background only; do not use it as evidence.",
    "",
    "Negative example (NOT_CONFIRMED):",
    'Paragraph: "We compare our method to and other baselines."',
    'Output: {"label":"NOT_CONFIRMED","paper_claim":"","cluster_title":"","cluster_key":"","evidence_span":"","rationale":"Comparison only."}',
    "",
    "Return JSON only.",
    "",
)

# Example of the JSON object shape, appended as the last line of the prompt.
_JSON_OUTPUT_TEMPLATE = (
    "{"
    '"label":"USES",'
    '"paper_claim":"...",'
    '"cluster_title":"Uses target dataset for evaluation",'
    '"cluster_key":"USES|dataset|evaluation",'
    '"evidence_span":"...",'
    '"rationale":"..."'
    "}"
)


def _field(info, key):
    """Return ``info[key]``, or an empty string when it is missing or None."""
    value = info.get(key, "")
    # A key that is present but None would otherwise be formatted as the
    # literal text "None" inside the prompt.
    return "" if value is None else value


def build_contribution_prompt(
    target_info: Dict[str, str],
    citing_info: Dict[str, str],
    contexts: List[str],
) -> str:
    """Build the prompt text for a single contribution-claim extraction.

    The prompt is the fixed instruction block, followed by the target-paper
    and citing-paper metadata, the numbered contexts, and a JSON template.

    Args:
        target_info: Target-paper metadata. The keys read are ``title``,
            ``first_author_last``, ``year``, ``tldr`` and ``abstract``;
            missing or None values are rendered as empty strings.
        citing_info: Citing-paper metadata. The keys read are ``title`` and
            ``paper_id``; missing or None values are rendered as empty.
        contexts: Context strings, numbered from 1 in the given order.
            ``None`` is treated as an empty list.

    Returns:
        The prompt as one string with its lines joined by ``"\\n"``.
    """
    lines = list(_INSTRUCTION_LINES)
    lines += [
        "TARGET_PAPER:",
        f"- title: {_field(target_info, 'title')}",
        f"- first_author_last: {_field(target_info, 'first_author_last')}",
        f"- year: {_field(target_info, 'year')}",
        f"- tldr: {_field(target_info, 'tldr')}",
        f"- abstract: {_field(target_info, 'abstract')}",
        "",
        "CITING_PAPER:",
        f"- title: {_field(citing_info, 'title')}",
        f"- paper_id: {_field(citing_info, 'paper_id')}",
        "",
        "CONTEXTS (verbatim, same order as extracted):",
    ]
    # `contexts or []` keeps a None argument from crashing enumerate().
    lines.extend(
        f"({index}) {text}"
        for index, text in enumerate(contexts or [], start=1)
    )
    lines += ["", "JSON OUTPUT:", _JSON_OUTPUT_TEMPLATE]
    return "\n".join(lines)