Eric Chamoun
Initial SciPaths Space release
0a55f0f
from typing import Dict, List
def build_contribution_prompt(
    target_info: Dict[str, str],
    citing_info: Dict[str, str],
    contexts: List[str],
) -> str:
    """Assemble the prompt asking a model for one contribution claim.

    The prompt is built from these sections, in order: task instructions and
    labelling rules, one NOT_CONFIRMED negative example, target-paper
    metadata, citing-paper metadata, the numbered contexts, and a sample JSON
    object showing the expected output keys.

    Args:
        target_info: Target-paper metadata. The keys ``title``,
            ``first_author_last``, ``year``, ``tldr`` and ``abstract`` are
            read; a missing key is rendered as an empty string.
        citing_info: Citing-paper metadata. The keys ``title`` and
            ``paper_id`` are read; a missing key is rendered as an empty
            string.
        contexts: Context paragraphs, emitted in the given order and
            numbered from 1.

    Returns:
        All prompt lines joined with newline characters.
    """
    # Static part: task description, rules, negative example.
    instructions = [
        "You are extracting how a citing paper uses or extends a target paper.",
        "Read the paragraph(s) below and write ONE concise contribution claim.",
        "Focus only on what the citing paper actually does with the target paper.",
        "",
        "Rules:",
        "- If the citing paper explicitly uses/adopts/evaluates on the target paper's method/data/benchmark, label USES.",
        "- If it explicitly extends/modifies/adapts/builds upon the target paper, label EXTENDS.",
        "- If the paragraph is only descriptive/background or only compares/mentions the target paper, return label NOT_CONFIRMED and empty fields.",
        "- Do not output comparison-only claims (e.g., 'compares to <CITED HERE>'); those are NOT_CONFIRMED.",
        "- Output paper_claim: one concise, paper-specific contribution claim.",
        "- Output cluster_title: concise natural-language cluster summary (6-14 words), generic across papers.",
        "- Also output cluster_key in this exact format: RELATION|artifact|purpose",
        "- cluster_key must be generic and reusable across papers.",
        "- artifact and purpose must be short snake_case phrases (e.g., dataset, evaluation_protocol, evaluation).",
        "- cluster_key RELATION must exactly match label.",
        "- Avoid overly specific keys (no paper names, no model/version numbers, no citation keys).",
        "- Prefer stable generic keys such as: USES|dataset|evaluation, EXTENDS|dataset|dataset_creation, USES|evaluation_protocol|evaluation.",
        "- If label is NOT_CONFIRMED, paper_claim, cluster_title, cluster_key, and evidence_span must be empty.",
        "- The evidence_span must be a verbatim substring from the provided contexts.",
        "- The TARGET_PAPER abstract/TLDR is for background only; do not use it as evidence.",
        "",
        "Negative example (NOT_CONFIRMED):",
        'Paragraph: "We compare our method to <CITED HERE> and other baselines."',
        'Output: {"label":"NOT_CONFIRMED","paper_claim":"","cluster_title":"","cluster_key":"","evidence_span":"","rationale":"Comparison only."}',
        "",
        "Return JSON only.",
        "",
    ]

    # Metadata sections: one "- key: value" line per field, then a blank line.
    target_keys = ("title", "first_author_last", "year", "tldr", "abstract")
    target_section = ["TARGET_PAPER:"]
    target_section += [f"- {key}: {target_info.get(key, '')}" for key in target_keys]
    target_section.append("")

    citing_keys = ("title", "paper_id")
    citing_section = ["CITING_PAPER:"]
    citing_section += [f"- {key}: {citing_info.get(key, '')}" for key in citing_keys]
    citing_section.append("")

    # NOTE(review): the source's indentation was lost; this assumes a single
    # blank separator after all contexts rather than one after each context.
    # Confirm against the upstream file.
    numbered_contexts = [
        f"({position}) {paragraph}"
        for position, paragraph in enumerate(contexts, start=1)
    ]
    context_section = [
        "CONTEXTS (verbatim, same order as extracted):",
        *numbered_contexts,
        "",
    ]

    # Sample object showing the keys and value shapes the model should return.
    response_template = (
        '{"label":"USES",'
        '"paper_claim":"...",'
        '"cluster_title":"Uses target dataset for evaluation",'
        '"cluster_key":"USES|dataset|evaluation",'
        '"evidence_span":"...",'
        '"rationale":"..."}'
    )

    prompt_lines = (
        instructions
        + target_section
        + citing_section
        + context_section
        + ["JSON OUTPUT:", response_template]
    )
    return "\n".join(prompt_lines)