Spaces:

EricCham8
/

Scipaths

Running

Scipaths / src /step_05_verify_uses_extends /prompts.py

Eric Chamoun

Initial SciPaths Space release

0a55f0f 1 day ago

5.05 kB

	from typing import Dict, List


	# Label definitions interpolated verbatim into the verification prompt.
	# Each definition is assembled from fragments joined by a single space, so
	# no fragment carries leading or trailing whitespace of its own.
	USES_DEFINITION = " ".join(
	    (
	        "USES: The CITING_PAPER explicitly",
	        "uses/adopts/evaluates on/includes/relies on",
	        "a dataset, benchmark, method, tool, or reported results from TARGET_PAPER",
	        "as part of the CITING_PAPER's own methodology or evaluation.",
	    )
	)

	EXTENDS_DEFINITION = " ".join(
	    (
	        "EXTENDS: The CITING_PAPER explicitly",
	        "extends/modifies/adapts/builds upon",
	        "TARGET_PAPER's method/dataset/benchmark/tool.",
	    )
	)

	# Describes the negative class; the text itself tells the model to emit the
	# NOT_CONFIRMED label when no explicit adoption/extension cue is present.
	NOTES_DEFINITION = " ".join(
	    (
	        "NOT USES/EXTENDS: Merely describing what TARGET_PAPER",
	        "introduces/offers/proposes or listing it among related work or benchmarks",
	        "(without stating adoption).",
	        "If no explicit adoption/extension cue is present, label NOT_CONFIRMED.",
	    )
	)


	# Few-shot example sentences, grouped by the label they illustrate.
	# "<CITED HERE>" marks the position of the target citation in each sentence.
	# The sentences are reproduced exactly as given (tokenised spacing, stray
	# suffixes and unbalanced brackets included); do not "clean them up".

	# Sentences in which the citing paper adopts or relies on the cited work.
	FEW_SHOT_USES: List[str] = [
	    "We use the same splits as <CITED HERE> .",
	    "The Praat tool was used ( <CITED HERE> ) .",
	    "CCGBank ( <CITED HERE> ) is used to train the model .",
	    "This design idea was adopted from TANKA ( <CITED HERE>b ) .",
	    "Our strategy is based on the approach presented by <CITED HERE> .",
	]

	# Sentences in which the citing paper extends or modifies the cited work.
	# Long examples are wrapped with implicit string concatenation.
	FEW_SHOT_EXTENDS: List[str] = [
	    (
	        "The features can be easily obtained by modifying the TAT extraction "
	        "algorithm described in ( <CITED HERE> ) ."
	    ),
	    (
	        "Our own work ( <CITED HERE> ) extends the first idea to paraphrase "
	        "fragment extraction on monolingual parallel and comparable corpora ."
	    ),
	    (
	        "This article represents an extension of our previous work on "
	        "unsupervised event coreference resolution "
	        "( Bejan et al. 2009 ; <CITED HERE> ) ."
	    ),
	    (
	        "This evaluation set-up is an improvement versus the one we previously "
	        "reported ( <CITED HERE> ) , in which fixed partitions were used for "
	        "training , development , and testing ."
	    ),
	    (
	        "The computational treatment of lexical rules proposed can be seen as "
	        "an extension to the principled method discussed by Gotz and "
	        "<CITED HERE> , 1996 , 1997b ) for encoding the main building block of "
	        "HPSG grammars -- the implicative constraints -- as a logic program ."
	    ),
	]

	# Sentences that merely mention or point to the cited work.
	FEW_SHOT_NOT_CONFIRMED: List[str] = [
	    "<CITED HERE> introduced factored SMT .",
	    "See ( <CITED HERE> ) for a discussion .",
	    "See , among others , ( <CITED HERE> ) .",
	    "<CITED HERE> reported a correlation of r = .69 .",
	    "See <CITED HERE> for further discussion .",
	]


	def build_uses_extends_verification_prompt(
	    target_info: Dict[str, str],
	    candidates: List[Dict[str, str]],
	) -> str:
	    """Build the prompt used to verify USES / EXTENDS citation labels.

	    The prompt is assembled line by line: fixed instructions, the three label
	    definitions, the output rules, the few-shot examples, the target paper's
	    metadata, one entry per candidate sentence, and finally an example of the
	    expected JSON output.

	    Args:
	        target_info: Metadata of the cited (target) paper. The keys ``title``,
	            ``first_author_last`` and ``year`` are read; each falls back to an
	            empty string when missing.
	        candidates: Citing sentences to label. Every item must provide ``id``;
	            ``citing_title`` and ``text`` fall back to an empty string when
	            missing.

	    Returns:
	        The complete prompt as one newline-joined string.

	    Raises:
	        KeyError: If a candidate has no ``id`` key.
	    """
	    # The only labels the prompt may mention are USES, EXTENDS and
	    # NOT_CONFIRMED (see the "Output rules" lines below). The actor-test
	    # section therefore says EXTENDS as well, so the model is never told
	    # about a label it is not allowed to emit.
	    lines = [
	        "You are verifying citation function for a TARGET paper inside a citing sentence.",
	        "Be strict: lists of related work or benchmarks are NOT USES/EXTENDS unless there is an explicit action",
	        'like "use", "build on", "adopt", "extend", "based on", "trained on", "evaluate on", "implement".',
	        "",
	        "Actor test (CRITICAL for USES/EXTENDS):",
	        "- Only label USES or EXTENDS if the ACTION is performed by the CITING_PAPER.",
	        "- The cue_span for USES/EXTENDS must include an explicit citing-paper actor phrase such as:",
	        ' "we", "our", "in this work", "in this paper", "we use", "we evaluate",',
	        ' "our evaluation includes", "we extend", "we build on", "we adapt".',
	        "- If the context says the TARGET_PAPER (or some other paper/system) uses/extends something",
	        ' (e.g., "TARGET_PAPER uses...", "TARGET_PAPER extends..."),',
	        " then it is NOT USES/EXTENDS. Label NOT_CONFIRMED.",
	        "",
	        "Task: Label each sentence as USES, EXTENDS, or NOT_CONFIRMED.",
	        "Return JSON only with one entry per input sentence.",
	        "",
	        "Definitions:",
	        f"- {USES_DEFINITION}",
	        f"- {EXTENDS_DEFINITION}",
	        f"- {NOTES_DEFINITION}",
	        "",
	        "Output rules:",
	        "- label must be one of: USES, EXTENDS, NOT_CONFIRMED",
	        "- cue_span: exact substring from the sentence that justifies USES/EXTENDS, else empty",
	        "- rationale: one short sentence",
	        "- If cue_span is empty => label must be NOT_CONFIRMED",
	        "",
	        "Few-shot examples:",
	    ]

	    # One heading per label, followed by its example sentences as bullets.
	    few_shot_sections = (
	        ("USES:", FEW_SHOT_USES),
	        ("EXTENDS:", FEW_SHOT_EXTENDS),
	        ("NOT_CONFIRMED:", FEW_SHOT_NOT_CONFIRMED),
	    )
	    for heading, examples in few_shot_sections:
	        lines.append(heading)
	        lines.extend(f"- {example}" for example in examples)

	    lines.extend(
	        [
	            "",
	            "TARGET_PAPER:",
	            f"- title: {target_info.get('title', '')}",
	            f"- first_author_last: {target_info.get('first_author_last', '')}",
	            f"- year: {target_info.get('year', '')}",
	            "",
	            "CANDIDATES:",
	        ]
	    )

	    for item in candidates:
	        lines.extend(
	            [
	                # "id" is indexed directly: a candidate without an id could
	                # not be matched back to its label, so fail loudly instead.
	                f"ID: {item['id']}",
	                f"Citing paper: {item.get('citing_title', '')}",
	                f"Sentence: {item.get('text', '')}",
	                "",  # blank separator line after each candidate
	            ]
	        )

	    lines.append("JSON OUTPUT:")
	    # Example of the expected response shape, shown to the model verbatim.
	    lines.append(
	        '{"labels": [{"id": 1, "label": "USES", "cue_span": "...", "rationale": "..."}]}'
	    )
	    return "\n".join(lines)