Spaces:

EricCham8
/

Scipaths

Running

Scipaths/src/step_08_annotation/cli.py

Eric Chamoun

Initial SciPaths Space release

0a55f0f 1 day ago

3.65 kB

	from __future__ import annotations

	import json
	from pathlib import Path

	import typer

	from .paper_package import load_paper_package

	from .pipeline import TwoPassAnnotationPipeline


	# Typer application object for this module; the commands defined in this
	# file attach themselves to it through the @app.command() decorator.
	app = typer.Typer(help="Run step 8: derive target contributions, enabling contributions, and groundings.")


	def _default_output_root() -> Path:
	    """Return the default directory under which run outputs are stored.

	    The path is relative, so it resolves against the current working
	    directory of the process that uses it.
	    """
	    default_root = Path("runs/two_pass_outputs")
	    return default_root


	@app.command()
	def run(
	    paper_dir: Path = typer.Option(..., exists=True, file_okay=False, dir_okay=True),
	    provider: str = typer.Option("openai", help="Provider family: openai or gemini."),
	    model: str = typer.Option(
	        "openai/gpt-5",
	        help="Reasoning model used for target-contribution derivation and annotation.",
	    ),
	    formatter_model: str | None = typer.Option(
	        None,
	        help="Optional model override for pass 2 formatting, e.g. openai/gpt-5-mini or openai/gpt-5.4-pro.",
	    ),
	    judge_model: str | None = typer.Option(
	        None,
	        help="Optional model override for pass 1 candidate ranking. Ignored when --candidate-count=1.",
	    ),
	    candidate_count: int = typer.Option(
	        1,
	        help="Number of reasoning candidates to generate. If set to 1, no judge call is made.",
	    ),
	    formatter_max_attempts: int = typer.Option(
	        3,
	        help="Formatter-only retry attempts after pass 1 has succeeded.",
	    ),
	    include_reference_examples: bool = typer.Option(
	        True,
	        "--include-reference-examples/--no-include-reference-examples",
	        help="Include the built-in reference examples in the pass-1 reasoning prompt.",
	    ),
	    prompt_profile: str = typer.Option(
	        "full",
	        help="Reasoning prompt profile: full or generic.",
	    ),
	    output_root: Path = typer.Option(
	        _default_output_root(),
	        help="Directory to store run outputs.",
	    ),
	    run_label: str | None = typer.Option(
	        None,
	        help="Optional label to include in the saved run directory name.",
	    ),
	    annotator_id: str = typer.Option("llm", help="Annotator id to embed in the final UI payload."),
	    extracted_claim: str | None = typer.Option(
	        None,
	        help="Optional override for the extracted target contribution.",
	    ),
	) -> None:
	    """Run the two-pass annotation pipeline on one paper directory."""
	    # NOTE: the docstring is kept to one line on purpose -- Typer displays a
	    # command's docstring as its --help description.

	    # Load the paper from disk; an explicit --extracted-claim is forwarded
	    # to the loader as an override.
	    paper = load_paper_package(paper_dir, extracted_claim_override=extracted_claim)

	    # Every CLI option maps one-to-one onto a pipeline keyword argument.
	    # Progress messages from the pipeline are printed through typer.echo.
	    pipeline = TwoPassAnnotationPipeline(
	        provider=provider,
	        model=model,
	        formatter_model=formatter_model,
	        judge_model=judge_model,
	        output_root=output_root,
	        run_label=run_label,
	        annotator_id=annotator_id,
	        candidate_count=candidate_count,
	        formatter_max_attempts=formatter_max_attempts,
	        include_reference_examples=include_reference_examples,
	        prompt_profile=prompt_profile,
	        progress_callback=typer.echo,
	    )
	    result = pipeline.run(paper)

	    # Print the location of the run's JSON output so that callers (or shell
	    # scripts) can pick it up, e.g. to feed it to the `summarize` command.
	    typer.echo(str(result.run_dir / "run_output.json"))


	@app.command()
	def summarize(run_output: Path = typer.Option(..., exists=True, dir_okay=False, file_okay=True)) -> None:
	    """Print a JSON summary of the target contributions in a run output file."""
	    # NOTE: the docstring is kept to one line on purpose -- Typer displays a
	    # command's docstring as its --help description.

	    # Hand raw bytes to json.loads so the decoder detects UTF-8/16/32 itself
	    # instead of decoding the file with the platform's locale encoding.
	    data = json.loads(run_output.read_bytes())

	    # A missing or null "ui_payload" / "claims" is treated as empty.
	    payload = data.get("ui_payload") or {}
	    claims = payload.get("claims") or []

	    target_contributions = [
	        {
	            "claim_id": claim.get("claim_id"),
	            "rewritten_claim": claim.get("rewritten_claim"),
	            "decision": claim.get("decision"),
	            # A claim's "ingredients" list is reported as its number of
	            # enabling contributions; missing or null counts as zero.
	            "enabling_contribution_count": len(claim.get("ingredients") or []),
	        }
	        for claim in claims
	    ]
	    summary = {
	        "paper_id": data.get("paper_id"),
	        "target_contribution_count": len(claims),
	        "target_contributions": target_contributions,
	    }
	    typer.echo(json.dumps(summary, indent=2))


	# Start the Typer application only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    app()