File size: 3,649 Bytes
0a55f0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from __future__ import annotations

import json
from pathlib import Path

import typer

from .paper_package import load_paper_package

from .pipeline import TwoPassAnnotationPipeline


# Typer application object; the `run` and `summarize` commands below register onto it.
app = typer.Typer(help="Run step 8: derive target contributions, enabling contributions, and groundings.")


def _default_output_root() -> Path:
    return Path("runs/two_pass_outputs")


@app.command()
def run(
    paper_dir: Path = typer.Option(..., exists=True, file_okay=False, dir_okay=True),
    provider: str = typer.Option("openai", help="Provider family: openai or gemini."),
    model: str = typer.Option("openai/gpt-5", help="Reasoning model used for target-contribution derivation and annotation."),
    formatter_model: str | None = typer.Option(
        None,
        help="Optional model override for pass 2 formatting, e.g. openai/gpt-5-mini or openai/gpt-5.4-pro.",
    ),
    judge_model: str | None = typer.Option(
        None,
        help="Optional model override for pass 1 candidate ranking. Ignored when --candidate-count=1.",
    ),
    candidate_count: int = typer.Option(
        1,
        help="Number of reasoning candidates to generate. If set to 1, no judge call is made.",
    ),
    formatter_max_attempts: int = typer.Option(
        3,
        help="Formatter-only retry attempts after pass 1 has succeeded.",
    ),
    include_reference_examples: bool = typer.Option(
        True,
        "--include-reference-examples/--no-include-reference-examples",
        help="Include the built-in reference examples in the pass-1 reasoning prompt.",
    ),
    prompt_profile: str = typer.Option(
        "full",
        help="Reasoning prompt profile: full or generic.",
    ),
    output_root: Path = typer.Option(
        _default_output_root(),
        help="Directory to store run outputs.",
    ),
    run_label: str | None = typer.Option(None, help="Optional label to include in the saved run directory name."),
    annotator_id: str = typer.Option("llm", help="Annotator id to embed in the final UI payload."),
    extracted_claim: str | None = typer.Option(None, help="Optional override for the extracted target contribution."),
) -> None:
    """Load a paper package, run the two-pass annotation pipeline on it, and echo
    the path of the resulting run_output.json file."""
    # Load the paper inputs; an explicit --extracted-claim overrides the stored one.
    package = load_paper_package(paper_dir, extracted_claim_override=extracted_claim)
    # Assemble the pipeline from the CLI options; typer.echo reports progress as it runs.
    annotation_pipeline = TwoPassAnnotationPipeline(
        provider=provider,
        model=model,
        formatter_model=formatter_model,
        judge_model=judge_model,
        candidate_count=candidate_count,
        formatter_max_attempts=formatter_max_attempts,
        include_reference_examples=include_reference_examples,
        prompt_profile=prompt_profile,
        output_root=output_root,
        run_label=run_label,
        annotator_id=annotator_id,
        progress_callback=typer.echo,
    )
    outcome = annotation_pipeline.run(package)
    # Print the output file location so callers/scripts can pick it up.
    typer.echo(str(outcome.run_dir / "run_output.json"))


@app.command()
def summarize(run_output: Path = typer.Option(..., exists=True, dir_okay=False, file_okay=True)) -> None:
    """Print a compact JSON summary of a saved run_output.json.

    The summary lists the paper id plus, for each target contribution, its id,
    rewritten claim, decision, and the number of enabling contributions
    (stored under the "ingredients" key).
    """
    # Fix: read explicitly as UTF-8. The file is JSON (UTF-8 by convention);
    # the previous bare read_text() used the platform locale encoding, which
    # can mis-decode non-ASCII content on e.g. Windows.
    data = json.loads(run_output.read_text(encoding="utf-8"))
    # Tolerate missing/null sections: fall back to empty containers.
    payload = data.get("ui_payload") or {}
    claims = payload.get("claims") or []
    summary = {
        "paper_id": data.get("paper_id"),
        "target_contribution_count": len(claims),
        "target_contributions": [
            {
                "claim_id": claim.get("claim_id"),
                "rewritten_claim": claim.get("rewritten_claim"),
                "decision": claim.get("decision"),
                "enabling_contribution_count": len(claim.get("ingredients") or []),
            }
            for claim in claims
        ],
    }
    typer.echo(json.dumps(summary, indent=2))


# Script entry point: invoke the Typer app when run as `python -m` or directly.
if __name__ == "__main__":
    app()