"""Aggregate ₹ at risk across the bench and produce the headline economic-loss number.

For each scenario in `data/chakravyuh-bench-v0/scenarios.jsonl` we read
`metadata.loss_amount_inr` and join with a per-row eval log
(`logs/eval_v2_per_row.jsonl` by default) to compute:

  - ₹ at risk (sum of loss_amount_inr across scams in the bench)
  - ₹ prevented (sum across scams the analyzer flagged early)
  - ₹ leaked    (sum across scams the analyzer missed AND money was extracted)
  - bench-level prevention rate (₹ prevented / ₹ at risk)

The headline phrasing ("Chakravyuh prevented ₹X cr of expected loss
across the n=174 bench") is sourced from this script — never fabricated.

Usage:
    python eval/rupee_weighted_eval.py \
        --bench data/chakravyuh-bench-v0/scenarios.jsonl \
        --eval-log logs/eval_v2_per_row.jsonl \
        --output logs/rupee_weighted_eval.json

The eval log must be JSONL with at least these fields per row:
    scenario_id   (matches `id` in the bench)
    flagged       (bool)
    money_extracted (bool, optional — when absent, a missed scam counts as
                     leaked only if the bench's `metadata.outcome` is
                     "money_extracted")

If `--eval-log` is omitted the script computes only the gross ₹ at risk
and exits — useful as a bench summary.
"""

from __future__ import annotations

import argparse
import json
import math
import sys
from dataclasses import asdict, dataclass
from pathlib import Path


@dataclass(frozen=True)
class RupeeReport:
    """Immutable rupee-weighted summary of one bench (and optional eval) run.

    Built by ``compute`` and serialised to JSON with ``dataclasses.asdict``
    in ``main``. Rupee totals are rounded to 2 decimals and the rate to 4
    decimals at construction time in ``compute``.
    """

    # Number of rows in the bench, scam and non-scam alike.
    n_scenarios: int
    # Number of scam scenarios that carried a usable, positive loss amount.
    n_scams_with_amount: int
    # Sum of the loss amounts over those scam scenarios.
    rupees_at_risk: float
    # Portion of the amount at risk belonging to scenarios the eval log flagged.
    rupees_prevented: float
    # Portion belonging to scenarios that were not flagged and where money was
    # extracted.
    rupees_leaked: float
    # rupees_prevented / rupees_at_risk, or 0.0 when nothing is at risk.
    prevention_rate: float
    # Human-readable remarks about scenarios that were skipped or unmatched.
    notes: list[str]


def _load_bench(path: Path) -> dict[str, dict]:
    """Load bench scenarios from a JSONL file, keyed by scenario id.

    Blank lines are ignored. Ids are coerced to ``str`` so that they match the
    string keys produced by ``_load_eval_log`` even when the bench stores
    numeric ids. If an id occurs more than once, the last row wins.

    Args:
        path: Location of the UTF-8 encoded JSONL bench file.

    Returns:
        Mapping of scenario id to the parsed scenario row.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a row has no ``id`` field.
    """
    scenarios: dict[str, dict] = {}
    # Iterate the file object instead of ``str.splitlines()``: the latter also
    # breaks on U+2028, U+2029 and U+0085, which may appear unescaped inside
    # JSON strings and would cut a record in half.
    with path.open(encoding="utf-8") as handle:
        for raw_line in handle:
            record = raw_line.strip()
            if not record:
                continue
            row = json.loads(record)
            scenarios[str(row["id"])] = row
    return scenarios


def _load_eval_log(path: Path) -> dict[str, dict]:
    """Load a per-row eval log from a JSONL file, keyed by scenario id.

    The id is read from ``scenario_id``, falling back to ``id`` when
    ``scenario_id`` is absent, ``null`` or an empty string. Rows with no id at
    all are skipped (best-effort), as are blank lines. Ids are coerced to
    ``str``; if an id occurs more than once, the last row wins.

    Args:
        path: Location of the UTF-8 encoded JSONL eval log.

    Returns:
        Mapping of scenario id (as ``str``) to the parsed eval row.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    rows_by_id: dict[str, dict] = {}
    # Iterate the file object instead of ``str.splitlines()``: the latter also
    # breaks on U+2028, U+2029 and U+0085, which may appear unescaped inside
    # JSON strings and would cut a record in half.
    with path.open(encoding="utf-8") as handle:
        for raw_line in handle:
            record = raw_line.strip()
            if not record:
                continue
            row = json.loads(record)
            scenario_id = row.get("scenario_id")
            # Test for "missing" explicitly rather than by truthiness, so a
            # legitimate id such as 0 is not mistaken for an absent one.
            if scenario_id is None or scenario_id == "":
                scenario_id = row.get("id")
            if scenario_id is None:
                continue
            rows_by_id[str(scenario_id)] = row
    return rows_by_id


def compute(
    bench: dict[str, dict],
    eval_log: dict[str, dict] | None,
) -> RupeeReport:
    """Aggregate rupee-weighted outcomes over the scam scenarios in *bench*.

    A scenario contributes only when it is a scam (``ground_truth.is_scam``,
    treated as true when absent) and its ``metadata.loss_amount_inr`` parses
    to a positive, finite number. Each contributing loss is added to the
    amount at risk and, when an eval log is supplied, to at most one of:

    * prevented - the eval row has a truthy ``flagged``;
    * leaked - not flagged and money was extracted, taken from the eval row's
      ``money_extracted`` when present, otherwise from whether the bench's
      ``metadata.outcome`` equals ``"money_extracted"``.

    Args:
        bench: Scenario rows keyed by scenario id.
        eval_log: Eval rows keyed by scenario id, or ``None`` to compute only
            the gross amount at risk.

    Returns:
        A ``RupeeReport`` with rupee totals rounded to 2 decimals and the
        prevention rate rounded to 4. ``notes`` records scenarios whose loss
        amount was unusable or that had no eval-log row.
    """
    notes: list[str] = []
    rupees_at_risk = 0.0
    rupees_prevented = 0.0
    rupees_leaked = 0.0
    n_scams_with_amount = 0

    for sid, scenario in bench.items():
        metadata = scenario.get("metadata") or {}
        ground = scenario.get("ground_truth") or {}
        is_scam = bool(ground.get("is_scam", True))
        if not is_scam:
            continue
        loss_raw = metadata.get("loss_amount_inr")
        if loss_raw is None:
            continue
        try:
            loss = float(loss_raw)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: an integer too large to be represented as a float.
            notes.append(f"{sid}: non-numeric loss_amount_inr={loss_raw!r}")
            continue
        if not math.isfinite(loss):
            # json.loads accepts NaN/Infinity; NaN would slip past the
            # `loss <= 0` guard below and turn every total into NaN.
            notes.append(f"{sid}: non-finite loss_amount_inr={loss_raw!r}")
            continue
        if loss <= 0:
            continue
        n_scams_with_amount += 1
        rupees_at_risk += loss
        if eval_log is None:
            continue
        row = eval_log.get(sid)
        if row is None:
            # Still counted as at risk, but neither prevented nor leaked.
            notes.append(f"{sid}: no eval-log row")
            continue
        flagged = bool(row.get("flagged"))
        # If money_extracted is unspecified, assume the bench's labelled
        # outcome (see `metadata.outcome`).
        if "money_extracted" in row:
            money = bool(row["money_extracted"])
        else:
            money = (metadata.get("outcome") == "money_extracted")
        if flagged:
            rupees_prevented += loss
        elif money:
            rupees_leaked += loss

    prevention_rate = (
        rupees_prevented / rupees_at_risk if rupees_at_risk > 0 else 0.0
    )
    return RupeeReport(
        n_scenarios=len(bench),
        n_scams_with_amount=n_scams_with_amount,
        rupees_at_risk=round(rupees_at_risk, 2),
        rupees_prevented=round(rupees_prevented, 2),
        rupees_leaked=round(rupees_leaked, 2),
        prevention_rate=round(prevention_rate, 4),
        notes=notes,
    )


def _format_inr(amount: float) -> str:
    """Format a rupee amount using Indian units (crore / lakh / plain rupees).

    The unit is chosen from the value as it will be displayed, so an amount
    that rounds up to a unit boundary is promoted to the larger unit (for
    example 9,999,999 is shown as "₹1.00 cr" rather than "₹100.00 lakh").

    Args:
        amount: Amount in rupees.

    Returns:
        ``"₹X.XX cr"`` from one crore (1e7) upwards, ``"₹X.XX lakh"`` from one
        lakh (1e5) upwards, otherwise whole rupees with thousands separators.
    """
    lakhs = amount / 1e5
    # 100.00 lakh is one crore: promote once the two-decimal lakh figure
    # reaches 100.
    if round(lakhs, 2) >= 100:
        return f"₹{amount / 1e7:.2f} cr"
    # Likewise promote to lakh once the whole-rupee figure reaches 1,00,000.
    # `round(x, 0)` is used (not `round(x)`) so NaN does not raise.
    if round(amount, 0) >= 1e5:
        return f"₹{lakhs:.2f} lakh"
    return f"₹{amount:,.0f}"


def main() -> int:
    """Run the command-line interface.

    Parses arguments, loads the bench (and the eval log when given), writes
    the JSON report to ``--output`` and prints a short summary to stdout.

    Returns:
        Process exit code: 0 on success, 2 when the bench file or a requested
        eval log does not exist.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's usage example and field list laid out
        # as written instead of letting argparse re-wrap them.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--bench",
        type=Path,
        default=Path("data/chakravyuh-bench-v0/scenarios.jsonl"),
        help="Path to bench scenarios JSONL.",
    )
    parser.add_argument(
        "--eval-log",
        type=Path,
        default=None,
        help=(
            "Optional per-row JSONL with `scenario_id` + `flagged` "
            "(+ optional `money_extracted`). When omitted, only ₹ at risk is computed."
        ),
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("logs/rupee_weighted_eval.json"),
        help="Path to write the JSON report.",
    )
    args = parser.parse_args()

    if not args.bench.exists():
        print(f"error: bench not found: {args.bench}", file=sys.stderr)
        return 2
    # Report a missing eval log the same way as a missing bench, rather than
    # failing with a traceback when the file is read.
    if args.eval_log is not None and not args.eval_log.exists():
        print(f"error: eval log not found: {args.eval_log}", file=sys.stderr)
        return 2

    bench = _load_bench(args.bench)
    eval_log = _load_eval_log(args.eval_log) if args.eval_log is not None else None
    report = compute(bench, eval_log)

    args.output.parent.mkdir(parents=True, exist_ok=True)
    args.output.write_text(json.dumps(asdict(report), indent=2), encoding="utf-8")

    print(f"Bench: {report.n_scenarios} scenarios")
    print(f"Scams with loss_amount_inr: {report.n_scams_with_amount}")
    print(f"₹ at risk:    {_format_inr(report.rupees_at_risk)}")
    # Prevented/leaked figures only mean something when an eval log was joined.
    if eval_log is not None:
        print(f"₹ prevented:  {_format_inr(report.rupees_prevented)}")
        print(f"₹ leaked:     {_format_inr(report.rupees_leaked)}")
        print(f"Prevention rate: {report.prevention_rate * 100:.1f}%")
    print(f"Wrote {args.output}")
    return 0


# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())