"""Adversary Lab — browse the 64 trained-Scammer outputs vs both defenders.

Renders the B.2 Phase-1 head-to-head data from
`logs/b2_phase1_scammer_vs_v2_lora.json` as a Gradio-friendly HTML
panel. Each sample shows:

  - The seed prompt that triggered the Scammer
  - The actual generated scam text
  - The rule-based ScriptedAnalyzer's verdict (bypassed / caught)
  - v2 Analyzer LoRA's verdict (score, signals, explanation)
  - The asymmetry — when scripted misses but v2 catches, that IS the
    co-evolution gap made visible

This file ships zero new model dependencies — the data is pre-computed
and committed to the repo. The Adversary Lab tab is the *visible*
Theme #1 demonstration: trained adversary vs trained defender, on real
generated scam text, no hand-waving.
"""

from __future__ import annotations

import json
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path

# Location of the pre-computed head-to-head log: the ``logs`` folder that sits
# next to this file's parent directory.
LOG_PATH = Path(__file__).resolve().parent.parent.joinpath(
    "logs", "b2_phase1_scammer_vs_v2_lora.json"
)


@dataclass(frozen=True)
class AdversarySample:
    """One Scammer output together with both defenders' verdicts.

    Instances are immutable and are built by ``load_samples`` from the
    records of the JSON log at ``LOG_PATH``.
    """

    # Position of the record in the log's "samples" list (0-based).
    index: int
    # Seed prompt text (record key "seed").
    seed: str
    # Split name (record key "split"); the banner looks for "train" / "held_out".
    split: str
    # Generated text (record key "completion").
    completion: str
    # Record key "length_chars", or len(completion) when the key is absent.
    length_chars: int
    # Record key "is_refusal".
    is_refusal: bool
    # Record key "scripted_score", or 1 - "reward" when the key is absent.
    scripted_score: float
    # True when the record's "bypass" flag is falsy (i.e. scripted did not miss).
    scripted_caught: bool
    # Record key "v2_score".
    v2_score: float
    # Record key "v2_caught".
    v2_caught: bool
    # Signals parsed out of the record's "v2_raw_response" JSON.
    v2_signals: tuple[str, ...]
    # Explanation parsed out of "v2_raw_response" (or a raw-text fallback).
    v2_explanation: str


def _parse_v2_response(raw: str) -> tuple[tuple[str, ...], str]:
    """Best-effort parse of the v2 JSON response. Returns (signals, explanation)."""
    if not raw:
        return ((), "")
    try:
        # The v2 response is JSON-formatted; sometimes wrapped in code-fence
        text = raw.strip()
        if text.startswith("```"):
            text = text.split("```", 2)[1]
            if text.startswith("json"):
                text = text[4:]
        decoded = json.loads(text)
        signals = decoded.get("signals", []) or []
        if not isinstance(signals, list):
            signals = [str(signals)]
        explanation = str(decoded.get("explanation", "") or "")
        return (tuple(str(s) for s in signals), explanation)
    except (json.JSONDecodeError, IndexError, KeyError):
        return ((), raw[:240])


@lru_cache(maxsize=1)
def load_samples() -> tuple[AdversarySample, ...]:
    """Load every record of the log at ``LOG_PATH`` as an ``AdversarySample``.

    The result is cached, so the file is read at most once per process
    (including the empty result returned when the file does not exist).

    Returns:
        A tuple of samples in file order, or ``()`` when ``LOG_PATH`` is
        missing or the log has no ``"samples"`` entries.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    if not LOG_PATH.exists():
        return ()
    raw = json.loads(LOG_PATH.read_text(encoding="utf-8"))
    samples_raw = raw.get("samples") or []
    out: list[AdversarySample] = []
    for i, s in enumerate(samples_raw):
        # Scripted bypass = True means scripted *missed*; caught = NOT bypass.
        scripted_caught = not bool(s.get("bypass", False))

        # Prefer an explicit score. The "reward" fallback is only computed when
        # it is actually needed, so a null/absent reward cannot break a record
        # that already carries "scripted_score".
        if "scripted_score" in s:
            scripted_score = float(s["scripted_score"])
        else:
            scripted_score = round(1.0 - float(s.get("reward", 0.5)), 3)

        # Same idea for the length: only measure the completion when the log
        # did not record a length itself.
        if "length_chars" in s:
            length_chars = int(s["length_chars"])
        else:
            length_chars = len(s.get("completion", ""))

        v2_signals, v2_explanation = _parse_v2_response(s.get("v2_raw_response", ""))
        out.append(
            AdversarySample(
                index=i,
                seed=str(s.get("seed", "")),
                split=str(s.get("split", "")),
                completion=str(s.get("completion", "")),
                length_chars=length_chars,
                is_refusal=bool(s.get("is_refusal", False)),
                scripted_score=scripted_score,
                scripted_caught=scripted_caught,
                v2_score=float(s.get("v2_score", 0.0)),
                v2_caught=bool(s.get("v2_caught", False)),
                v2_signals=v2_signals,
                v2_explanation=v2_explanation,
            )
        )
    return tuple(out)


def aggregate_stats() -> dict[str, object]:
    """Summarise the loaded samples per split and as a 2x2 cross-tab.

    Returns:
        ``{"n": 0}`` when nothing is loaded. Otherwise a dict with ``"n"``
        (sample count), ``"by_split"`` (per-split ``n`` / ``scripted_caught``
        / ``v2_caught`` tallies) and ``"cells"`` (how many samples fall into
        each combination of the two defenders' verdicts).
    """
    loaded = load_samples()
    if not loaded:
        return {"n": 0}

    # (scripted_caught, v2_caught) -> name of the cross-tab cell.
    cell_for = {
        (True, True): "both_caught",
        (False, True): "only_scripted_missed",
        (True, False): "only_v2_missed",
        (False, False): "both_missed",
    }
    cells = dict.fromkeys(cell_for.values(), 0)
    by_split: dict[str, dict[str, int]] = {}

    for sample in loaded:
        if sample.split not in by_split:
            by_split[sample.split] = {"n": 0, "scripted_caught": 0, "v2_caught": 0}
        tally = by_split[sample.split]
        tally["n"] += 1
        tally["scripted_caught"] += int(sample.scripted_caught)
        tally["v2_caught"] += int(sample.v2_caught)

        verdicts = (bool(sample.scripted_caught), bool(sample.v2_caught))
        cells[cell_for[verdicts]] += 1

    return {"n": len(loaded), "by_split": by_split, "cells": cells}


def sample_choice_labels() -> list[tuple[str, int]]:
    """Build ``(label, index)`` pairs, one per loaded sample.

    Each label shows the sample number, its split, a caught/missed mark for
    each defender, and the first 55 characters of the seed (newlines
    flattened to spaces, with an ellipsis when the seed was cut).
    """

    def _label(sample) -> str:
        preview = sample.seed[:55].replace("\n", " ")
        if len(sample.seed) > 55:
            preview = preview + "…"
        scripted_mark = "✓" if sample.scripted_caught else "✗"
        v2_mark = "✓" if sample.v2_caught else "✗"
        return f"#{sample.index:02d} [{sample.split:8s}] scripted {scripted_mark} · v2 {v2_mark} — {preview}"

    return [(_label(sample), sample.index) for sample in load_samples()]


def _verdict_pill(caught: bool) -> str:
    if caught:
        return (
            '<span style="display:inline-block;padding:3px 10px;border-radius:999px;'
            'background:#e8f5e9;color:#1b5e20;font-weight:700;font-size:12px;">CAUGHT</span>'
        )
    return (
        '<span style="display:inline-block;padding:3px 10px;border-radius:999px;'
        'background:#ffebee;color:#b71c1c;font-weight:700;font-size:12px;">BYPASSED</span>'
    )


def render_aggregate_banner() -> str:
    """Render the summary banner: bypass rates per split plus the cross-tab.

    Each table row shows, for one group of samples, the share that each
    defender did NOT catch (``100 * (1 - caught / n)``) and the difference
    between the two in percentage points.

    Returns:
        An HTML fragment. When no samples are loaded, a short red notice
        saying the log file is missing.
    """
    stats = aggregate_stats()
    if stats["n"] == 0:
        return '<div style="color:#b71c1c;">Adversary Lab data not loaded — `logs/b2_phase1_scammer_vs_v2_lora.json` missing.</div>'
    cells = stats["cells"]
    by_split = stats["by_split"]
    total = stats["n"]
    empty_slot = {"n": 0, "scripted_caught": 0, "v2_caught": 0}
    train = by_split.get("train", empty_slot)
    held = by_split.get("held_out", empty_slot)

    # The "Overall" row divides by the total sample count, so its caught
    # counts must cover every split too -- not only "train" and "held_out".
    # Otherwise samples with any other split name would count as bypasses.
    overall_scripted_caught = sum(slot["scripted_caught"] for slot in by_split.values())
    overall_v2_caught = sum(slot["v2_caught"] for slot in by_split.values())

    def _row(name: str, n: int, sc: int, v2c: int) -> str:
        """Build one table row from a group size and its two caught counts."""
        sc_pct = 100 * (1 - sc / n) if n else 0
        v2_pct = 100 * (1 - v2c / n) if n else 0
        gap = sc_pct - v2_pct
        # The "+" format flag emits the real sign, so a negative gap shows as
        # "-x.x pp" rather than "+-x.x pp".
        return (
            f"<tr><td style='padding:4px 12px;'>{name}</td>"
            f"<td style='padding:4px 12px;text-align:right;'>{n}</td>"
            f"<td style='padding:4px 12px;text-align:right;color:#b71c1c;font-weight:700;'>{sc_pct:.1f}%</td>"
            f"<td style='padding:4px 12px;text-align:right;color:#1b5e20;font-weight:700;'>{v2_pct:.1f}%</td>"
            f"<td style='padding:4px 12px;text-align:right;font-weight:700;'>{gap:+.1f} pp</td></tr>"
        )

    train_row = _row("Train", train["n"], train["scripted_caught"], train["v2_caught"])
    held_row = _row("Held-out (novel)", held["n"], held["scripted_caught"], held["v2_caught"])
    overall_row = _row("Overall", total, overall_scripted_caught, overall_v2_caught)

    return f"""
<div style="background:#FFF3E6;border:1px solid #381932;border-radius:8px;padding:14px 18px;margin:6px 0 14px;">
  <div style="font-weight:700;font-size:15px;margin-bottom:8px;color:#381932;">
    B.2 Phase-1 head-to-head — same Scammer outputs vs both defenders (n={total})
  </div>
  <table style="border-collapse:collapse;font-size:13px;width:100%;">
    <thead>
      <tr style="border-bottom:1px solid #381932;color:#381932;">
        <th style="padding:4px 12px;text-align:left;">Split</th>
        <th style="padding:4px 12px;text-align:right;">n</th>
        <th style="padding:4px 12px;text-align:right;">Scripted bypass</th>
        <th style="padding:4px 12px;text-align:right;">v2 LoRA bypass</th>
        <th style="padding:4px 12px;text-align:right;">Gap</th>
      </tr>
    </thead>
    <tbody>
      {train_row}
      {held_row}
      {overall_row}
    </tbody>
  </table>
  <div style="margin-top:10px;font-size:12px;color:#000;">
    <strong>Cross-tab:</strong>
    {cells['both_caught']} both caught ·
    <strong style="color:#1b5e20;">{cells['only_scripted_missed']} only-scripted-missed</strong>
    (the co-evolution wins) ·
    {cells['only_v2_missed']} only-v2-missed (v3 targets) ·
    {cells['both_missed']} both missed
  </div>
</div>
"""


def render_sample(index: int) -> str:
    """Render the detail panel for one sample.

    The panel shows the seed prompt, the generated text, both defenders'
    scores and verdicts, the v2 signals and explanation, and a highlighted
    note when exactly one of the two defenders caught the sample.

    Args:
        index: Position of the sample in ``load_samples()``.

    Returns:
        An HTML fragment. A short red notice is returned when ``index`` is
        ``None`` or falls outside the loaded samples.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    samples = load_samples()
    # None is treated like an out-of-range index (no sample to show) instead
    # of raising TypeError on the comparison below.
    if index is None or index < 0 or index >= len(samples):
        return '<div style="color:#b71c1c;">Sample index out of range.</div>'
    s = samples[index]

    def _text(value: str) -> str:
        """Escape &, < and > so log/model text cannot inject markup."""
        return escape(value, quote=False)

    seed_html = _text(s.seed).replace("\n", "<br>")
    completion_html = _text(s.completion).replace("\n", "<br>")
    explanation_html = _text(s.v2_explanation)
    split_html = _text(s.split)
    # Signals come from the model's own response, so they are escaped like
    # every other piece of generated text.
    signals_html = " ".join(
        f'<span style="display:inline-block;padding:2px 8px;border-radius:999px;'
        f'background:#FFF3E6;border:1px solid #381932;color:#381932;'
        f'margin:2px 4px 2px 0;font-size:11px;">{_text(sig)}</span>'
        for sig in s.v2_signals
    )
    if not signals_html:
        signals_html = '<span style="color:rgba(0,0,0,0.55);font-size:12px;">(no signals declared)</span>'

    refusal_html = ' · <strong style="color:#b71c1c;">REFUSAL</strong>' if s.is_refusal else ''
    explanation_block = (
        explanation_html
        or '<em style="color:rgba(0,0,0,0.55);">(no parseable explanation)</em>'
    )

    # Only the two disagreement cases get a call-out; agreement shows nothing.
    asymmetry_note = ""
    if (not s.scripted_caught) and s.v2_caught:
        asymmetry_note = (
            '<div style="background:#e8f5e9;border-left:4px solid #1b5e20;padding:8px 12px;'
            'margin-top:10px;font-size:13px;color:#1b5e20;">'
            "<strong>This is the co-evolution win.</strong> The trained Scammer evaded the "
            "rule-based detector — but the v2 Analyzer LoRA (trained on the env's 8-rubric "
            "reward) catches it. This is the kind of pair that the +60 pp head-to-head gap "
            "is built from."
            "</div>"
        )
    elif s.scripted_caught and (not s.v2_caught):
        asymmetry_note = (
            '<div style="background:#fff3e0;border-left:4px solid #e65100;padding:8px 12px;'
            'margin-top:10px;font-size:13px;color:#bf360c;">'
            "<strong>v3 target.</strong> Scripted's keyword rules caught this, but our "
            "v2 LoRA missed it — typically a non-bank category outside v2's training "
            "distribution. Phase-2 LoRA-vs-LoRA retrain (queued for onsite GPU) closes "
            "exactly these cases."
            "</div>"
        )

    return f"""
<div style="background:#fff;border:1px solid #381932;border-radius:8px;padding:16px 18px;">
  <div style="font-size:11px;color:rgba(0,0,0,0.55);margin-bottom:6px;">
    Sample #{s.index} · split = <strong>{split_html}</strong> · {s.length_chars} chars
    {refusal_html}
  </div>

  <div style="font-weight:700;font-size:13px;color:#381932;margin-bottom:4px;">
    Seed prompt (what the trained Scammer was asked to write)
  </div>
  <div style="background:#FFF3E6;padding:10px 12px;border-radius:6px;margin-bottom:14px;
              font-size:13px;line-height:1.5;color:#000;">{seed_html}</div>

  <div style="font-weight:700;font-size:13px;color:#381932;margin-bottom:4px;">
    Generated scam text (Qwen2.5-0.5B + LoRA, after 200 GRPO episodes)
  </div>
  <div style="background:#FFF;border:1px dashed #381932;padding:10px 12px;border-radius:6px;
              margin-bottom:14px;font-size:13px;line-height:1.5;color:#000;
              font-family:'SF Mono','Menlo',monospace;">{completion_html}</div>

  <div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;">
    <div style="background:#FFF;border:2px solid #b71c1c;border-radius:6px;padding:12px;">
      <div style="font-weight:700;font-size:13px;color:#381932;margin-bottom:6px;">
        Defender 1 — rule-based <code>ScriptedAnalyzer</code>
      </div>
      <div style="font-size:13px;color:#000;">
        Score: <strong>{s.scripted_score:.2f}</strong> · {_verdict_pill(s.scripted_caught)}
      </div>
    </div>
    <div style="background:#FFF;border:2px solid #1b5e20;border-radius:6px;padding:12px;">
      <div style="font-weight:700;font-size:13px;color:#381932;margin-bottom:6px;">
        Defender 2 — <code>chakravyuh-analyzer-lora-v2</code> (trained)
      </div>
      <div style="font-size:13px;color:#000;margin-bottom:6px;">
        Score: <strong>{s.v2_score:.2f}</strong> · {_verdict_pill(s.v2_caught)}
      </div>
      <div style="font-size:11px;color:rgba(0,0,0,0.55);margin-bottom:4px;">Signals declared:</div>
      <div style="margin-bottom:8px;">{signals_html}</div>
      <div style="font-size:11px;color:rgba(0,0,0,0.55);margin-bottom:2px;">Explanation:</div>
      <div style="font-size:12px;line-height:1.5;color:#000;">{explanation_block}</div>
    </div>
  </div>

  {asymmetry_note}
</div>
"""