from __future__ import annotations

import json
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import FancyBboxPatch


# Project root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Directory the rendered benchmark card images are saved to (created by main()).
FIGURES = ROOT / "figures"
# Directory the consolidated metrics.json is written to (created by main()).
DATA = ROOT / "data"


def load_json(path: Path) -> dict:
    """Read *path* as UTF-8 text and return the parsed JSON document.

    Raises whatever opening the file or ``json.load`` raises (for example
    ``FileNotFoundError`` or ``json.JSONDecodeError``).
    """
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)


def first_result(root: Path, pattern: str) -> Path:
    """Return the smallest path (in sort order) under *root* matching *pattern*.

    Args:
        root: Directory the glob is evaluated against.
        pattern: Glob pattern, relative to *root*.

    Raises:
        FileNotFoundError: If nothing matches; the pattern is the sole argument.
    """
    # The minimum of the matches is the same element sorting would put first.
    earliest = min(root.glob(pattern), default=None)
    if earliest is None:
        raise FileNotFoundError(pattern)
    return earliest


def pct(value: float) -> str:
    """Format a fraction as a percentage string with one decimal place.

    The value is multiplied by 100 before formatting, so ``0.5`` becomes
    ``"50.0%"``.
    """
    scaled = value * 100
    return format(scaled, ".1f") + "%"


def add_gradient_bar(ax, patch, top_color: str, bottom_color: str) -> None:
    """Paint a vertical two-colour gradient over a bar and hide its flat fill.

    A 256-step image blending from *bottom_color* (first row) to *top_color*
    (last row) is drawn with ``ax.imshow`` over the patch's bounding box,
    clipped to the patch and placed just above it in z-order.  The patch's
    own face and edge colours are then set to fully transparent.

    Bars whose height is zero or negative are left untouched.

    Args:
        ax: Object providing ``imshow`` (the axes that owns *patch*).
        patch: Bar rectangle providing ``get_x``/``get_y``/``get_width``/
            ``get_height``/``get_zorder`` and the colour setters.
        top_color: ``#rrggbb`` colour used for the last gradient row.
        bottom_color: ``#rrggbb`` colour used for the first gradient row.
    """
    height = patch.get_height()
    if height <= 0:
        return
    left, base = patch.get_x(), patch.get_y()
    span = patch.get_width()

    def channels(hex_color: str) -> np.ndarray:
        # Split "rrggbb" into three two-digit hex pairs scaled to 0..1.
        digits = hex_color.lstrip("#")
        return np.array([int(digits[k : k + 2], 16) / 255 for k in range(0, 6, 2)])

    upper_rgb = channels(top_color)
    lower_rgb = channels(bottom_color)

    # Shape (256, 1, 3): one pixel wide, stretched across the bar by `extent`.
    ramp = np.linspace(lower_rgb, upper_rgb, 256)[:, np.newaxis, :]

    image_options = {
        "extent": [left, left + span, base, base + height],
        "origin": "lower",  # row 0 (bottom colour) sits at the bar's base
        "aspect": "auto",
        "clip_path": patch,
        "clip_on": True,
        "zorder": patch.get_zorder() + 0.1,
    }
    ax.imshow(ramp, **image_options)

    # Keep the patch (it is the clip path) but make its flat colour invisible.
    invisible = (1, 1, 1, 0)
    patch.set_facecolor(invisible)
    patch.set_edgecolor(invisible)


def _read_score_summary(path: Path) -> dict:
    """Return the summary object stored in a BFCL score file.

    The file is first parsed as a single JSON document.  If that fails, the
    first non-blank line is parsed on its own.
    NOTE(review): this assumes multi-line score files carry the
    accuracy/count summary on their first line — confirm against the BFCL
    output format.

    Raises:
        json.JSONDecodeError: If the file is empty or neither form parses.
    """
    text = path.read_text(encoding="utf-8")
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        header = next((line for line in text.splitlines() if line.strip()), None)
        if header is None:
            # Nothing to fall back to: surface the original decode error.
            raise
        return json.loads(header)


def _ifeval_scores(results: dict) -> dict:
    """Pick the four IFEval accuracies out of *results* under short names."""
    source_keys = {
        "prompt_strict": "prompt_level_strict_acc,none",
        "prompt_loose": "prompt_level_loose_acc,none",
        "instruction_strict": "inst_level_strict_acc,none",
        "instruction_loose": "inst_level_loose_acc,none",
    }
    return {name: results[key] for name, key in source_keys.items()}


def _draw_ifeval_panel(fig, rect, labels, base_vals, carnice_vals) -> None:
    """Draw the grouped base-vs-Carnice IFEval bar chart into *rect* of *fig*."""
    ax = fig.add_axes(rect)
    centers = np.arange(len(labels)) * 1.55
    width = 0.46
    base_bars = ax.bar(centers - width / 2, base_vals, width=width, color="#d8c6ef", zorder=3)
    carnice_bars = ax.bar(centers + width / 2, carnice_vals, width=width, color="#cbd9f7", zorder=3)
    for patch in base_bars:
        add_gradient_bar(ax, patch, "#d8c5ef", "#efe7fa")
    for patch in carnice_bars:
        add_gradient_bar(ax, patch, "#c8d7f6", "#eef4ff")

    # Limits are set after the gradient overlays have been added.
    ax.set_xlim(centers[0] - 0.70, centers[-1] + 0.70)
    # NOTE: the y-axis starts at 75%, so lower scores fall outside the view.
    ax.set_ylim(0.75, 1.005)
    yticks = np.arange(0.75, 1.01, 0.05)
    ax.set_yticks(yticks)
    ax.set_yticklabels([f"{int(round(v * 100))}%" for v in yticks], fontsize=10)
    ax.grid(axis="y", color="#dfe3ea", linewidth=1.0, linestyle=(0, (2.2, 2.2)), zorder=0)
    ax.spines[["top", "right", "left"]].set_visible(False)
    ax.spines["bottom"].set_color("#9ca3af")
    ax.tick_params(axis="y", length=0, pad=8)
    ax.tick_params(axis="x", length=0, pad=10)

    # One tick per bar ("Base" / "Carnice SFT"); the metric name is drawn as
    # free text below the axis, centred under each pair of bars.
    tick_positions = []
    tick_labels = []
    for center, (label, _) in zip(centers, labels):
        tick_positions.extend([center - width / 2, center + width / 2])
        tick_labels.extend(["Base", "Carnice SFT"])
        ax.text(center, 0.718, label, ha="center", va="top", fontsize=13, clip_on=False)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels, fontsize=9)

    # Percentage label just above the top of every bar.
    for bars, values in [(base_bars, base_vals), (carnice_bars, carnice_vals)]:
        for patch, value in zip(bars, values):
            ax.text(
                patch.get_x() + patch.get_width() / 2,
                value + 0.006,
                pct(value),
                ha="center",
                va="bottom",
                fontsize=13,
                fontweight="bold",
                color="#0f1115",
            )


def _draw_validation_panel(fig, rect, reductions) -> None:
    """Draw the loss / perplexity reduction bars into *rect* of *fig*.

    *reductions* is ``[loss_reduction, perplexity_reduction]`` as fractions.
    """
    ax = fig.add_axes(rect)
    positions = [0, 1.35]
    bars = ax.bar(positions, reductions, width=0.68, color=["#d8c5ef", "#ffb0a2"], zorder=3)
    add_gradient_bar(ax, bars[0], "#d8c5ef", "#efe7fa")
    add_gradient_bar(ax, bars[1], "#ffaaa0", "#ffd9d0")
    ax.set_xlim(-0.70, 2.05)
    # NOTE: fixed 0-38% range; larger reductions extend past the axes area.
    ax.set_ylim(0, 0.38)
    ax.axis("off")

    captions = ["Loss\nreduction", "Perplexity\nreduction"]
    for x_pos, value, caption in zip(positions, reductions, captions):
        ax.text(x_pos, value + 0.017, pct(value), ha="center", va="bottom", fontsize=17, fontweight="bold")
        ax.text(x_pos, -0.017, caption, ha="center", va="top", fontsize=11, clip_on=False)


def main() -> None:
    """Collect benchmark results, write ``metrics.json`` and render the card.

    Steps:
      1. Load the IFEval results, the validation summary and the two BFCL
         score summaries from under ``ROOT / "raw"``.
      2. Write the consolidated values to ``DATA / "metrics.json"``.
      3. Render the two-panel benchmark card and save it as PNG and SVG in
         ``FIGURES``, then print the PNG path.

    Raises:
        FileNotFoundError: If an expected input file is missing.
        KeyError: If an input file lacks one of the keys read here.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    FIGURES.mkdir(parents=True, exist_ok=True)
    DATA.mkdir(parents=True, exist_ok=True)

    remote = ROOT / "raw" / "remote_benchmarks"
    bfcl_scores = ROOT / "raw" / "bfcl_scores"

    base_ifeval_path = first_result(remote, "ifeval_base/*/results_*.json")
    adapter_ifeval_path = first_result(remote, "ifeval_adapter/*/results_*.json")

    base_ifeval = load_json(base_ifeval_path)["results"]["ifeval"]
    adapter_ifeval = load_json(adapter_ifeval_path)["results"]["ifeval"]
    validation_summary = load_json(remote / "qwen36_carnice_benchmark_summary_20260425.json")[
        "training_format_validation"
    ]

    # Both score files go through the same reader so that a file with extra
    # lines after the summary is handled the same way for either model.
    score_file = "BFCL_v4_multi_turn_base_score.json"
    bfcl_base = _read_score_summary(bfcl_scores / "qwen36-base-local-FC" / "multi_turn" / score_file)
    bfcl_adapter = _read_score_summary(
        bfcl_scores / "qwen36-carnice-v1-local-FC" / "multi_turn" / score_file
    )

    metrics = {
        "run": "qwen36_short_public_ab_20260425_155339",
        "model": {
            "base": "Qwen/Qwen3.6-27B",
            "carnice_sft": "qwen36_carnice_direct_v1b_lora_8192_split_200step",
        },
        "note": "All plotted values are raw measured values from the included benchmark files.",
        "ifeval_limit_20": {
            "base": _ifeval_scores(base_ifeval),
            "carnice_sft": _ifeval_scores(adapter_ifeval),
        },
        "heldout_training_format_validation": validation_summary,
        "bfcl_multi_turn_base_limit_2": {
            "base": bfcl_base,
            "carnice_sft": {
                "accuracy": bfcl_adapter["accuracy"],
                "correct_count": bfcl_adapter["correct_count"],
                "total_count": bfcl_adapter["total_count"],
            },
        },
        "source_files": {
            "ifeval_base": str(base_ifeval_path.relative_to(ROOT)),
            "ifeval_carnice_sft": str(adapter_ifeval_path.relative_to(ROOT)),
            "bfcl_scores": "raw/bfcl_scores/",
            "validation": "raw/remote_benchmarks/qwen36_carnice_benchmark_summary_20260425.json",
        },
    }
    (DATA / "metrics.json").write_text(json.dumps(metrics, indent=2) + "\n", encoding="utf-8")

    # (display label, key in metrics["ifeval_limit_20"][...]) in plot order.
    labels = [
        ("Prompt strict", "prompt_strict"),
        ("Prompt loose", "prompt_loose"),
        ("Instruction strict", "instruction_strict"),
        ("Instruction loose", "instruction_loose"),
    ]
    base_vals = [metrics["ifeval_limit_20"]["base"][key] for _, key in labels]
    carnice_vals = [metrics["ifeval_limit_20"]["carnice_sft"][key] for _, key in labels]

    base_loss = validation_summary["base_eval_loss"]
    carnice_loss = validation_summary["adapter_eval_loss"]
    base_ppl = validation_summary["base_eval_perplexity"]
    carnice_ppl = validation_summary["adapter_eval_perplexity"]
    # Relative reductions of the adapter versus the base model.
    loss_reduction = (base_loss - carnice_loss) / base_loss
    ppl_reduction = (base_ppl - carnice_ppl) / base_ppl

    plt.rcParams.update(
        {
            "font.family": "DejaVu Sans",
            "figure.facecolor": "#ffffff",
            "axes.facecolor": "#ffffff",
            "savefig.facecolor": "#ffffff",
            "text.color": "#0f1115",
            "axes.labelcolor": "#0f1115",
            "xtick.color": "#0f1115",
            "ytick.color": "#4b5563",
            "axes.edgecolor": "#a6a6a6",
        }
    )

    fig = plt.figure(figsize=(12.93, 6.55), dpi=200)
    # Axes rectangles in figure coordinates: [left, bottom, width, height].
    left = [0.060, 0.21, 0.60, 0.66]
    right = [0.735, 0.29, 0.235, 0.56]

    # Rounded "card" outlines drawn behind each panel.
    for x, y, w, h in [(0.018, 0.04, 0.66, 0.92), (0.705, 0.04, 0.275, 0.92)]:
        fig.add_artist(
            FancyBboxPatch(
                (x, y),
                w,
                h,
                boxstyle="round,pad=0.016,rounding_size=0.025",
                transform=fig.transFigure,
                linewidth=1.0,
                edgecolor="#e4e7ec",
                facecolor="#ffffff",
                zorder=-10,
            )
        )

    _draw_ifeval_panel(fig, left, labels, base_vals, carnice_vals)
    _draw_validation_panel(fig, right, [loss_reduction, ppl_reduction])

    fig.text(0.845, 0.145, f"Validation loss: {base_loss:.3f} \u2192 {carnice_loss:.3f}",
             ha="center", va="center", fontsize=10, color="#667085")
    fig.text(0.845, 0.092, f"Validation perplexity: {base_ppl:.3f} \u2192 {carnice_ppl:.3f}",
             ha="center", va="center", fontsize=10, color="#667085")

    png_path = FIGURES / "qwen36_carnice_sft_benchmark_card.png"
    svg_path = FIGURES / "qwen36_carnice_sft_benchmark_card.svg"
    for target in (png_path, svg_path):
        fig.savefig(target)
    # pyplot keeps figures registered until they are closed explicitly.
    plt.close(fig)

    print(png_path)


# Generate the metrics file and benchmark card only when run as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()