#!/usr/bin/env bash
# ChaosOps AI — A/B comparison Job entry-point.
#
# Pulls two LoRA adapters, evaluates each as the `trained` policy across
# the full curriculum, writes a single side-by-side report, and uploads
# everything to the WINNER's model repo.
#
# Required env:
#   ADAPTER_A          repo id, e.g. helloAK96/chaosops-grpo-lora-p1
#   ADAPTER_B          repo id, e.g. helloAK96/chaosops-grpo-lora-p2
#   EPISODES_PER_TYPE  default 5
#
# Output (uploaded to whichever repo wins on summed mean reward):
#   ab_report.txt            — side-by-side per-tier table
#   ab_comparison_curve.png  — both trained lines overlaid on baselines

set -euo pipefail

EPISODES_PER_TYPE="${EPISODES_PER_TYPE:-5}"
ADAPTER_A="${ADAPTER_A:?ADAPTER_A required}"
ADAPTER_B="${ADAPTER_B:?ADAPTER_B required}"

echo "==[chaosops]== installing deps"
pip install --quiet --upgrade pip
# Best-effort: the base image may already ship a CUDA build of torch, so a
# failed (re)install here is tolerated — if torch is truly absent, the
# evaluate step below fails loudly anyway.
pip install --quiet --no-deps "torch==2.4.1+cu124" \
  --index-url https://download.pytorch.org/whl/cu124 || true
pip install --quiet \
  "transformers>=4.44.0,<4.50.0" \
  "peft>=0.12.0,<0.14.0" \
  "accelerate>=0.33.0,<0.36.0" \
  "huggingface_hub>=0.24.0" \
  "pydantic>=2.0.0" \
  "matplotlib>=3.7.0" \
  "datasets>=2.20.0,<3.0.0" \
  "bitsandbytes==0.43.3"

# Expose the chaosops package (mounted at /data) as /tmp/chaosops on
# PYTHONPATH so `python -m chaosops.train.evaluate` resolves.
ln -sfn /data /tmp/chaosops
export PYTHONPATH="/tmp:${PYTHONPATH:-}"

# `hf` is the modern CLI entry point; older huggingface_hub releases
# (the floor here is 0.24.0) only ship `huggingface-cli`. Use whichever
# is actually installed.
if command -v hf >/dev/null 2>&1; then
  hf_cli=hf
else
  hf_cli=huggingface-cli
fi

mkdir -p /workspace/{a,b}
cd /workspace

for tag in a b; do
  case "$tag" in
    a) repo="$ADAPTER_A" ;;
    b) repo="$ADAPTER_B" ;;
  esac

  echo "==[chaosops]== downloading $repo → /workspace/$tag/lora_adapter"
  "$hf_cli" download "$repo" --repo-type model \
    --local-dir "/workspace/$tag/lora_adapter" >/dev/null

  echo "==[chaosops]== evaluating $tag ($repo)"
  python -m chaosops.train.evaluate \
    --policies random heuristic oracle trained \
    --adapter-path "/workspace/$tag/lora_adapter" \
    --episodes-per-type "${EPISODES_PER_TYPE}" \
    --out-dir "/workspace/$tag/eval"

  # Fail fast with a clear message instead of a Python traceback later
  # if the evaluator exited 0 but wrote no summary.
  if [[ ! -f "/workspace/$tag/eval/evaluation.json" ]]; then
    echo "==[chaosops]== ERROR: /workspace/$tag/eval/evaluation.json missing" >&2
    exit 1
  fi
done

echo "==[chaosops]== building A/B report and overlay plot"
ADAPTER_A="$ADAPTER_A" ADAPTER_B="$ADAPTER_B" python - <<'PY'
"""Build the side-by-side A/B report and overlay plot, then upload both
artifacts to the winning adapter's model repo."""
import json
import os
from pathlib import Path

from huggingface_hub import HfApi
import matplotlib

matplotlib.use("Agg")  # headless rendering inside the job container
import matplotlib.pyplot as plt

repo_a = os.environ["ADAPTER_A"]
repo_b = os.environ["ADAPTER_B"]


def load(tag):
    """Load the evaluation summary written by chaosops.train.evaluate."""
    return json.loads(Path(f"/workspace/{tag}/eval/evaluation.json").read_text())


a = load("a")
b = load("b")


def by(agg, policy, tier):
    """Return the first aggregate row matching (policy, tier), or None."""
    return next((x for x in agg if x["policy"] == policy and x["tier"] == tier), None)


tiers = ["easy", "medium", "hard"]

report_lines = [
    "ChaosOps AI — A/B comparison",
    f"  A = {repo_a}",
    f"  B = {repo_b}",
    "",
    f"{'tier':<8} {'policy':<10} {'A.reward':>10} {'B.reward':>10} Δ(B-A)",
    "-" * 60,
]
for tier in tiers:
    for policy in ["random", "heuristic", "oracle", "trained"]:
        # NOTE: previously named ax/bx, which shadowed the matplotlib Axes
        # variable used further down; renamed for clarity.
        row_a = by(a["aggregates"], policy, tier)
        row_b = by(b["aggregates"], policy, tier)
        if not row_a or not row_b:
            continue  # policy absent from one run — skip rather than crash
        delta = row_b["mean_reward"] - row_a["mean_reward"]
        report_lines.append(
            f"{tier:<8} {policy:<10} {row_a['mean_reward']:>+10.1f} {row_b['mean_reward']:>+10.1f} {delta:+10.1f}"
        )

report = "\n".join(report_lines)
Path("/workspace/ab_report.txt").write_text(report + "\n")
print(report)

# Determine winner by sum of trained mean rewards across tiers (hoist the
# duplicate by() lookups; ties go to A).
trained_a = [r for r in (by(a["aggregates"], "trained", t) for t in tiers) if r]
trained_b = [r for r in (by(b["aggregates"], "trained", t) for t in tiers) if r]
sum_a = sum(r["mean_reward"] for r in trained_a)
sum_b = sum(r["mean_reward"] for r in trained_b)
winner_repo = repo_a if sum_a >= sum_b else repo_b
print(f"\nWINNER (higher summed mean trained reward): {winner_repo} ({max(sum_a, sum_b):+.1f} vs {min(sum_a, sum_b):+.1f})")

# Build overlay plot (baselines from A; trained-A and trained-B both shown)
fig, ax = plt.subplots(figsize=(10, 5.5), dpi=160)
color = {"random": "#c0392b", "heuristic": "#2980b9", "oracle": "#27ae60",
         "trained_a": "#8e44ad", "trained_b": "#d35400"}
for policy in ["random", "heuristic", "oracle"]:
    xs, ys = [], []
    for t in tiers:
        m = by(a["aggregates"], policy, t)
        if m:
            xs.append(t)
            ys.append(m["mean_reward"])
    ax.plot(xs, ys, marker="o", label=policy, color=color[policy],
            linewidth=2.4, markersize=8)
for tag, repo, key in [("A", repo_a, "trained_a"), ("B", repo_b, "trained_b")]:
    src = a if tag == "A" else b
    xs, ys = [], []
    for t in tiers:
        m = by(src["aggregates"], "trained", t)
        if m:
            xs.append(t)
            ys.append(m["mean_reward"])
    ax.plot(xs, ys, marker="s", label=f"trained ({tag}: {repo.split('/')[-1]})",
            color=color[key], linewidth=2.4, markersize=8, linestyle="--")
ax.axhline(0, color="#888", linewidth=0.6)
ax.set_title("ChaosOps AI — A/B trained-policy comparison vs. baselines", fontsize=13)
ax.set_xlabel("Difficulty tier", fontsize=12)
ax.set_ylabel("Mean cumulative episode reward (per-episode points)", fontsize=12)
ax.grid(True, linestyle=":", alpha=0.4)
ax.legend(loc="lower left", fontsize=10, framealpha=0.95)
fig.tight_layout()
fig.savefig("/workspace/ab_comparison_curve.png")

# Upload both artifacts to the WINNER repo.
api = HfApi()
api.upload_file(path_or_fileobj="/workspace/ab_report.txt",
                path_in_repo="ab_report.txt",
                repo_id=winner_repo, repo_type="model",
                commit_message="A/B comparison report")
api.upload_file(path_or_fileobj="/workspace/ab_comparison_curve.png",
                path_in_repo="ab_comparison_curve.png",
                repo_id=winner_repo, repo_type="model",
                commit_message="A/B comparison curve")
print("uploaded to", winner_repo)
PY

echo "==[chaosops]== done"