helloAK96 Claude Opus 4.7 committed on
Commit
e6e88e7
·
1 Parent(s): 6e35cec

Add A/B comparison Job for trained-policy showdown

Browse files

scripts/ab_compare.sh evaluates two LoRA repos on the same 540-episode
sweep, builds a side-by-side report and an overlay plot, and uploads
both to whichever repo wins on summed trained mean reward across tiers.
Used to pick between the Phase-1 and Phase-2 LoRAs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

Files changed (1) hide show
  1. scripts/ab_compare.sh +150 -0
scripts/ab_compare.sh ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ # ChaosOps AI — A/B comparison Job entry-point.
3
+ #
4
+ # Pulls two LoRA adapters, evaluates each as the `trained` policy across
5
+ # the full curriculum, writes a single side-by-side report, and uploads
6
+ # everything to the WINNER's model repo.
7
+ #
8
+ # Required env:
9
+ # ADAPTER_A repo id, e.g. helloAK96/chaosops-grpo-lora-p1
10
+ # ADAPTER_B repo id, e.g. helloAK96/chaosops-grpo-lora-p2
11
+ # EPISODES_PER_TYPE optional; default 5
12
+ #
13
+ # Output (uploaded to whichever repo wins on summed mean reward):
14
+ # ab_report.txt — side-by-side per-tier table
15
+ # ab_comparison_curve.png — both trained lines overlaid on baselines
16
+
17
# Fail fast: abort on any command error, unset variable, or failed pipeline stage.
set -euo pipefail

# Value passed to the evaluator's --episodes-per-type; optional, defaults to 5.
EPISODES_PER_TYPE="${EPISODES_PER_TYPE:-5}"
# Both adapter repo ids are mandatory; ${VAR:?msg} aborts with msg when unset or empty.
ADAPTER_A="${ADAPTER_A:?ADAPTER_A required}"
ADAPTER_B="${ADAPTER_B:?ADAPTER_B required}"

echo "==[chaosops]== installing deps"
pip install --quiet --upgrade pip
# Best-effort install of the CUDA 12.4 torch wheel. A failure is tolerated
# (presumably the image may already ship a usable torch -- TODO confirm), but
# it is reported on stderr instead of being swallowed silently.
if ! pip install --quiet --no-deps "torch==2.4.1+cu124" \
    --index-url https://download.pytorch.org/whl/cu124; then
  echo "==[chaosops]== WARNING: torch==2.4.1+cu124 install failed; continuing with the existing environment" >&2
fi
pip install --quiet \
  "transformers>=4.44.0,<4.50.0" \
  "peft>=0.12.0,<0.14.0" \
  "accelerate>=0.33.0,<0.36.0" \
  "huggingface_hub>=0.24.0" \
  "pydantic>=2.0.0" \
  "matplotlib>=3.7.0" \
  "datasets>=2.20.0,<3.0.0" \
  "bitsandbytes==0.43.3"

# Expose /data as /tmp/chaosops and put /tmp on PYTHONPATH so that
# `python -m chaosops...` can be resolved (assumes /data holds the chaosops
# package -- TODO confirm against the Job's volume layout).
ln -sfn /data /tmp/chaosops
export PYTHONPATH="/tmp:${PYTHONPATH:-}"

# One working directory per adapter; later steps use absolute /workspace paths.
mkdir -p /workspace/{a,b}
cd /workspace
42
+
43
# The `hf` executable only ships with newer huggingface_hub releases; the
# ">=0.24.0" requirement also admits versions that only provide the legacy
# `huggingface-cli` entry point, so pick whichever one is installed.
if command -v hf >/dev/null 2>&1; then
  hf_cli=(hf)
else
  hf_cli=(huggingface-cli)
fi

# Download and evaluate each adapter in turn. Every run evaluates the three
# baseline policies plus `trained`, writing results under /workspace/<tag>/eval.
for tag in a b; do
  case "$tag" in
    a) repo="$ADAPTER_A" ;;
    b) repo="$ADAPTER_B" ;;
  esac
  echo "==[chaosops]== downloading $repo → /workspace/$tag/lora_adapter"
  # Only stdout is discarded; errors still reach the Job log and, under
  # `set -e`, a failed download aborts the script.
  "${hf_cli[@]}" download "$repo" --repo-type model --local-dir "/workspace/$tag/lora_adapter" >/dev/null

  echo "==[chaosops]== evaluating $tag ($repo)"
  python -m chaosops.train.evaluate \
    --policies random heuristic oracle trained \
    --adapter-path "/workspace/$tag/lora_adapter" \
    --episodes-per-type "${EPISODES_PER_TYPE}" \
    --out-dir "/workspace/$tag/eval"
done
58
+
59
+ echo "==[chaosops]== building A/B report and overlay plot"
60
+ ADAPTER_A="$ADAPTER_A" ADAPTER_B="$ADAPTER_B" python - <<'PY'
61
+ import json, os
62
+ from pathlib import Path
63
+ from huggingface_hub import HfApi
64
+ import matplotlib
65
+ matplotlib.use("Agg")
66
+ import matplotlib.pyplot as plt
67
+
68
+ repo_a = os.environ["ADAPTER_A"]
69
+ repo_b = os.environ["ADAPTER_B"]
70
+
71
def load(tag):
    """Return the parsed evaluation.json written for one adapter run.

    Args:
        tag: Workspace sub-directory name ("a" or "b" in this script).

    Returns:
        The decoded JSON document; the rest of the script reads its
        "aggregates" list.

    Raises:
        FileNotFoundError: if the evaluation step did not produce the file.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    path = Path(f"/workspace/{tag}/eval/evaluation.json")
    # Decode as UTF-8 explicitly rather than relying on the container locale.
    return json.loads(path.read_text(encoding="utf-8"))


a = load("a")
b = load("b")
76
+
77
def by(agg, policy, tier):
    """Return the first row of `agg` whose "policy" and "tier" match, else None.

    Args:
        agg: Iterable of aggregate rows (mappings with "policy" and "tier" keys).
        policy: Policy name to match.
        tier: Tier name to match.
    """
    for row in agg:
        if row["policy"] == policy and row["tier"] == tier:
            return row
    return None
79
+
80
# Build the side-by-side text report: one row per (tier, policy) pair for
# which BOTH evaluation runs contain an aggregate.
tiers = ["easy", "medium", "hard"]
policies = ["random", "heuristic", "oracle", "trained"]
report_lines = [
    "ChaosOps AI — A/B comparison",
    f" A = {repo_a}",
    f" B = {repo_b}",
    "",
    f"{'tier':<8} {'policy':<10} {'A.reward':>10} {'B.reward':>10} Δ(B-A)",
    "-" * 60,
]
for tier in tiers:
    for policy in policies:
        row_a = by(a["aggregates"], policy, tier)
        row_b = by(b["aggregates"], policy, tier)
        # Skip pairs that either run is missing, so every row has both columns.
        if not row_a or not row_b:
            continue
        delta = row_b["mean_reward"] - row_a["mean_reward"]
        report_lines.append(
            f"{tier:<8} {policy:<10} {row_a['mean_reward']:>+10.1f} {row_b['mean_reward']:>+10.1f} {delta:+10.1f}"
        )
report = "\n".join(report_lines)
# The report contains non-ASCII characters, so pin the file encoding instead
# of depending on the container's locale default.
Path("/workspace/ab_report.txt").write_text(report + "\n", encoding="utf-8")
print(report)
102
+
103
# Determine winner by sum of trained mean rewards across tiers.
# Only tiers for which BOTH adapters report a `trained` aggregate are summed;
# summing each adapter over its own tier set would compare different things
# (e.g. an adapter missing a negative-reward tier would look better).
trained_a = {t: by(a["aggregates"], "trained", t) for t in tiers}
trained_b = {t: by(b["aggregates"], "trained", t) for t in tiers}
scored_tiers = [t for t in tiers if trained_a[t] and trained_b[t]]
if not scored_tiers:
    # Without any shared trained result there is nothing to compare; stop
    # here instead of declaring A the winner at 0 vs 0 and uploading to it.
    raise SystemExit("no tier has trained results for both adapters; cannot pick a winner")
if len(scored_tiers) < len(tiers):
    print(f"\nNOTE: winner computed over tiers {scored_tiers} only (trained results missing elsewhere)")
sum_a = sum(trained_a[t]["mean_reward"] for t in scored_tiers)
sum_b = sum(trained_b[t]["mean_reward"] for t in scored_tiers)
# Ties go to A.
winner_repo = repo_a if sum_a >= sum_b else repo_b
print(f"\nWINNER (higher summed mean trained reward): {winner_repo} ({max(sum_a, sum_b):+.1f} vs {min(sum_a, sum_b):+.1f})")
108
+
109
# Build overlay plot (baselines from A; trained-A and trained-B both shown)
def _tier_series(aggregates, policy):
    """Return (tier names, mean rewards) for the tiers where `policy` has a row."""
    present = [(t, by(aggregates, policy, t)) for t in tiers]
    present = [(t, row) for t, row in present if row]
    return [t for t, _ in present], [row["mean_reward"] for _, row in present]


fig, ax = plt.subplots(figsize=(10, 5.5), dpi=160)
color = {"random": "#c0392b", "heuristic": "#2980b9", "oracle": "#27ae60",
         "trained_a": "#8e44ad", "trained_b": "#d35400"}

# Baseline policies: solid lines, taken from run A's evaluation.
for policy in ["random", "heuristic", "oracle"]:
    xs, ys = _tier_series(a["aggregates"], policy)
    ax.plot(xs, ys, marker="o", label=policy, color=color[policy], linewidth=2.4, markersize=8)

# Trained policies: one dashed line per adapter, labelled with the repo's short name.
for tag, repo, key, src in [("A", repo_a, "trained_a", a), ("B", repo_b, "trained_b", b)]:
    xs, ys = _tier_series(src["aggregates"], "trained")
    ax.plot(xs, ys, marker="s", label=f"trained ({tag}: {repo.split('/')[-1]})",
            color=color[key], linewidth=2.4, markersize=8, linestyle="--")

ax.axhline(0, color="#888", linewidth=0.6)  # zero-reward reference line
ax.set_title("ChaosOps AI — A/B trained-policy comparison vs. baselines", fontsize=13)
ax.set_xlabel("Difficulty tier", fontsize=12)
ax.set_ylabel("Mean cumulative episode reward (per-episode points)", fontsize=12)
ax.grid(True, linestyle=":", alpha=0.4)
ax.legend(loc="lower left", fontsize=10, framealpha=0.95)
fig.tight_layout()
fig.savefig("/workspace/ab_comparison_curve.png")
plt.close(fig)  # release the figure once it is on disk
136
+
137
# Upload to WINNER repo: one commit per artifact, stored at the repo root
# under the same file name it has in /workspace.
hub = HfApi()
artifacts = (
    ("ab_report.txt", "A/B comparison report"),
    ("ab_comparison_curve.png", "A/B comparison curve"),
)
for filename, message in artifacts:
    hub.upload_file(
        path_or_fileobj=f"/workspace/{filename}",
        path_in_repo=filename,
        repo_id=winner_repo,
        repo_type="model",
        commit_message=message,
    )
print("uploaded to", winner_repo)
148
+ PY
149
+
150
+ echo "==[chaosops]== done"