File size: 5,719 Bytes
e6e88e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env bash
# ChaosOps AI — A/B comparison Job entry-point.
#
# Pulls two LoRA adapters, evaluates each as the `trained` policy across
# the full curriculum, writes a single side-by-side report, and uploads
# everything to the WINNER's model repo.
#
# Required env:
#   ADAPTER_A         repo id, e.g. helloAK96/chaosops-grpo-lora-p1
#   ADAPTER_B         repo id, e.g. helloAK96/chaosops-grpo-lora-p2
#   EPISODES_PER_TYPE default 5
#
# Output (uploaded to whichever repo wins on summed mean reward):
#   ab_report.txt              — side-by-side per-tier table
#   ab_comparison_curve.png    — both trained lines overlaid on baselines

set -euo pipefail

# Abort immediately with the same messages if either adapter repo id is
# missing; `:` with ${VAR:?msg} asserts without reassigning.
: "${ADAPTER_A:?ADAPTER_A required}"
: "${ADAPTER_B:?ADAPTER_B required}"

# Episodes per chaos type; overridable from the Job spec.
EPISODES_PER_TYPE="${EPISODES_PER_TYPE:-5}"

echo "==[chaosops]== installing deps"
# Upgrade pip first so the pinned/ranged wheels below resolve cleanly.
pip install --quiet --upgrade pip
# CUDA 12.4 torch wheel from the PyTorch index. --no-deps keeps transitive
# requirements from dragging in a second (CPU) torch build. The `|| true`
# is deliberate best-effort: on images that already ship a usable torch,
# a failed install here must not kill the Job (set -e is active).
pip install --quiet --no-deps "torch==2.4.1+cu124" \
    --index-url https://download.pytorch.org/whl/cu124 || true
# Remaining deps in ONE invocation so pip resolves them jointly against
# each other; splitting this list would change resolution behavior.
pip install --quiet \
    "transformers>=4.44.0,<4.50.0" \
    "peft>=0.12.0,<0.14.0" \
    "accelerate>=0.33.0,<0.36.0" \
    "huggingface_hub>=0.24.0" \
    "pydantic>=2.0.0" \
    "matplotlib>=3.7.0" \
    "datasets>=2.20.0,<3.0.0" \
    "bitsandbytes==0.43.3"

# Expose /data as /tmp/chaosops and put /tmp on PYTHONPATH so
# `python -m chaosops.train.evaluate` below resolves.
# NOTE(review): assumes /data holds the chaosops package source — confirm
# against the Job's volume mounts.
ln -sfn /data /tmp/chaosops
export PYTHONPATH="/tmp:${PYTHONPATH:-}"

# One working directory per adapter under comparison (a and b).
mkdir -p /workspace/{a,b}
cd /workspace

# Download each adapter and run the full evaluation curriculum on it.
for tag in a b; do
    # Map slot tag -> env var via indirection: "a" reads ADAPTER_A, "b"
    # reads ADAPTER_B.
    repo_var="ADAPTER_${tag^^}"
    repo="${!repo_var}"

    echo "==[chaosops]== downloading $repo β†’ /workspace/$tag/lora_adapter"
    hf download "$repo" --repo-type model --local-dir "/workspace/$tag/lora_adapter" >/dev/null

    echo "==[chaosops]== evaluating $tag ($repo)"
    python -m chaosops.train.evaluate \
        --policies random heuristic oracle trained \
        --adapter-path "/workspace/$tag/lora_adapter" \
        --episodes-per-type "${EPISODES_PER_TYPE}" \
        --out-dir "/workspace/$tag/eval"
done

echo "==[chaosops]== building A/B report and overlay plot"
# The heredoc delimiter is quoted ('PY'), so the shell does NOT expand
# anything inside it; the repo ids are passed to the python process via the
# prefix env assignments instead.
ADAPTER_A="$ADAPTER_A" ADAPTER_B="$ADAPTER_B" python - <<'PY'
import json, os
from pathlib import Path
from huggingface_hub import HfApi
import matplotlib
matplotlib.use("Agg")  # headless backend: Job pods have no display
import matplotlib.pyplot as plt

repo_a = os.environ["ADAPTER_A"]
repo_b = os.environ["ADAPTER_B"]

def load(tag):
    # Load the evaluation summary written by chaosops.train.evaluate for
    # the given slot ("a" or "b").
    # NOTE(review): schema assumed to contain "aggregates": a list of rows
    # with keys "policy", "tier", "mean_reward" — confirm against evaluator.
    return json.loads(Path(f"/workspace/{tag}/eval/evaluation.json").read_text())

a = load("a")
b = load("b")

def by(agg, policy, tier):
    # First aggregate row matching (policy, tier), or None when that
    # combination was not evaluated.
    return next((x for x in agg if x["policy"] == policy and x["tier"] == tier), None)

tiers = ["easy", "medium", "hard"]
report_lines = [
    "ChaosOps AI β€” A/B comparison",
    f"  A = {repo_a}",
    f"  B = {repo_b}",
    "",
    f"{'tier':<8} {'policy':<10} {'A.reward':>10} {'B.reward':>10}  Ξ”(B-A)",
    "-" * 60,
]
for tier in tiers:
    for policy in ["random", "heuristic", "oracle", "trained"]:
        # NB: these `ax`/`bx` are the A-run / B-run aggregate rows; unrelated
        # to the matplotlib axes also named `ax` further down.
        ax = by(a["aggregates"], policy, tier)
        bx = by(b["aggregates"], policy, tier)
        # A row appears only when BOTH runs evaluated this (policy, tier).
        if not ax or not bx:
            continue
        delta = bx["mean_reward"] - ax["mean_reward"]
        report_lines.append(
            f"{tier:<8} {policy:<10} {ax['mean_reward']:>+10.1f} {bx['mean_reward']:>+10.1f}  {delta:+10.1f}"
        )
report = "\n".join(report_lines)
Path("/workspace/ab_report.txt").write_text(report + "\n")
print(report)

# Determine winner by sum of trained mean rewards across tiers
# (tiers missing a "trained" row are simply skipped; `by` is called twice
# per tier — once as guard, once for the value).
sum_a = sum(by(a["aggregates"], "trained", t)["mean_reward"] for t in tiers if by(a["aggregates"], "trained", t))
sum_b = sum(by(b["aggregates"], "trained", t)["mean_reward"] for t in tiers if by(b["aggregates"], "trained", t))
# `>=` means ties go to adapter A.
winner_repo = repo_a if sum_a >= sum_b else repo_b
print(f"\nWINNER (higher summed mean trained reward): {winner_repo}  ({max(sum_a, sum_b):+.1f} vs {min(sum_a, sum_b):+.1f})")

# Build overlay plot (baselines from A; trained-A and trained-B both shown)
fig, ax = plt.subplots(figsize=(10, 5.5), dpi=160)
color = {"random": "#c0392b", "heuristic": "#2980b9", "oracle": "#27ae60",
         "trained_a": "#8e44ad", "trained_b": "#d35400"}
# Baseline curves come from run A only; x-axis is the categorical tier label.
for policy in ["random", "heuristic", "oracle"]:
    xs, ys = [], []
    for t in tiers:
        m = by(a["aggregates"], policy, t)
        if m: xs.append(t); ys.append(m["mean_reward"])
    ax.plot(xs, ys, marker="o", label=policy, color=color[policy], linewidth=2.4, markersize=8)
# One dashed "trained" line per adapter, labelled with the short repo name.
for tag, repo, key in [("A", repo_a, "trained_a"), ("B", repo_b, "trained_b")]:
    src = a if tag == "A" else b
    xs, ys = [], []
    for t in tiers:
        m = by(src["aggregates"], "trained", t)
        if m: xs.append(t); ys.append(m["mean_reward"])
    ax.plot(xs, ys, marker="s", label=f"trained ({tag}: {repo.split('/')[-1]})",
            color=color[key], linewidth=2.4, markersize=8, linestyle="--")

ax.axhline(0, color="#888", linewidth=0.6)  # zero-reward reference line
ax.set_title("ChaosOps AI β€” A/B trained-policy comparison vs. baselines", fontsize=13)
ax.set_xlabel("Difficulty tier", fontsize=12)
ax.set_ylabel("Mean cumulative episode reward (per-episode points)", fontsize=12)
ax.grid(True, linestyle=":", alpha=0.4)
ax.legend(loc="lower left", fontsize=10, framealpha=0.95)
fig.tight_layout()
fig.savefig("/workspace/ab_comparison_curve.png")

# Upload to WINNER repo
# NOTE(review): HfApi() picks up the token from the environment/cache; each
# upload_file call is its own commit, so the winner repo gains two commits.
api = HfApi()
api.upload_file(path_or_fileobj="/workspace/ab_report.txt",
                path_in_repo="ab_report.txt",
                repo_id=winner_repo, repo_type="model",
                commit_message="A/B comparison report")
api.upload_file(path_or_fileobj="/workspace/ab_comparison_curve.png",
                path_in_repo="ab_comparison_curve.png",
                repo_id=winner_repo, repo_type="model",
                commit_message="A/B comparison curve")
print("uploaded to", winner_repo)
PY

echo "==[chaosops]== done"