#!/usr/bin/env bash
#
# ChaosOps A/B evaluation driver: installs dependencies, downloads two LoRA
# adapters, evaluates each against baseline policies, and publishes a
# comparison report + plot to the winning adapter's Hub repo.
#
# Required env:  ADAPTER_A, ADAPTER_B   (Hugging Face model repo ids)
# Optional env:  EPISODES_PER_TYPE     (episodes per failure type, default 5)

set -euo pipefail

# Fail fast with a clear message if either adapter repo id is missing.
: "${ADAPTER_A:?ADAPTER_A required}"
: "${ADAPTER_B:?ADAPTER_B required}"

# Episodes per failure type for the evaluator (caller may override).
EPISODES_PER_TYPE="${EPISODES_PER_TYPE:-5}"
|
|
| echo "==[chaosops]== installing deps" |
| pip install --quiet --upgrade pip |
| pip install --quiet --no-deps "torch==2.4.1+cu124" \ |
| --index-url https://download.pytorch.org/whl/cu124 || true |
| pip install --quiet \ |
| "transformers>=4.44.0,<4.50.0" \ |
| "peft>=0.12.0,<0.14.0" \ |
| "accelerate>=0.33.0,<0.36.0" \ |
| "huggingface_hub>=0.24.0" \ |
| "pydantic>=2.0.0" \ |
| "matplotlib>=3.7.0" \ |
| "datasets>=2.20.0,<3.0.0" \ |
| "bitsandbytes==0.43.3" |
|
|
# Make the chaosops source tree (mounted at /data) importable as the
# `chaosops` package: symlink it under /tmp and put /tmp on PYTHONPATH.
ln -sfn /data /tmp/chaosops
export PYTHONPATH="/tmp:${PYTHONPATH:-}"

# Per-adapter working directories; all subsequent paths are /workspace-relative.
mkdir -p /workspace/a /workspace/b
cd /workspace
|
|
| for tag in a b; do |
| case "$tag" in |
| a) repo="$ADAPTER_A" ;; |
| b) repo="$ADAPTER_B" ;; |
| esac |
| echo "==[chaosops]== downloading $repo β /workspace/$tag/lora_adapter" |
| hf download "$repo" --repo-type model --local-dir "/workspace/$tag/lora_adapter" >/dev/null |
|
|
| echo "==[chaosops]== evaluating $tag ($repo)" |
| python -m chaosops.train.evaluate \ |
| --policies random heuristic oracle trained \ |
| --adapter-path "/workspace/$tag/lora_adapter" \ |
| --episodes-per-type "${EPISODES_PER_TYPE}" \ |
| --out-dir "/workspace/$tag/eval" |
| done |
|
|
| echo "==[chaosops]== building A/B report and overlay plot" |
| ADAPTER_A="$ADAPTER_A" ADAPTER_B="$ADAPTER_B" python - <<'PY' |
| import json, os |
| from pathlib import Path |
| from huggingface_hub import HfApi |
| import matplotlib |
| matplotlib.use("Agg") |
| import matplotlib.pyplot as plt |
|
|
| repo_a = os.environ["ADAPTER_A"] |
| repo_b = os.environ["ADAPTER_B"] |
|
|
| def load(tag): |
| return json.loads(Path(f"/workspace/{tag}/eval/evaluation.json").read_text()) |
|
|
| a = load("a") |
| b = load("b") |
|
|
| def by(agg, policy, tier): |
| return next((x for x in agg if x["policy"] == policy and x["tier"] == tier), None) |
|
|
| tiers = ["easy", "medium", "hard"] |
| report_lines = [ |
| "ChaosOps AI β A/B comparison", |
| f" A = {repo_a}", |
| f" B = {repo_b}", |
| "", |
| f"{'tier':<8} {'policy':<10} {'A.reward':>10} {'B.reward':>10} Ξ(B-A)", |
| "-" * 60, |
| ] |
| for tier in tiers: |
| for policy in ["random", "heuristic", "oracle", "trained"]: |
| ax = by(a["aggregates"], policy, tier) |
| bx = by(b["aggregates"], policy, tier) |
| if not ax or not bx: |
| continue |
| delta = bx["mean_reward"] - ax["mean_reward"] |
| report_lines.append( |
| f"{tier:<8} {policy:<10} {ax['mean_reward']:>+10.1f} {bx['mean_reward']:>+10.1f} {delta:+10.1f}" |
| ) |
| report = "\n".join(report_lines) |
| Path("/workspace/ab_report.txt").write_text(report + "\n") |
| print(report) |
|
|
| # Determine winner by sum of trained mean rewards across tiers |
| sum_a = sum(by(a["aggregates"], "trained", t)["mean_reward"] for t in tiers if by(a["aggregates"], "trained", t)) |
| sum_b = sum(by(b["aggregates"], "trained", t)["mean_reward"] for t in tiers if by(b["aggregates"], "trained", t)) |
| winner_repo = repo_a if sum_a >= sum_b else repo_b |
| print(f"\nWINNER (higher summed mean trained reward): {winner_repo} ({max(sum_a, sum_b):+.1f} vs {min(sum_a, sum_b):+.1f})") |
|
|
| |
| fig, ax = plt.subplots(figsize=(10, 5.5), dpi=160) |
| color = {"random": "#c0392b", "heuristic": "#2980b9", "oracle": "#27ae60", |
| "trained_a": "#8e44ad", "trained_b": "#d35400"} |
| for policy in ["random", "heuristic", "oracle"]: |
| xs, ys = [], [] |
| for t in tiers: |
| m = by(a["aggregates"], policy, t) |
| if m: xs.append(t); ys.append(m["mean_reward"]) |
| ax.plot(xs, ys, marker="o", label=policy, color=color[policy], linewidth=2.4, markersize=8) |
| for tag, repo, key in [("A", repo_a, "trained_a"), ("B", repo_b, "trained_b")]: |
| src = a if tag == "A" else b |
| xs, ys = [], [] |
| for t in tiers: |
| m = by(src["aggregates"], "trained", t) |
| if m: xs.append(t); ys.append(m["mean_reward"]) |
| ax.plot(xs, ys, marker="s", label=f"trained ({tag}: {repo.split('/')[-1]})", |
| color=color[key], linewidth=2.4, markersize=8, linestyle="--") |
|
|
| ax.axhline(0, color="#888", linewidth=0.6) |
| ax.set_title("ChaosOps AI β A/B trained-policy comparison vs. baselines", fontsize=13) |
| ax.set_xlabel("Difficulty tier", fontsize=12) |
| ax.set_ylabel("Mean cumulative episode reward (per-episode points)", fontsize=12) |
| ax.grid(True, linestyle=":", alpha=0.4) |
| ax.legend(loc="lower left", fontsize=10, framealpha=0.95) |
| fig.tight_layout() |
| fig.savefig("/workspace/ab_comparison_curve.png") |
|
|
| |
| api = HfApi() |
| api.upload_file(path_or_fileobj="/workspace/ab_report.txt", |
| path_in_repo="ab_report.txt", |
| repo_id=winner_repo, repo_type="model", |
| commit_message="A/B comparison report") |
| api.upload_file(path_or_fileobj="/workspace/ab_comparison_curve.png", |
| path_in_repo="ab_comparison_curve.png", |
| repo_id=winner_repo, repo_type="model", |
| commit_message="A/B comparison curve") |
| print("uploaded to", winner_repo) |
| PY |
|
|
| echo "==[chaosops]== done" |
|
|