#!/usr/bin/env bash
#
# ChaosOps AI — post-training evaluation Job entry point.
#
# Pulls the LoRA from the Hub, runs `chaosops.train.evaluate` on EASY/
# MEDIUM/HARD across all 9 failure types, regenerates a labelled
# comparison_curve.png, and uploads everything back to the Space repo.
#
# Environment variables:
#   EPISODES_PER_TYPE  episodes per failure type per tier (default: 5)
#   HUB_REPO_ID        Hub model repo holding the LoRA and artifacts
set -euo pipefail

EPISODES_PER_TYPE="${EPISODES_PER_TYPE:-5}"
HUB_REPO_ID="${HUB_REPO_ID:-helloAK96/chaosops-grpo-lora}"

echo "==[chaosops]== installing deps"
pip install --quiet --upgrade pip
# The torch wheel is pinned to the CUDA 12.4 index; '|| true' is deliberate
# so the job survives on runners where the cu124 wheel is unavailable.
pip install --quiet --no-deps "torch==2.4.1+cu124" \
  --index-url https://download.pytorch.org/whl/cu124 || true
pip install --quiet \
  "transformers>=4.44.0,<4.50.0" \
  "peft>=0.12.0,<0.14.0" \
  "accelerate>=0.33.0,<0.36.0" \
  "huggingface_hub>=0.24.0" \
  "pydantic>=2.0.0" \
  "matplotlib>=3.7.0" \
  "datasets>=2.20.0,<3.0.0" \
  "bitsandbytes==0.43.3"

echo "==[chaosops]== preparing source tree"
# The chaosops package lives in /data; expose it as /tmp/chaosops so that
# `python -m chaosops.train.evaluate` resolves via PYTHONPATH=/tmp.
ln -sfn /data /tmp/chaosops
export PYTHONPATH="/tmp:${PYTHONPATH:-}"
mkdir -p /workspace
cd /workspace

echo "==[chaosops]== downloading LoRA from ${HUB_REPO_ID}"
hf download "${HUB_REPO_ID}" --repo-type model \
  --local-dir /workspace/lora_adapter >/dev/null

echo "==[chaosops]== running evaluation sweep (${EPISODES_PER_TYPE} episodes/type × 9 types × 3 tiers)"
python -m chaosops.train.evaluate \
  --policies random heuristic oracle trained \
  --adapter-path /workspace/lora_adapter \
  --episodes-per-type "${EPISODES_PER_TYPE}" \
  --out-dir /workspace/artifacts/evaluation

echo "==[chaosops]== rendering labelled comparison_curve.png"
# The heredoc delimiter is quoted ('PY'), so the shell does not expand
# variables inside the Python source; pass the episode count through the
# environment instead (same pattern as the upload step below).
EPISODES_PER_TYPE="${EPISODES_PER_TYPE}" python - <<'PY'
import json
import os
from pathlib import Path

import matplotlib

matplotlib.use("Agg")  # headless backend — the Job has no display
import matplotlib.pyplot as plt

eval_path = Path("/workspace/artifacts/evaluation/evaluation.json")
data = json.loads(eval_path.read_text())
aggregates = data["aggregates"]

tiers = ["easy", "medium", "hard"]
policies = ["random", "heuristic", "oracle", "trained"]
color = {
    "random": "#c0392b",
    "heuristic": "#2980b9",
    "oracle": "#27ae60",
    "trained": "#8e44ad",
}

fig, ax = plt.subplots(figsize=(10, 5.5), dpi=160)
for policy in policies:
    xs, ys = [], []
    for tier in tiers:
        # Each (policy, tier) pair may be missing from the aggregates;
        # skip gaps rather than plotting a misaligned point.
        match = next(
            (a for a in aggregates if a["policy"] == policy and a["tier"] == tier),
            None,
        )
        if match is None:
            continue
        xs.append(tier)
        ys.append(match["mean_reward"])
    if xs:
        ax.plot(xs, ys, marker="o", label=policy, color=color[policy],
                linewidth=2.4, markersize=8)

ax.axhline(0, color="#888", linewidth=0.6)
# FIX: the title previously hard-coded "5 seeds" even when EPISODES_PER_TYPE
# was overridden; use the actual configured episode count in the label.
episodes = os.environ.get("EPISODES_PER_TYPE", "5")
ax.set_title(
    "ChaosOps AI — Trained Qwen 1.5B vs. baselines\n"
    f"({episodes} seeds × 9 failure types × 3 tiers, mean cumulative reward)",
    fontsize=13,
)
ax.set_xlabel("Difficulty tier", fontsize=12)
ax.set_ylabel("Mean cumulative episode reward (per-episode points)", fontsize=12)
ax.grid(True, linestyle=":", alpha=0.4)
ax.legend(loc="lower left", fontsize=11, framealpha=0.95)
fig.tight_layout()
fig.savefig("/workspace/artifacts/evaluation/comparison_curve.png")
print("wrote comparison_curve.png")
PY

echo "==[chaosops]== uploading artifacts to ${HUB_REPO_ID}"
HUB_REPO_ID="${HUB_REPO_ID}" python - <<'PY'
import os
from pathlib import Path

from huggingface_hub import HfApi

api = HfApi()
repo_id = os.environ["HUB_REPO_ID"]
for src, dst in [
    ("/workspace/artifacts/evaluation/comparison_curve.png", "comparison_curve.png"),
    ("/workspace/artifacts/evaluation/evaluation_summary.txt", "evaluation_summary.txt"),
    ("/workspace/artifacts/evaluation/evaluation.json", "evaluation.json"),
]:
    # Upload best-effort: only push artifacts the evaluation actually wrote.
    if Path(src).exists():
        api.upload_file(
            path_or_fileobj=src,
            path_in_repo=dst,
            repo_id=repo_id,
            repo_type="model",
            commit_message=f"Add post-training {dst}",
        )
        print("uploaded", dst)
PY

echo "==[chaosops]== summary"
cat /workspace/artifacts/evaluation/evaluation_summary.txt

echo "==[chaosops]== done"