Spaces:

helloAK96
/

chaosops

Running

helloAK96 Claude Opus 4.7 committed 14 days ago

Commit

b36bddc

1 Parent(s): 5b2169b

Add HF Jobs entry-point for post-training evaluation

scripts/jobs_evaluate.sh: pull the LoRA from helloAK96/chaosops-grpo-lora,
run chaosops.train.evaluate across all (policy × tier × failure_type),
regenerate a labelled comparison_curve.png, upload back to the model repo.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

Files changed (1) hide show

scripts/jobs_evaluate.sh +115 -0

scripts/jobs_evaluate.sh ADDED Viewed

	@@ -0,0 +1,115 @@

+#!/usr/bin/env bash
+# ChaosOps AI — post-training evaluation Job entry point.
+#
+# Pulls the LoRA from the Hub, runs `chaosops.train.evaluate` on EASY/
+# MEDIUM/HARD across all 9 failure types, regenerates a labelled
+# comparison_curve.png, and uploads everything back to the Space repo.
+set -euo pipefail
+EPISODES_PER_TYPE="${EPISODES_PER_TYPE:-5}"
+HUB_REPO_ID="${HUB_REPO_ID:-helloAK96/chaosops-grpo-lora}"
+echo "==[chaosops]== installing deps"
+pip install --quiet --upgrade pip
+pip install --quiet --no-deps "torch==2.4.1+cu124" \
+    --index-url https://download.pytorch.org/whl/cu124 || true
+pip install --quiet \
+    "transformers>=4.44.0,<4.50.0" \
+    "peft>=0.12.0,<0.14.0" \
+    "accelerate>=0.33.0,<0.36.0" \
+    "huggingface_hub>=0.24.0" \
+    "pydantic>=2.0.0" \
+    "matplotlib>=3.7.0" \
+    "datasets>=2.20.0,<3.0.0" \
+    "bitsandbytes==0.43.3"
+echo "==[chaosops]== preparing source tree"
+ln -sfn /data /tmp/chaosops
+export PYTHONPATH="/tmp:${PYTHONPATH:-}"
+mkdir -p /workspace
+cd /workspace
+echo "==[chaosops]== downloading LoRA from ${HUB_REPO_ID}"
+hf download "${HUB_REPO_ID}" --repo-type model \
+    --local-dir /workspace/lora_adapter >/dev/null
+echo "==[chaosops]== running evaluation sweep ($EPISODES_PER_TYPE episodes/type × 9 types × 3 tiers)"
+python -m chaosops.train.evaluate \
+    --policies random heuristic oracle trained \
+    --adapter-path /workspace/lora_adapter \
+    --episodes-per-type "${EPISODES_PER_TYPE}" \
+    --out-dir /workspace/artifacts/evaluation
+echo "==[chaosops]== rendering labelled comparison_curve.png"
+# The heredoc delimiter is quoted, so the shell expands nothing inside it;
+# the configured episode count is handed to Python through the environment
+# so the plot title reflects the sweep that was actually run.
+EPISODES_PER_TYPE="${EPISODES_PER_TYPE}" python - <<'PY'
+# Draw one line per policy: mean reward (y) against difficulty tier (x),
+# read from the "aggregates" list in evaluation.json.
+import json
+import os
+from pathlib import Path
+import matplotlib
+# Select the non-interactive Agg backend before pyplot is imported; the
+# figure is only ever written to disk.
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+eval_path = Path("/workspace/artifacts/evaluation/evaluation.json")
+data = json.loads(eval_path.read_text())
+aggregates = data["aggregates"]
+# Falls back to the script's default of 5 if the variable is not exported.
+episodes_per_type = os.environ.get("EPISODES_PER_TYPE", "5")
+tiers = ["easy", "medium", "hard"]
+policies = ["random", "heuristic", "oracle", "trained"]
+color = {"random": "#c0392b", "heuristic": "#2980b9",
+         "oracle": "#27ae60", "trained": "#8e44ad"}
+fig, ax = plt.subplots(figsize=(10, 5.5), dpi=160)
+for policy in policies:
+    xs, ys = [], []
+    for tier in tiers:
+        # First aggregate entry for this (policy, tier) pair, if any.
+        match = next(
+            (a for a in aggregates if a["policy"] == policy and a["tier"] == tier),
+            None,
+        )
+        if match is None:
+            # Tiers without data are left out of this policy's line.
+            continue
+        xs.append(tier)
+        ys.append(match["mean_reward"])
+    if xs:
+        ax.plot(xs, ys, marker="o", label=policy,
+                color=color[policy], linewidth=2.4, markersize=8)
+# Zero-reward reference line.
+ax.axhline(0, color="#888", linewidth=0.6)
+ax.set_title(
+    "ChaosOps AI — Trained Qwen 1.5B vs. baselines\n"
+    f"({episodes_per_type} seeds × 9 failure types × 3 tiers, mean cumulative reward)",
+    fontsize=13,
+)
+ax.set_xlabel("Difficulty tier", fontsize=12)
+ax.set_ylabel("Mean cumulative episode reward (per-episode points)", fontsize=12)
+ax.grid(True, linestyle=":", alpha=0.4)
+ax.legend(loc="lower left", fontsize=11, framealpha=0.95)
+fig.tight_layout()
+fig.savefig("/workspace/artifacts/evaluation/comparison_curve.png")
+print("wrote comparison_curve.png")
+PY
+echo "==[chaosops]== uploading artifacts to ${HUB_REPO_ID}"
+# Push every evaluation artifact that exists to the root of the model repo;
+# each file is uploaded with its own upload_file call.
+HUB_REPO_ID="${HUB_REPO_ID}" python - <<'PY'
+import os
+from pathlib import Path
+from huggingface_hub import HfApi
+# (local file, name inside the repo) pairs, in upload order.
+ARTIFACTS = (
+    ("/workspace/artifacts/evaluation/comparison_curve.png", "comparison_curve.png"),
+    ("/workspace/artifacts/evaluation/evaluation_summary.txt", "evaluation_summary.txt"),
+    ("/workspace/artifacts/evaluation/evaluation.json", "evaluation.json"),
+)
+client = HfApi()
+target_repo = os.environ["HUB_REPO_ID"]
+for local_file, remote_name in ARTIFACTS:
+    # Artifacts that were not produced are skipped without an error.
+    if not Path(local_file).exists():
+        continue
+    client.upload_file(
+        path_or_fileobj=local_file,
+        path_in_repo=remote_name,
+        repo_id=target_repo,
+        repo_type="model",
+        commit_message=f"Add post-training {remote_name}",
+    )
+    print("uploaded", remote_name)
+PY
+echo "==[chaosops]== summary"
+cat /workspace/artifacts/evaluation/evaluation_summary.txt
+echo "==[chaosops]== done"