helloAK96 Claude Opus 4.7 committed on
Commit
b36bddc
·
1 Parent(s): 5b2169b

Add HF Jobs entry-point for post-training evaluation

Browse files

scripts/jobs_evaluate.sh: pull the LoRA from helloAK96/chaosops-grpo-lora,
run chaosops.train.evaluate across all (policy × tier × failure_type),
regenerate a labelled comparison_curve.png, upload back to the model repo.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

Files changed (1) hide show
  1. scripts/jobs_evaluate.sh +115 -0
scripts/jobs_evaluate.sh ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ # ChaosOps AI — post-training evaluation Job entry point.
3
+ #
4
+ # Pulls the LoRA from the Hub, runs `chaosops.train.evaluate` on EASY/
5
+ # MEDIUM/HARD across all 9 failure types, regenerates a labelled
6
+ # comparison_curve.png, and uploads everything back to the model repo.
7
+
8
+ set -euo pipefail  # abort on a failing command, an unset variable, or a failing pipeline stage
9
+
10
+ EPISODES_PER_TYPE="${EPISODES_PER_TYPE:-5}"  # overridable from the environment; falls back to 5
11
+ HUB_REPO_ID="${HUB_REPO_ID:-helloAK96/chaosops-grpo-lora}"  # Hub repo the adapter is pulled from and the artifacts are pushed to
12
+
13
+ echo "==[chaosops]== installing deps"
14
+ pip install --quiet --upgrade pip
15
+ pip install --quiet --no-deps "torch==2.4.1+cu124" \
16
+ --index-url https://download.pytorch.org/whl/cu124 || true  # NOTE(review): a failed torch install is ignored here; presumably the image may already provide torch — confirm
17
+ pip install --quiet \
18
+ "transformers>=4.44.0,<4.50.0" \
19
+ "peft>=0.12.0,<0.14.0" \
20
+ "accelerate>=0.33.0,<0.36.0" \
21
+ "huggingface_hub>=0.24.0" \
22
+ "pydantic>=2.0.0" \
23
+ "matplotlib>=3.7.0" \
24
+ "datasets>=2.20.0,<3.0.0" \
25
+ "bitsandbytes==0.43.3"
26
+
27
+ echo "==[chaosops]== preparing source tree"
28
+ ln -sfn /data /tmp/chaosops  # makes /data reachable under the name "chaosops" inside /tmp
29
+ export PYTHONPATH="/tmp:${PYTHONPATH:-}"  # puts /tmp on the import path for the "python -m chaosops..." call below (assumes /data holds the package — TODO confirm)
30
+
31
+ mkdir -p /workspace
32
+ cd /workspace
33
+
34
+ echo "==[chaosops]== downloading LoRA from ${HUB_REPO_ID}"
35
+ hf download "${HUB_REPO_ID}" --repo-type model \
36
+ --local-dir /workspace/lora_adapter >/dev/null  # only stdout is discarded; stderr stays visible
37
+
38
+ echo "==[chaosops]== running evaluation sweep ($EPISODES_PER_TYPE episodes/type × 9 types × 3 tiers)"
39
+ python -m chaosops.train.evaluate \
40
+ --policies random heuristic oracle trained \
41
+ --adapter-path /workspace/lora_adapter \
42
+ --episodes-per-type "${EPISODES_PER_TYPE}" \
43
+ --out-dir /workspace/artifacts/evaluation  # the plotting, upload and summary steps all read from this directory
44
+
45
+ echo "==[chaosops]== rendering labelled comparison_curve.png"
46
+ EPISODES_PER_TYPE="${EPISODES_PER_TYPE}" python - <<'PY'  # env prefix hands the episode count to the quoted heredoc, which gets no shell expansion
47
+ import json
48
+ import os
49
+ from pathlib import Path
50
+ import matplotlib
51
+ matplotlib.use("Agg")  # file-only backend, selected before pyplot is imported
52
+ import matplotlib.pyplot as plt
53
+
54
+ eval_path = Path("/workspace/artifacts/evaluation/evaluation.json")
55
+ data = json.loads(eval_path.read_text())
56
+ aggregates = data["aggregates"]  # iterated below as records carrying "policy", "tier" and "mean_reward"
57
+ tiers = ["easy", "medium", "hard"]
58
+ policies = ["random", "heuristic", "oracle", "trained"]
59
+ color = {"random": "#c0392b", "heuristic": "#2980b9", "oracle": "#27ae60", "trained": "#8e44ad"}
60
+ fig, ax = plt.subplots(figsize=(10, 5.5), dpi=160)
61
+ for policy in policies:  # one line per policy, one point per tier
62
+     xs, ys = [], []
63
+     for tier in tiers:
64
+         match = next(
65
+             (a for a in aggregates if a["policy"] == policy and a["tier"] == tier),
66
+             None,
67
+         )
68
+         if match is None:  # no aggregate for this policy/tier pair: leave the point out
69
+             continue
70
+         xs.append(tier)
71
+         ys.append(match["mean_reward"])
72
+     if xs:  # a policy with no data at all gets no line and no legend entry
73
+         ax.plot(xs, ys, marker="o", label=policy,
74
+                 color=color[policy], linewidth=2.4, markersize=8)
75
+ ax.axhline(0, color="#888", linewidth=0.6)
76
+ ax.set_title(
77
+     "ChaosOps AI — Trained Qwen 1.5B vs. baselines\n"
78
+     f"({os.environ.get('EPISODES_PER_TYPE', '5')} seeds × 9 failure types × 3 tiers, mean cumulative reward)",  # seed count follows the sweep setting instead of a fixed 5
79
+     fontsize=13,
80
+ )
81
+ ax.set_xlabel("Difficulty tier", fontsize=12)
82
+ ax.set_ylabel("Mean cumulative episode reward (per-episode points)", fontsize=12)
83
+ ax.grid(True, linestyle=":", alpha=0.4)
84
+ ax.legend(loc="lower left", fontsize=11, framealpha=0.95)
85
+ fig.tight_layout()
86
+ fig.savefig("/workspace/artifacts/evaluation/comparison_curve.png")
87
+ print("wrote comparison_curve.png")
88
+ PY
89
+
90
+ echo "==[chaosops]== uploading artifacts to ${HUB_REPO_ID}"
91
+ HUB_REPO_ID="${HUB_REPO_ID}" python - <<'PY'
92
+ import os
93
+ from pathlib import Path
94
+ from huggingface_hub import HfApi
95
+ api = HfApi()
96
+ repo_id = os.environ["HUB_REPO_ID"]  # supplied by the command-prefix assignment; the quoted heredoc gets no shell expansion
97
+ for src, dst in [  # (local artifact path, file name inside the Hub repo)
98
+ ("/workspace/artifacts/evaluation/comparison_curve.png", "comparison_curve.png"),
99
+ ("/workspace/artifacts/evaluation/evaluation_summary.txt", "evaluation_summary.txt"),
100
+ ("/workspace/artifacts/evaluation/evaluation.json", "evaluation.json"),
101
+ ]:
102
+ if Path(src).exists():  # an artifact that was not produced is skipped without an error
103
+ api.upload_file(  # one upload call per file, each with its own commit message
104
+ path_or_fileobj=src,
105
+ path_in_repo=dst,
106
+ repo_id=repo_id,
107
+ repo_type="model",
108
+ commit_message=f"Add post-training {dst}",
109
+ )
110
+ print("uploaded", dst)
111
+ PY
112
+
113
+ echo "==[chaosops]== summary"
114
+ cat /workspace/artifacts/evaluation/evaluation_summary.txt || echo "==[chaosops]== could not print evaluation_summary.txt (non-fatal)" >&2  # the upload step treats this file as optional, so its absence must not fail the job under set -e
115
+ echo "==[chaosops]== done"