# chaosops/scripts/jobs_evaluate.sh
# Origin: helloAK96 — "Add HF Jobs entry-point for post-training evaluation" (commit b36bddc)
# NOTE(review): these header lines are page residue from the file viewer; kept as
# comments so the script parses. Ideally remove them so the shebang is line 1.
#!/usr/bin/env bash
# ChaosOps AI — post-training evaluation Job entry point.
#
# Workflow: fetch the LoRA adapter from the Hub, run
# `chaosops.train.evaluate` over the EASY/MEDIUM/HARD tiers for all nine
# failure types, re-render a labelled comparison_curve.png, then push the
# resulting artifacts back to the Space repo.
set -euo pipefail

# Tunables — both can be overridden from the Job environment.
EPISODES_PER_TYPE="${EPISODES_PER_TYPE:-5}"                      # episodes per failure type per tier
HUB_REPO_ID="${HUB_REPO_ID:-helloAK96/chaosops-grpo-lora}"       # adapter + artifact repo
echo "==[chaosops]== installing deps"
pip install --quiet --upgrade pip
# Best-effort CUDA torch pin; `|| true` is deliberate so a missing/unreachable
# wheel index falls back to whatever torch the base image already ships.
pip install --quiet --no-deps "torch==2.4.1+cu124" \
  --index-url https://download.pytorch.org/whl/cu124 || true
# Remaining pinned dependencies, installed in one resolver pass.
deps=(
  "transformers>=4.44.0,<4.50.0"
  "peft>=0.12.0,<0.14.0"
  "accelerate>=0.33.0,<0.36.0"
  "huggingface_hub>=0.24.0"
  "pydantic>=2.0.0"
  "matplotlib>=3.7.0"
  "datasets>=2.20.0,<3.0.0"
  "bitsandbytes==0.43.3"
)
pip install --quiet "${deps[@]}"
echo "==[chaosops]== preparing source tree"
# The chaosops package lives on the /data mount; expose it as
# /tmp/chaosops and put /tmp on PYTHONPATH so `python -m chaosops.*` resolves.
ln -sfn /data /tmp/chaosops
export PYTHONPATH="/tmp:${PYTHONPATH:-}"
mkdir -p /workspace
cd /workspace

echo "==[chaosops]== downloading LoRA from ${HUB_REPO_ID}"
# Quiet download of the adapter snapshot into a fixed local directory.
hf download "${HUB_REPO_ID}" --repo-type model \
  --local-dir /workspace/lora_adapter >/dev/null
echo "==[chaosops]== running evaluation sweep ($EPISODES_PER_TYPE episodes/type × 9 types × 3 tiers)"
# Compare the trained adapter against the three baseline policies; results
# land under /workspace/artifacts/evaluation for the plotting step below.
eval_args=(
  --policies random heuristic oracle trained
  --adapter-path /workspace/lora_adapter
  --episodes-per-type "${EPISODES_PER_TYPE}"
  --out-dir /workspace/artifacts/evaluation
)
python -m chaosops.train.evaluate "${eval_args[@]}"
echo "==[chaosops]== rendering labelled comparison_curve.png"
# Render a per-tier mean-reward comparison of all four policies.
# The heredoc delimiter is quoted, so the shell does not expand anything
# inside it; EPISODES_PER_TYPE is handed to Python via the environment so
# the chart title stays truthful when the episode count is overridden
# (previously the title hardcoded "5 seeds" regardless of the setting).
EPISODES_PER_TYPE="${EPISODES_PER_TYPE}" python - <<'PY'
import json
import os
from pathlib import Path

import matplotlib
matplotlib.use("Agg")  # headless backend: Jobs have no display
import matplotlib.pyplot as plt

eval_path = Path("/workspace/artifacts/evaluation/evaluation.json")
data = json.loads(eval_path.read_text())
aggregates = data["aggregates"]

# NOTE(review): assumes each aggregate entry has "policy", "tier" and
# "mean_reward" keys — produced by chaosops.train.evaluate; confirm there.
tiers = ["easy", "medium", "hard"]
policies = ["random", "heuristic", "oracle", "trained"]
color = {"random": "#c0392b", "heuristic": "#2980b9",
         "oracle": "#27ae60", "trained": "#8e44ad"}

episodes = os.environ.get("EPISODES_PER_TYPE", "5")

fig, ax = plt.subplots(figsize=(10, 5.5), dpi=160)
for policy in policies:
    xs, ys = [], []
    for tier in tiers:
        match = next(
            (a for a in aggregates if a["policy"] == policy and a["tier"] == tier),
            None,
        )
        if match is None:
            # Missing (policy, tier) aggregates are skipped rather than plotted as 0.
            continue
        xs.append(tier)
        ys.append(match["mean_reward"])
    if xs:
        ax.plot(xs, ys, marker="o", label=policy,
                color=color[policy], linewidth=2.4, markersize=8)
ax.axhline(0, color="#888", linewidth=0.6)  # zero-reward reference line
ax.set_title(
    "ChaosOps AI — Trained Qwen 1.5B vs. baselines\n"
    f"({episodes} seeds × 9 failure types × 3 tiers, mean cumulative reward)",
    fontsize=13,
)
ax.set_xlabel("Difficulty tier", fontsize=12)
ax.set_ylabel("Mean cumulative episode reward (per-episode points)", fontsize=12)
ax.grid(True, linestyle=":", alpha=0.4)
ax.legend(loc="lower left", fontsize=11, framealpha=0.95)
fig.tight_layout()
fig.savefig("/workspace/artifacts/evaluation/comparison_curve.png")
print("wrote comparison_curve.png")
PY
echo "==[chaosops]== uploading artifacts to ${HUB_REPO_ID}"
# Push the evaluation outputs back to the Hub repo. HUB_REPO_ID is passed
# explicitly via the environment because the quoted heredoc blocks shell
# expansion. A missing artifact is no longer skipped silently — it now emits
# a stderr warning so a partially-failed evaluation is visible in the logs
# (the job still succeeds, preserving the original best-effort behavior).
HUB_REPO_ID="${HUB_REPO_ID}" python - <<'PY'
import os
import sys
from pathlib import Path
from huggingface_hub import HfApi

api = HfApi()
repo_id = os.environ["HUB_REPO_ID"]
for src, dst in [
    ("/workspace/artifacts/evaluation/comparison_curve.png", "comparison_curve.png"),
    ("/workspace/artifacts/evaluation/evaluation_summary.txt", "evaluation_summary.txt"),
    ("/workspace/artifacts/evaluation/evaluation.json", "evaluation.json"),
]:
    if Path(src).exists():
        api.upload_file(
            path_or_fileobj=src,
            path_in_repo=dst,
            repo_id=repo_id,
            repo_type="model",
            commit_message=f"Add post-training {dst}",
        )
        print("uploaded", dst)
    else:
        print(f"warning: artifact {src} not found; skipping upload", file=sys.stderr)
PY
echo "==[chaosops]== summary"
# Print the human-readable summary; under `set -e` a missing file aborts the
# job here, which is the desired failure signal.
summary_file="/workspace/artifacts/evaluation/evaluation_summary.txt"
cat -- "$summary_file"
echo "==[chaosops]== done"