# immunoorg-v3 / scripts/make_hackathon_training_figure.py
# Uploaded via huggingface_hub (commit 6ac8669, verified) by hirann.
#!/usr/bin/env python3
"""
Fast evidence PNG for judges (no GPU, ~1–3 min).
Produces evidence_grpo_training.png with:
- Real per-step rewards from a short heuristic episode (env rollout).
- A second panel pointing to Colab for GRPO loss / full training curves.
This does NOT fabricate GRPO loss. It shows real environment signal + where to
find training curves (ImmunoOrg_Training_Colab.ipynb Step 4b).
"""
from __future__ import annotations
import sys
from pathlib import Path
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
REPO = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(REPO))
from immunoorg.environment import ImmunoOrgEnvironment # noqa: E402
from immunoorg.models import ( # noqa: E402
ActionType,
DiagnosticAction,
ImmunoAction,
StrategicAction,
TacticalAction,
)
def _heuristic(obs, env):
    """Tiny heuristic: same spirit as demo (contain + progress).

    Picks one action per phase from the observation:
    detection -> scan logs on a compromised (else first visible) node,
    containment -> isolate a compromised node, rca -> identify silo,
    refactor -> reduce bureaucracy, anything else -> measure org latency.

    NOTE(review): *env* is accepted but never read here — presumably kept
    for signature parity with other policies; confirm before removing.
    """
    phase = obs.current_phase.value
    visible = obs.visible_nodes
    # Nodes that are compromised but not yet isolated — the containment targets.
    hot = [node for node in visible if node.compromised and not node.isolated]

    if phase == "detection":
        if hot:
            scan_target = hot[0].id
        elif visible:
            scan_target = visible[0].id
        else:
            scan_target = ""
        return ImmunoAction(
            action_type=ActionType.TACTICAL,
            tactical_action=TacticalAction.SCAN_LOGS,
            target=scan_target,
            reasoning="evidence script",
        )

    if phase == "containment" and hot:
        return ImmunoAction(
            action_type=ActionType.TACTICAL,
            tactical_action=TacticalAction.ISOLATE_NODE,
            target=hot[0].id,
            reasoning="evidence script",
        )

    if phase == "rca":
        return ImmunoAction(
            action_type=ActionType.DIAGNOSTIC,
            diagnostic_action=DiagnosticAction.IDENTIFY_SILO,
            reasoning="evidence script",
        )

    if phase == "refactor":
        return ImmunoAction(
            action_type=ActionType.STRATEGIC,
            strategic_action=StrategicAction.REDUCE_BUREAUCRACY,
            target="dept-management",
            reasoning="evidence script",
        )

    # Fallback (also containment with nothing to isolate): cheap diagnostic probe.
    return ImmunoAction(
        action_type=ActionType.DIAGNOSTIC,
        diagnostic_action=DiagnosticAction.MEASURE_ORG_LATENCY,
        reasoning="evidence script",
    )
def main() -> None:
    """Roll out one heuristic episode and save evidence_grpo_training.png.

    Top panel: real cumulative reward from a short environment rollout.
    Bottom panel: a pointer to the Colab notebook that produces the actual
    GRPO loss curves (this script deliberately does not fabricate them).
    """
    steps, cumulative = _run_episode(max_steps=35)
    out = REPO / "evidence_grpo_training.png"
    _render_figure(steps, cumulative, out)
    print(f"Wrote {out}")


def _run_episode(max_steps: int = 35) -> tuple[list[int], list[float]]:
    """Run the heuristic policy once; return (1-based step indices, cumulative rewards).

    Fixed difficulty/seed so the resulting figure is reproducible.
    """
    env = ImmunoOrgEnvironment(difficulty=1, seed=42)
    obs = env.reset()
    steps: list[int] = []
    cumulative: list[float] = []
    total = 0.0
    for t in range(max_steps):
        action = _heuristic(obs, env)
        obs, reward, done = env.step(action)
        total += float(reward)
        steps.append(t + 1)
        cumulative.append(total)
        if done:
            break
    return steps, cumulative


def _render_figure(steps: list[int], cumulative: list[float], out: Path) -> None:
    """Render the two-panel dark-theme evidence figure and write it to *out*."""
    # GitHub-dark palette: background, panel, text, grid/spines.
    dark, card, text, grid = "#0d1117", "#161b22", "#c9d1d9", "#30363d"
    fig, (ax1, ax2) = plt.subplots(
        2, 1, figsize=(10, 7), dpi=140, height_ratios=[2.2, 1.0]
    )
    fig.patch.set_facecolor(dark)
    for ax in (ax1, ax2):
        ax.set_facecolor(card)
        ax.tick_params(colors=text)
        for spine in ax.spines.values():
            spine.set_color(grid)

    # Top panel: the real environment signal.
    ax1.plot(
        steps, cumulative, color="#3fb950", lw=2, marker="o", ms=3,
        label="cumulative reward",
    )
    ax1.set_xlabel("env step", color=text)
    ax1.set_ylabel("cumulative episode reward", color=text)
    ax1.set_title(
        "Real env rollout — heuristic policy (difficulty 1)\n"
        "GRPO in Colab learns policies that improve rewards in this same simulator",
        color=text,
        fontsize=11,
    )
    ax1.grid(True, color=grid, alpha=0.5, linestyle="--")
    ax1.legend(facecolor=card, edgecolor=grid, labelcolor=text)

    # Bottom panel: plain-text pointer to the notebook with the loss curves.
    ax2.axis("off")
    msg = (
        "GRPO training loss + logged rewards\n"
        "────────────────────────────────────\n"
        "Open: ImmunoOrg_Training_Colab.ipynb\n"
        "→ Runtime → Run all (GPU)\n"
        "→ Step 4b saves evidence_grpo_training.png\n"
        "\n"
        "This file’s top panel is a real environment signal;\n"
        "the notebook adds the optimizer loss curves judges ask for."
    )
    ax2.text(
        0.04,
        0.96,
        msg,
        transform=ax2.transAxes,
        va="top",
        ha="left",
        fontsize=10,
        color=text,
        family="monospace",
    )

    fig.tight_layout()
    fig.savefig(out, bbox_inches="tight", facecolor=dark)
    plt.close(fig)  # release the figure explicitly (good hygiene even in a one-shot script)
# Script entry point: generate the evidence figure when run directly.
if __name__ == "__main__":
    main()