Spaces:
Paused
Paused
File size: 4,592 Bytes
6ac8669 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | #!/usr/bin/env python3
"""
Fast evidence PNG for judges (no GPU, ~1–3 min).
Produces evidence_grpo_training.png with:
- Real per-step rewards from a short heuristic episode (env rollout).
- A second panel pointing to Colab for GRPO loss / full training curves.
This does NOT fabricate GRPO loss. It shows real environment signal + where to
find training curves (ImmunoOrg_Training_Colab.ipynb Step 4b).
"""
from __future__ import annotations
import sys
from pathlib import Path
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
REPO = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(REPO))
from immunoorg.environment import ImmunoOrgEnvironment # noqa: E402
from immunoorg.models import ( # noqa: E402
ActionType,
DiagnosticAction,
ImmunoAction,
StrategicAction,
TacticalAction,
)
def _heuristic(obs, env):
    """Choose the next scripted action from the current observation.

    The decision is keyed on ``obs.current_phase.value``:

    - ``"detection"``: scan logs on the first compromised, non-isolated
      visible node; otherwise on the first visible node; otherwise on an
      empty target string.
    - ``"containment"``: isolate the first compromised, non-isolated node
      (only when at least one exists).
    - ``"rca"``: run the identify-silo diagnostic.
    - ``"refactor"``: reduce bureaucracy on ``"dept-management"``.
    - anything else (including containment with nothing left to isolate):
      measure organisational latency.

    Args:
        obs: Observation exposing ``current_phase`` and ``visible_nodes``.
        env: Accepted for call-site symmetry; not read by this function.

    Returns:
        An ``ImmunoAction`` whose ``reasoning`` is always "evidence script".
    """
    stage = obs.current_phase.value
    visible = obs.visible_nodes
    # Nodes that are compromised but have not been isolated yet.
    active_breaches = [
        node for node in visible if node.compromised and not node.isolated
    ]

    if stage == "detection":
        # Prefer a known-bad node, then any visible node, then no target.
        if active_breaches:
            scan_target = active_breaches[0].id
        elif visible:
            scan_target = visible[0].id
        else:
            scan_target = ""
        fields = {
            "action_type": ActionType.TACTICAL,
            "tactical_action": TacticalAction.SCAN_LOGS,
            "target": scan_target,
        }
    elif stage == "containment" and active_breaches:
        fields = {
            "action_type": ActionType.TACTICAL,
            "tactical_action": TacticalAction.ISOLATE_NODE,
            "target": active_breaches[0].id,
        }
    elif stage == "rca":
        fields = {
            "action_type": ActionType.DIAGNOSTIC,
            "diagnostic_action": DiagnosticAction.IDENTIFY_SILO,
        }
    elif stage == "refactor":
        fields = {
            "action_type": ActionType.STRATEGIC,
            "strategic_action": StrategicAction.REDUCE_BUREAUCRACY,
            "target": "dept-management",
        }
    else:
        # Fallback for unknown phases and for containment with nothing to isolate.
        fields = {
            "action_type": ActionType.DIAGNOSTIC,
            "diagnostic_action": DiagnosticAction.MEASURE_ORG_LATENCY,
        }

    return ImmunoAction(**fields, reasoning="evidence script")
def main() -> None:
    """Roll out the heuristic policy once and save the evidence figure.

    Runs a single episode of at most 35 steps in
    ``ImmunoOrgEnvironment(difficulty=1, seed=42)``, choosing each action with
    ``_heuristic``. The top panel plots cumulative reward against the step
    index; the bottom panel is a text note pointing at the Colab notebook.
    The figure is written to ``REPO / "evidence_grpo_training.png"`` and the
    output path is printed.
    """
    env = ImmunoOrgEnvironment(difficulty=1, seed=42)
    obs = env.reset()

    steps_r: list[int] = []
    cum: list[float] = []
    total = 0.0
    max_steps = 35
    for t in range(max_steps):
        action = _heuristic(obs, env)
        obs, r, done = env.step(action)
        total += float(r)
        steps_r.append(t + 1)  # 1-based step index for the x axis
        cum.append(total)
        if done:
            break

    DARK, CARD, TEXT, GRID = "#0d1117", "#161b22", "#c9d1d9", "#30363d"
    # height_ratios goes through gridspec_kw so this also runs on Matplotlib
    # releases that predate the direct ``height_ratios=`` keyword of subplots().
    fig, (ax1, ax2) = plt.subplots(
        2,
        1,
        figsize=(10, 7),
        dpi=140,
        gridspec_kw={"height_ratios": [2.2, 1.0]},
    )
    fig.patch.set_facecolor(DARK)
    for ax in (ax1, ax2):
        ax.set_facecolor(CARD)
        ax.tick_params(colors=TEXT)
        for s in ax.spines.values():
            s.set_color(GRID)

    # Top panel: cumulative reward of the heuristic rollout.
    ax1.plot(steps_r, cum, color="#3fb950", lw=2, marker="o", ms=3, label="cumulative reward")
    ax1.set_xlabel("env step", color=TEXT)
    ax1.set_ylabel("cumulative episode reward", color=TEXT)
    ax1.set_title(
        "Real env rollout — heuristic policy (difficulty 1)\n"
        "GRPO in Colab learns policies that improve rewards in this same simulator",
        color=TEXT,
        fontsize=11,
    )
    ax1.grid(True, color=GRID, alpha=0.5, linestyle="--")
    ax1.legend(facecolor=CARD, edgecolor=GRID, labelcolor=TEXT)

    # Bottom panel: text only, telling the reader where the loss curves live.
    ax2.axis("off")
    msg = "\n".join(
        [
            "GRPO training loss + logged rewards",
            "─" * 36,
            "Open: ImmunoOrg_Training_Colab.ipynb",
            "• Runtime → Run all (GPU)",
            "• Step 4b saves evidence_grpo_training.png",
            "",
            "This file’s top panel is a real environment signal;",
            "the notebook adds the optimizer loss curves judges ask for.",
        ]
    )
    ax2.text(
        0.04,
        0.96,
        msg,
        transform=ax2.transAxes,
        va="top",
        ha="left",
        fontsize=10,
        color=TEXT,
        family="monospace",
    )

    fig.tight_layout()
    out = REPO / "evidence_grpo_training.png"
    fig.savefig(out, bbox_inches="tight", facecolor=DARK)
    # Release the figure so pyplot does not keep it alive after saving.
    plt.close(fig)
    print(f"Wrote {out}")
# Script entry point: generate the figure only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|