Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| """ | |
| Fast evidence PNG for judges (no GPU, ~1–3 min). | |
| Produces evidence_grpo_training.png with: | |
| - Real cumulative reward, summed from the per-step rewards of a short heuristic episode (env rollout). | |
| - A second panel pointing to Colab for GRPO loss / full training curves. | |
| This does NOT fabricate GRPO loss. It shows real environment signal + where to | |
| find training curves (ImmunoOrg_Training_Colab.ipynb Step 4b). | |
| """ | |
| from __future__ import annotations | |
| import sys | |
| from pathlib import Path | |
| import matplotlib | |
| matplotlib.use("Agg") | |
| import matplotlib.pyplot as plt | |
# Parent of the directory that contains this script.
REPO = Path(__file__).resolve().parent.parent
# Put that directory first on sys.path so the `immunoorg` imports that follow
# resolve when this file is executed directly as a script.
sys.path.insert(0, str(REPO))
| from immunoorg.environment import ImmunoOrgEnvironment # noqa: E402 | |
| from immunoorg.models import ( # noqa: E402 | |
| ActionType, | |
| DiagnosticAction, | |
| ImmunoAction, | |
| StrategicAction, | |
| TacticalAction, | |
| ) | |
def _heuristic(obs, env):
    """Pick the next action from the current phase and the visible nodes.

    Decision table, keyed on ``obs.current_phase.value``:

    * ``"detection"``: scan logs on the first compromised, non-isolated node;
      otherwise on the first visible node; otherwise on an empty target.
    * ``"containment"`` with at least one compromised, non-isolated node:
      isolate the first such node.
    * ``"rca"``: identify the silo.
    * ``"refactor"``: reduce bureaucracy on ``"dept-management"``.
    * anything else (including containment with nothing left to isolate):
      measure organisational latency.

    ``env`` is accepted for call-site compatibility and is not used.
    """
    stage = obs.current_phase.value
    visible = obs.visible_nodes
    # Nodes that are compromised and have not been isolated yet.
    active_breaches = [
        node for node in visible if node.compromised and not node.isolated
    ]

    if stage == "detection":
        if active_breaches:
            scan_target = active_breaches[0].id
        elif visible:
            scan_target = visible[0].id
        else:
            scan_target = ""
        fields = dict(
            action_type=ActionType.TACTICAL,
            tactical_action=TacticalAction.SCAN_LOGS,
            target=scan_target,
        )
    elif stage == "containment" and active_breaches:
        fields = dict(
            action_type=ActionType.TACTICAL,
            tactical_action=TacticalAction.ISOLATE_NODE,
            target=active_breaches[0].id,
        )
    elif stage == "rca":
        fields = dict(
            action_type=ActionType.DIAGNOSTIC,
            diagnostic_action=DiagnosticAction.IDENTIFY_SILO,
        )
    elif stage == "refactor":
        fields = dict(
            action_type=ActionType.STRATEGIC,
            strategic_action=StrategicAction.REDUCE_BUREAUCRACY,
            target="dept-management",
        )
    else:
        # Fallback for every other phase / state combination.
        fields = dict(
            action_type=ActionType.DIAGNOSTIC,
            diagnostic_action=DiagnosticAction.MEASURE_ORG_LATENCY,
        )

    # Every action carries the same reasoning tag.
    return ImmunoAction(**fields, reasoning="evidence script")
def main() -> None:
    """Run one short heuristic episode and save the evidence figure.

    Steps:

    1. Create ``ImmunoOrgEnvironment(difficulty=1, seed=42)`` and reset it.
    2. Step it with ``_heuristic`` for at most 35 steps (stopping early when
       the environment reports ``done``), accumulating the reward.
    3. Draw a two-panel dark-themed figure: the cumulative reward curve on
       top, and a text panel pointing to the Colab notebook below.
    4. Save the figure as ``evidence_grpo_training.png`` under ``REPO`` and
       print the output path.
    """
    env = ImmunoOrgEnvironment(difficulty=1, seed=42)
    obs = env.reset()

    steps: list[int] = []
    cumulative: list[float] = []
    total = 0.0
    max_steps = 35
    for step in range(1, max_steps + 1):
        action = _heuristic(obs, env)
        # step() is unpacked as (observation, reward, done).
        obs, reward, done = env.step(action)
        total += float(reward)
        steps.append(step)
        cumulative.append(total)
        if done:
            break

    DARK, CARD, TEXT, GRID = "#0d1117", "#161b22", "#c9d1d9", "#30363d"
    # gridspec_kw is used for the height ratios because it is accepted by
    # older Matplotlib releases as well as current ones.
    fig, (ax1, ax2) = plt.subplots(
        2,
        1,
        figsize=(10, 7),
        dpi=140,
        gridspec_kw={"height_ratios": [2.2, 1.0]},
    )
    fig.patch.set_facecolor(DARK)
    for ax in (ax1, ax2):
        ax.set_facecolor(CARD)
        ax.tick_params(colors=TEXT)
        for spine in ax.spines.values():
            spine.set_color(GRID)

    # Top panel: cumulative reward over environment steps.
    ax1.plot(
        steps,
        cumulative,
        color="#3fb950",
        lw=2,
        marker="o",
        ms=3,
        label="cumulative reward",
    )
    ax1.set_xlabel("env step", color=TEXT)
    ax1.set_ylabel("cumulative episode reward", color=TEXT)
    ax1.set_title(
        "Real env rollout — heuristic policy (difficulty 1)\n"
        "GRPO in Colab learns policies that improve rewards in this same simulator",
        color=TEXT,
        fontsize=11,
    )
    ax1.grid(True, color=GRID, alpha=0.5, linestyle="--")
    ax1.legend(facecolor=CARD, edgecolor=GRID, labelcolor=TEXT)

    # Bottom panel: text only, pointing at the notebook that holds the
    # training curves.
    ax2.axis("off")
    msg = (
        "GRPO training loss + logged rewards\n"
        + "─" * 36
        + "\n"
        "Open: ImmunoOrg_Training_Colab.ipynb\n"
        "→ Runtime → Run all (GPU)\n"
        "→ Step 4b saves evidence_grpo_training.png\n"
        "\n"
        "This file’s top panel is a real environment signal;\n"
        "the notebook adds the optimizer loss curves judges ask for."
    )
    ax2.text(
        0.04,
        0.96,
        msg,
        transform=ax2.transAxes,
        va="top",
        ha="left",
        fontsize=10,
        color=TEXT,
        family="monospace",
    )

    fig.tight_layout()
    out = REPO / "evidence_grpo_training.png"
    fig.savefig(out, bbox_inches="tight", facecolor=DARK)
    # Release the figure once it is on disk.
    plt.close(fig)
    print(f"Wrote {out}")
# Run the evidence generation only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()