Spaces:
Sleeping
Sleeping
| """ | |
| CivicAI — Training Evidence Generator | |
| ====================================== | |
| Produces three publication-quality plots saved to assets/: | |
| reward_curve.png — Per-step reward over 50 turns (multi-agent baseline) | |
| comparison_chart.png — Random vs Rule-Agent across all 3 tasks | |
| component_scores.png — Economic / Health / Satisfaction / Crime breakdown | |
| Run: venv/Scripts/python.exe scripts/generate_training_plots.py | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import sys | |
| import json | |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| import random | |
| import numpy as np | |
| import matplotlib | |
| matplotlib.use("Agg") | |
| import matplotlib.pyplot as plt | |
| import matplotlib.gridspec as gridspec | |
| from matplotlib.ticker import MaxNLocator | |
| from civicai.environment import CivicAIEnv | |
| from civicai.models import Action, SubsidyPolicy | |
| from civicai.reward import compute_reward, get_named_scores | |
| from agents.orchestrator import Orchestrator | |
# Theme colours applied to every figure produced by this script.
DARK_BG, PANEL_BG, GRID_COL = "#0f172a", "#1e293b", "#334155"
TEXT_COL, MUTED_COL = "#e2e8f0", "#94a3b8"

# Series colours, looked up by short name in the plotting functions.
COLORS = dict(
    random="#ef4444",
    agent="#06b6d4",
    economic="#f59e0b",
    health="#10b981",
    sat="#a78bfa",
    crime="#f97316",
)

# The plots are written to ./assets (relative to the current working
# directory); create it up front so the savefig calls cannot fail on a
# missing folder.
os.makedirs("assets", exist_ok=True)
| # --------------------------------------------------------------------------- | |
| # Episode runners | |
| # --------------------------------------------------------------------------- | |
def run_random_episode(
    task_id: str = "stabilize_economy",
    seed: int = 42,
    max_steps: int = 50,
) -> dict:
    """Roll out one episode driven by uniformly sampled random actions.

    Actions are drawn from a private ``random.Random(seed)`` and the same
    seed is handed to ``env.reset``, so the module-level ``random`` state is
    never touched by this function.

    Args:
        task_id: Task identifier forwarded to ``CivicAIEnv.reset``.
        seed: Seed used for both the action sampler and the environment reset.
        max_steps: Upper bound on the number of environment steps. Defaults
            to 50, the horizon that used to be hard-coded here.

    Returns:
        A dict with two parallel lists, one entry per step taken:
        ``"rewards"`` holds the reward returned by ``env.step`` and
        ``"components"`` holds the value returned by ``get_named_scores``
        for the post-step state.
    """
    rng = random.Random(seed)
    env = CivicAIEnv()
    env.reset(task_id=task_id, seed=seed)

    # The set of policies is fixed, so build the list once instead of on
    # every step; the sequence of rng draws is unchanged.
    policies = list(SubsidyPolicy)

    rewards, components_history = [], []
    for _ in range(max_steps):
        # Keyword order matters: it fixes the order of the rng draws and
        # therefore the reproducibility of the episode for a given seed.
        action = Action(
            tax_rate=rng.uniform(0.15, 0.50),
            healthcare_budget=rng.uniform(0.08, 0.35),
            education_budget=rng.uniform(0.05, 0.25),
            police_budget=rng.uniform(0.03, 0.18),
            subsidy_policy=rng.choice(policies),
        )
        _, reward, done, _ = env.step(action)
        rewards.append(reward)

        # Re-derive the per-component breakdown from the post-step state.
        reward_obj = compute_reward(env.state(), action)
        components_history.append(get_named_scores(reward_obj))

        if done:
            break
    return {"rewards": rewards, "components": components_history}
def run_agent_episode(
    task_id: str = "stabilize_economy",
    max_steps: int | None = None,
) -> dict:
    """Roll out one episode driven by the ``Orchestrator``.

    Args:
        task_id: Task identifier forwarded to ``Orchestrator.reset``.
        max_steps: Optional safety cap on the number of orchestrator steps.
            ``None`` (the default) keeps the original behaviour of running
            until the orchestrator reports ``done``.

    Returns:
        A dict with two parallel lists, one entry per step taken:
        ``"rewards"`` holds the reward returned by ``orch.run_step`` and
        ``"components"`` holds the value returned by ``get_named_scores``
        for the post-step state.
    """
    env = CivicAIEnv()
    orch = Orchestrator(env)
    orch.reset(task_id)

    rewards, components_history = [], []
    done = False
    while not done and (max_steps is None or len(rewards) < max_steps):
        _, reward, done, _ = orch.run_step()
        rewards.append(reward)

        # The orchestrator does not expose the action it took, so a
        # default-constructed Action is used as a stand-in when recomputing
        # the breakdown.
        # NOTE(review): this is only accurate if the named component scores
        # depend on the state alone and not on the action — confirm against
        # civicai.reward.
        action = Action()
        reward_obj = compute_reward(env.state(), action)
        components_history.append(get_named_scores(reward_obj))
    return {"rewards": rewards, "components": components_history}
| # --------------------------------------------------------------------------- | |
| # Plot 1 — Reward Curve (single task, agent vs random) | |
| # --------------------------------------------------------------------------- | |
def _centered_moving_average(values, window: int = 5):
    """Return ``(x, y)`` for a moving average of *values* aligned to the raw turns."""
    series = np.asarray(values, dtype=float)
    if series.size < window:
        # Too short to smooth (np.convolve would give meaningless output or
        # raise on empty input): fall back to the raw series.
        return np.arange(series.size), series
    smoothed = np.convolve(series, np.ones(window) / window, mode="valid")
    # Output i averages series[i : i + window]; place it at the centre of
    # that span so the smoothed line sits on top of the data it summarises.
    return np.arange(smoothed.size) + (window - 1) / 2, smoothed


def plot_reward_curve() -> None:
    """Plot per-step reward for a random and an orchestrated episode.

    Runs one random episode (seed 7) and one agent episode on the
    ``stabilize_economy`` task, draws the raw rewards faintly with a
    5-step moving average on top, and saves the figure to
    ``assets/reward_curve.png``.
    """
    print(" Generating reward_curve.png ...")
    random_ep = run_random_episode("stabilize_economy", seed=7)
    agent_ep = run_agent_episode("stabilize_economy")

    fig, ax = plt.subplots(figsize=(11, 5))
    fig.patch.set_facecolor(DARK_BG)
    ax.set_facecolor(PANEL_BG)

    series = (
        ("random", "Random Agent", random_ep["rewards"]),
        ("agent", "Rule Agent", agent_ep["rewards"]),
    )

    # Draw the line plots first and the fills afterwards to keep the same
    # artist order (and therefore z-stacking) as before.
    smoothed = {}
    for key, name, rewards in series:
        ax.plot(range(len(rewards)), rewards, color=COLORS[key], alpha=0.25, linewidth=1)
        xs, ys = _centered_moving_average(rewards)
        smoothed[key] = (xs, ys)
        ax.plot(xs, ys, color=COLORS[key], linewidth=2,
                label=f"{name} (avg={np.mean(rewards):.3f})")
    for key, (xs, ys) in smoothed.items():
        ax.fill_between(xs, ys, alpha=0.08, color=COLORS[key])

    ax.set_ylim(0, 1.05)
    ax.set_xlabel("Turn (Quarter)", color=MUTED_COL, fontsize=11)
    ax.set_ylabel("Step Reward [0–1]", color=MUTED_COL, fontsize=11)
    ax.set_title("CivicAI: Reward Curve — Economic Stability Task",
                 color=TEXT_COL, fontsize=14, fontweight="bold", pad=12)
    ax.tick_params(colors=MUTED_COL)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    for spine in ax.spines.values():
        spine.set_edgecolor(GRID_COL)
    ax.grid(axis="y", color=GRID_COL, linewidth=0.5, linestyle="--")
    ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=10)

    # Operate on this figure explicitly rather than on pyplot's implicit
    # "current figure".
    fig.tight_layout()
    fig.savefig("assets/reward_curve.png", dpi=150, facecolor=DARK_BG)
    plt.close(fig)
    print(" Saved: assets/reward_curve.png")
| # --------------------------------------------------------------------------- | |
| # Plot 2 — Comparison Chart (3 tasks, agent vs random) | |
| # --------------------------------------------------------------------------- | |
def plot_comparison_chart() -> None:
    """Compare mean step reward of the agent and a random baseline per task.

    For each of the three tasks, runs ``n_ep`` random episodes (seeds
    ``0..n_ep-1``) and ``n_ep`` agent episodes, then draws grouped bars of
    the mean per-episode reward with the standard deviation as error bars.
    Saves the figure to ``assets/comparison_chart.png`` and the underlying
    numbers to ``assets/evaluation_results.json``.
    """
    print(" Generating comparison_chart.png ...")
    tasks = ["stabilize_economy", "manage_pandemic", "control_crisis"]
    labels = ["Economic\nStability", "Pandemic\nManagement", "Social\nCrisis"]
    n_ep = 3

    agent_means, agent_stds = [], []
    random_means, random_stds = [], []
    for task_id in tasks:
        a_rewards, r_rewards = [], []
        for seed in range(n_ep):
            r_ep = run_random_episode(task_id, seed=seed)
            # NOTE(review): run_agent_episode takes no seed, so these runs
            # differ only if the orchestrator/environment is itself
            # non-deterministic; otherwise agent_std will be 0 — confirm.
            a_ep = run_agent_episode(task_id)
            r_rewards.append(float(np.mean(r_ep["rewards"])))
            a_rewards.append(float(np.mean(a_ep["rewards"])))
        agent_means.append(float(np.mean(a_rewards)))
        agent_stds.append(float(np.std(a_rewards)))
        random_means.append(float(np.mean(r_rewards)))
        random_stds.append(float(np.std(r_rewards)))

    x = np.arange(len(tasks))
    w = 0.35
    fig, ax = plt.subplots(figsize=(10, 6))
    fig.patch.set_facecolor(DARK_BG)
    ax.set_facecolor(PANEL_BG)

    bars_r = ax.bar(x - w / 2, random_means, w, yerr=random_stds,
                    label="Random Agent", color=COLORS["random"],
                    alpha=0.85, capsize=5,
                    error_kw={"ecolor": "#fca5a5", "linewidth": 1.5})
    bars_a = ax.bar(x + w / 2, agent_means, w, yerr=agent_stds,
                    label="Rule-Based Agent", color=COLORS["agent"],
                    alpha=0.85, capsize=5,
                    error_kw={"ecolor": "#67e8f9", "linewidth": 1.5})

    # Value labels: sit just above the error-bar cap (height + std) so the
    # text does not collide with the cap when the spread is non-trivial.
    for bars, stds, key in ((bars_r, random_stds, "random"),
                            (bars_a, agent_stds, "agent")):
        for bar, std in zip(bars, stds):
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2, height + std + 0.02,
                    f"{height:.3f}", ha="center", color=COLORS[key],
                    fontsize=9, fontweight="bold")

    ax.set_xticks(x)
    ax.set_xticklabels(labels, color=TEXT_COL, fontsize=11)
    ax.set_ylim(0, 1.10)
    ax.set_ylabel("Avg Step Reward [0–1]", color=MUTED_COL, fontsize=11)
    ax.set_title("CivicAI: Before vs After — Agent vs Random Baseline",
                 color=TEXT_COL, fontsize=14, fontweight="bold", pad=12)
    ax.tick_params(colors=MUTED_COL)
    for spine in ax.spines.values():
        spine.set_edgecolor(GRID_COL)
    ax.grid(axis="y", color=GRID_COL, linewidth=0.5, linestyle="--")
    ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=10)

    fig.tight_layout()
    fig.savefig("assets/comparison_chart.png", dpi=150, facecolor=DARK_BG)
    plt.close(fig)
    print(" Saved: assets/comparison_chart.png")

    # Persist the numbers behind the chart, keyed by task id.
    results = {
        task: {
            "agent_mean": round(a_mean, 4),
            "agent_std": round(a_std, 4),
            "random_mean": round(r_mean, 4),
            "random_std": round(r_std, 4),
            "improvement": round(a_mean - r_mean, 4),
        }
        for task, a_mean, a_std, r_mean, r_std in zip(
            tasks, agent_means, agent_stds, random_means, random_stds
        )
    }
    with open("assets/evaluation_results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print(" Saved: assets/evaluation_results.json")
| # --------------------------------------------------------------------------- | |
| # Plot 3 — Named Component Scores (economic/health/satisfaction/crime) | |
| # --------------------------------------------------------------------------- | |
def _smooth_component_series(values, window: int = 5):
    """Return ``(x, y)`` for a moving average of *values* aligned to the raw turns."""
    series = np.asarray(values, dtype=float)
    if series.size < window:
        # Not enough points for a full window: plot the raw series instead.
        return np.arange(series.size), series
    smoothed = np.convolve(series, np.ones(window) / window, mode="valid")
    # Output i averages series[i : i + window]; centre it on that span so
    # the smoothed curve is not shifted left of the raw data.
    return np.arange(smoothed.size) + (window - 1) / 2, smoothed


def plot_component_scores() -> None:
    """Plot the four named reward components for agent vs random.

    Runs one random episode (seed 13) and one agent episode on the
    ``control_crisis`` task and draws a 2x2 grid, one panel per component
    key (``economic_score``, ``health_score``, ``satisfaction_score``,
    ``crime_score``), each showing the raw values faintly with a 5-step
    moving average on top. Saves the figure to
    ``assets/component_scores.png``.
    """
    print(" Generating component_scores.png ...")
    random_ep = run_random_episode("control_crisis", seed=13)
    agent_ep = run_agent_episode("control_crisis")

    fig = plt.figure(figsize=(14, 9))
    fig.patch.set_facecolor(DARK_BG)
    gs = gridspec.GridSpec(2, 2, figure=fig, hspace=0.45, wspace=0.35)

    component_info = [
        ("economic_score", "Economic Score", COLORS["economic"]),
        ("health_score", "Health Score", COLORS["health"]),
        ("satisfaction_score", "Satisfaction Score", COLORS["sat"]),
        ("crime_score", "Crime Score", COLORS["crime"]),
    ]
    for idx, (key, label, color) in enumerate(component_info):
        row, col = divmod(idx, 2)
        ax = fig.add_subplot(gs[row, col])
        ax.set_facecolor(PANEL_BG)

        r_vals = [c[key] for c in random_ep["components"]]
        a_vals = [c[key] for c in agent_ep["components"]]
        r_x, r_s = _smooth_component_series(r_vals)
        a_x, a_s = _smooth_component_series(a_vals)

        # Random baseline always uses the shared "random" colour; the agent
        # curve uses the colour assigned to this component.
        ax.plot(r_vals, color=COLORS["random"], alpha=0.20, linewidth=0.8)
        ax.plot(r_x, r_s, color=COLORS["random"], linewidth=1.8,
                label=f"Random (avg={np.mean(r_vals):.2f})")
        ax.plot(a_vals, color=color, alpha=0.20, linewidth=0.8)
        ax.plot(a_x, a_s, color=color, linewidth=1.8,
                label=f"Agent (avg={np.mean(a_vals):.2f})")
        ax.fill_between(a_x, a_s, alpha=0.10, color=color)

        ax.set_ylim(0, 1.05)
        ax.set_title(label, color=TEXT_COL, fontsize=12, fontweight="bold")
        ax.set_xlabel("Turn", color=MUTED_COL, fontsize=9)
        ax.set_ylabel("Score [0–1]", color=MUTED_COL, fontsize=9)
        ax.tick_params(colors=MUTED_COL, labelsize=8)
        for spine in ax.spines.values():
            spine.set_edgecolor(GRID_COL)
        ax.grid(color=GRID_COL, linewidth=0.4, linestyle="--")
        ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=8)

    fig.suptitle(
        "CivicAI: Named Reward Components — Social Crisis Task",
        color=TEXT_COL, fontsize=15, fontweight="bold", y=0.98,
    )
    fig.savefig("assets/component_scores.png", dpi=150, facecolor=DARK_BG)
    plt.close(fig)
    print(" Saved: assets/component_scores.png")
| # --------------------------------------------------------------------------- | |
| # Main | |
| # --------------------------------------------------------------------------- | |
if __name__ == "__main__":
    # Script entry point: build every plot in order, then list the artefacts.
    print("\n[CivicAI] Generating Training Evidence Plots\n")
    for build_plot in (plot_reward_curve, plot_comparison_chart, plot_component_scores):
        build_plot()
    print("\n[CivicAI] All plots saved to assets/")
    for artefact_line in (
        " assets/reward_curve.png",
        " assets/comparison_chart.png",
        " assets/component_scores.png",
        " assets/evaluation_results.json",
    ):
        print(artefact_line)