""" CivicAI — Training Evidence Generator ====================================== Produces three publication-quality plots saved to assets/: reward_curve.png — Per-step reward over 50 turns (multi-agent baseline) comparison_chart.png — Random vs Rule-Agent across all 3 tasks component_scores.png — Economic / Health / Satisfaction / Crime breakdown Run: venv/Scripts/python.exe scripts/generate_training_plots.py """ from __future__ import annotations import os import sys import json sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import random import numpy as np import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec from matplotlib.ticker import MaxNLocator from civicai.environment import CivicAIEnv from civicai.models import Action, SubsidyPolicy from civicai.reward import compute_reward, get_named_scores from agents.orchestrator import Orchestrator DARK_BG = "#0f172a" PANEL_BG = "#1e293b" GRID_COL = "#334155" TEXT_COL = "#e2e8f0" MUTED_COL = "#94a3b8" COLORS = { "random": "#ef4444", "agent": "#06b6d4", "economic": "#f59e0b", "health": "#10b981", "sat": "#a78bfa", "crime": "#f97316", } os.makedirs("assets", exist_ok=True) # --------------------------------------------------------------------------- # Episode runners # --------------------------------------------------------------------------- def run_random_episode(task_id: str = "stabilize_economy", seed: int = 42) -> dict: rng = random.Random(seed) env = CivicAIEnv() obs = env.reset(task_id=task_id, seed=seed) rewards, components_history = [], [] for _ in range(50): action = Action( tax_rate=rng.uniform(0.15, 0.50), healthcare_budget=rng.uniform(0.08, 0.35), education_budget=rng.uniform(0.05, 0.25), police_budget=rng.uniform(0.03, 0.18), subsidy_policy=rng.choice(list(SubsidyPolicy)), ) obs, reward, done, info = env.step(action) rewards.append(reward) state = env.state() reward_obj = compute_reward(state, action) components_history.append(get_named_scores(reward_obj)) if done: break return {"rewards": rewards, "components": components_history} def run_agent_episode(task_id: str = "stabilize_economy") -> dict: env = CivicAIEnv() orch = Orchestrator(env) obs = orch.reset(task_id) rewards, components_history = [], [] done = False while not done: obs, reward, done, info = orch.run_step() rewards.append(reward) state = env.state() action = Action() # last action proxy — components come from state reward_obj = compute_reward(state, action) components_history.append(get_named_scores(reward_obj)) return {"rewards": rewards, "components": components_history} # --------------------------------------------------------------------------- # Plot 1 — Reward Curve (single task, agent vs random) # --------------------------------------------------------------------------- def plot_reward_curve() -> None: print(" Generating reward_curve.png ...") random_ep = run_random_episode("stabilize_economy", seed=7) agent_ep = run_agent_episode("stabilize_economy") fig, ax = plt.subplots(figsize=(11, 5)) fig.patch.set_facecolor(DARK_BG) ax.set_facecolor(PANEL_BG) r_turns = range(len(random_ep["rewards"])) a_turns = range(len(agent_ep["rewards"])) r_smooth = np.convolve(random_ep["rewards"], np.ones(5)/5, mode="valid") a_smooth = np.convolve(agent_ep["rewards"], np.ones(5)/5, mode="valid") ax.plot(r_turns, random_ep["rewards"], color=COLORS["random"], alpha=0.25, linewidth=1) ax.plot(range(len(r_smooth)), r_smooth, color=COLORS["random"], linewidth=2, label=f"Random Agent (avg={np.mean(random_ep['rewards']):.3f})") ax.plot(a_turns, agent_ep["rewards"], color=COLORS["agent"], alpha=0.25, linewidth=1) ax.plot(range(len(a_smooth)), a_smooth, color=COLORS["agent"], linewidth=2, label=f"Rule Agent (avg={np.mean(agent_ep['rewards']):.3f})") ax.fill_between(range(len(r_smooth)), r_smooth, alpha=0.08, color=COLORS["random"]) ax.fill_between(range(len(a_smooth)), a_smooth, alpha=0.08, color=COLORS["agent"]) ax.set_ylim(0, 1.05) ax.set_xlabel("Turn (Quarter)", color=MUTED_COL, fontsize=11) ax.set_ylabel("Step Reward [0–1]", color=MUTED_COL, fontsize=11) ax.set_title("CivicAI: Reward Curve — Economic Stability Task", color=TEXT_COL, fontsize=14, fontweight="bold", pad=12) ax.tick_params(colors=MUTED_COL) ax.xaxis.set_major_locator(MaxNLocator(integer=True)) for spine in ax.spines.values(): spine.set_edgecolor(GRID_COL) ax.grid(axis="y", color=GRID_COL, linewidth=0.5, linestyle="--") ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=10) plt.tight_layout() plt.savefig("assets/reward_curve.png", dpi=150, facecolor=DARK_BG) plt.close() print(" Saved: assets/reward_curve.png") # --------------------------------------------------------------------------- # Plot 2 — Comparison Chart (3 tasks, agent vs random) # --------------------------------------------------------------------------- def plot_comparison_chart() -> None: print(" Generating comparison_chart.png ...") tasks = ["stabilize_economy", "manage_pandemic", "control_crisis"] labels = ["Economic\nStability", "Pandemic\nManagement", "Social\nCrisis"] n_ep = 3 agent_means, agent_stds = [], [] random_means, random_stds = [], [] for task_id in tasks: a_rewards, r_rewards = [], [] for seed in range(n_ep): r_ep = run_random_episode(task_id, seed=seed) a_ep = run_agent_episode(task_id) r_rewards.append(float(np.mean(r_ep["rewards"]))) a_rewards.append(float(np.mean(a_ep["rewards"]))) agent_means.append(float(np.mean(a_rewards))) agent_stds.append(float(np.std(a_rewards))) random_means.append(float(np.mean(r_rewards))) random_stds.append(float(np.std(r_rewards))) x = np.arange(len(tasks)) w = 0.35 fig, ax = plt.subplots(figsize=(10, 6)) fig.patch.set_facecolor(DARK_BG) ax.set_facecolor(PANEL_BG) bars_r = ax.bar(x - w/2, random_means, w, yerr=random_stds, label="Random Agent", color=COLORS["random"], alpha=0.85, capsize=5, error_kw={"ecolor": "#fca5a5", "linewidth": 1.5}) bars_a = ax.bar(x + w/2, agent_means, w, yerr=agent_stds, label="Rule-Based Agent", color=COLORS["agent"], alpha=0.85, capsize=5, error_kw={"ecolor": "#67e8f9", "linewidth": 1.5}) # Value labels on bars for bar in bars_r: ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02, f"{bar.get_height():.3f}", ha="center", color=COLORS["random"], fontsize=9, fontweight="bold") for bar in bars_a: ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02, f"{bar.get_height():.3f}", ha="center", color=COLORS["agent"], fontsize=9, fontweight="bold") ax.set_xticks(x) ax.set_xticklabels(labels, color=TEXT_COL, fontsize=11) ax.set_ylim(0, 1.10) ax.set_ylabel("Avg Step Reward [0–1]", color=MUTED_COL, fontsize=11) ax.set_title("CivicAI: Before vs After — Agent vs Random Baseline", color=TEXT_COL, fontsize=14, fontweight="bold", pad=12) ax.tick_params(colors=MUTED_COL) for spine in ax.spines.values(): spine.set_edgecolor(GRID_COL) ax.grid(axis="y", color=GRID_COL, linewidth=0.5, linestyle="--") ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=10) plt.tight_layout() plt.savefig("assets/comparison_chart.png", dpi=150, facecolor=DARK_BG) plt.close() print(" Saved: assets/comparison_chart.png") # Save JSON results results = { t: { "agent_mean": round(agent_means[i], 4), "agent_std": round(agent_stds[i], 4), "random_mean": round(random_means[i], 4), "random_std": round(random_stds[i], 4), "improvement": round(agent_means[i] - random_means[i], 4), } for i, t in enumerate(tasks) } with open("assets/evaluation_results.json", "w") as f: json.dump(results, f, indent=2) print(" Saved: assets/evaluation_results.json") # --------------------------------------------------------------------------- # Plot 3 — Named Component Scores (economic/health/satisfaction/crime) # --------------------------------------------------------------------------- def plot_component_scores() -> None: print(" Generating component_scores.png ...") random_ep = run_random_episode("control_crisis", seed=13) agent_ep = run_agent_episode("control_crisis") fig = plt.figure(figsize=(14, 9)) fig.patch.set_facecolor(DARK_BG) gs = gridspec.GridSpec(2, 2, figure=fig, hspace=0.45, wspace=0.35) component_info = [ ("economic_score", "Economic Score", COLORS["economic"]), ("health_score", "Health Score", COLORS["health"]), ("satisfaction_score", "Satisfaction Score", COLORS["sat"]), ("crime_score", "Crime Score", COLORS["crime"]), ] for idx, (key, label, color) in enumerate(component_info): row, col = divmod(idx, 2) ax = fig.add_subplot(gs[row, col]) ax.set_facecolor(PANEL_BG) r_vals = [c[key] for c in random_ep["components"]] a_vals = [c[key] for c in agent_ep["components"]] # Smooth r_s = np.convolve(r_vals, np.ones(5)/5, mode="valid") if len(r_vals) > 5 else r_vals a_s = np.convolve(a_vals, np.ones(5)/5, mode="valid") if len(a_vals) > 5 else a_vals ax.plot(r_vals, color=COLORS["random"], alpha=0.20, linewidth=0.8) ax.plot(range(len(r_s)), r_s, color=COLORS["random"], linewidth=1.8, label=f"Random (avg={np.mean(r_vals):.2f})") ax.plot(a_vals, color=color, alpha=0.20, linewidth=0.8) ax.plot(range(len(a_s)), a_s, color=color, linewidth=1.8, label=f"Agent (avg={np.mean(a_vals):.2f})") ax.fill_between(range(len(a_s)), a_s, alpha=0.10, color=color) ax.set_ylim(0, 1.05) ax.set_title(label, color=TEXT_COL, fontsize=12, fontweight="bold") ax.set_xlabel("Turn", color=MUTED_COL, fontsize=9) ax.set_ylabel("Score [0–1]", color=MUTED_COL, fontsize=9) ax.tick_params(colors=MUTED_COL, labelsize=8) for spine in ax.spines.values(): spine.set_edgecolor(GRID_COL) ax.grid(color=GRID_COL, linewidth=0.4, linestyle="--") ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=8) fig.suptitle( "CivicAI: Named Reward Components — Social Crisis Task", color=TEXT_COL, fontsize=15, fontweight="bold", y=0.98 ) plt.savefig("assets/component_scores.png", dpi=150, facecolor=DARK_BG) plt.close() print(" Saved: assets/component_scores.png") # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- if __name__ == "__main__": print("\n[CivicAI] Generating Training Evidence Plots\n") plot_reward_curve() plot_comparison_chart() plot_component_scores() print("\n[CivicAI] All plots saved to assets/") print(" assets/reward_curve.png") print(" assets/comparison_chart.png") print(" assets/component_scores.png") print(" assets/evaluation_results.json")