CivicAI / scripts /generate_training_plots.py
mahammadaftab's picture
Final updated
6298125
"""
CivicAI — Training Evidence Generator
======================================
Produces three publication-quality plots saved to assets/:
    reward_curve.png     — Per-step reward over 50 turns (multi-agent baseline)
    comparison_chart.png — Random vs Rule-Agent across all 3 tasks
    component_scores.png — Economic / Health / Satisfaction / Crime breakdown
The comparison step also writes its numbers to assets/evaluation_results.json.

Run: venv/Scripts/python.exe scripts/generate_training_plots.py
"""
from __future__ import annotations
import os
import sys
import json
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import random
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.ticker import MaxNLocator
from civicai.environment import CivicAIEnv
from civicai.models import Action, SubsidyPolicy
from civicai.reward import compute_reward, get_named_scores
from agents.orchestrator import Orchestrator
# ---------------------------------------------------------------------------
# Shared dark theme
# ---------------------------------------------------------------------------
DARK_BG = "#0f172a"    # figure background, also passed to savefig(facecolor=...)
PANEL_BG = "#1e293b"   # axes and legend background
GRID_COL = "#334155"   # grid lines, axes spines and legend border
TEXT_COL = "#e2e8f0"   # titles, legend text and category tick labels
MUTED_COL = "#94a3b8"  # axis labels and tick marks

# Series colours: "random"/"agent" identify the two policies being compared,
# the remaining four identify the named reward components.
COLORS = dict(
    random="#ef4444",
    agent="#06b6d4",
    economic="#f59e0b",
    health="#10b981",
    sat="#a78bfa",
    crime="#f97316",
)

# Every output file is written under ./assets (relative to the current working
# directory), so make sure it exists as soon as the module is loaded.
os.makedirs("assets", exist_ok=True)
# ---------------------------------------------------------------------------
# Episode runners
# ---------------------------------------------------------------------------
def run_random_episode(
    task_id: str = "stabilize_economy",
    seed: int = 42,
    max_turns: int = 50,
) -> dict:
    """Roll out one episode in which every action field is sampled at random.

    A dedicated ``random.Random(seed)`` supplies all sampled values, and the
    same seed is forwarded to ``env.reset``, so a given ``seed`` always draws
    the same sequence of actions.

    Args:
        task_id: Task identifier passed to ``CivicAIEnv.reset``.
        seed: Seed used for both the action sampler and the environment reset.
        max_turns: Upper bound on the number of steps taken. The rollout stops
            earlier if the environment reports ``done``. Defaults to 50, the
            horizon this function previously hard-coded.

    Returns:
        A dict with two parallel lists holding one entry per executed step:
        ``"rewards"`` (the reward returned by ``env.step``) and
        ``"components"`` (the output of ``get_named_scores`` for that step).
    """
    rng = random.Random(seed)
    env = CivicAIEnv()
    env.reset(task_id=task_id, seed=seed)

    # Build the list of enum members once; it does not change between turns.
    subsidy_options = list(SubsidyPolicy)

    rewards: list = []
    components_history: list = []
    for _ in range(max_turns):
        action = Action(
            tax_rate=rng.uniform(0.15, 0.50),
            healthcare_budget=rng.uniform(0.08, 0.35),
            education_budget=rng.uniform(0.05, 0.25),
            police_budget=rng.uniform(0.03, 0.18),
            subsidy_policy=rng.choice(subsidy_options),
        )
        # Observation and info are not needed for the plots.
        _obs, reward, done, _info = env.step(action)
        rewards.append(reward)

        # Recompute the reward object from the post-step state so the named
        # per-component scores can be recorded alongside the scalar reward.
        reward_obj = compute_reward(env.state(), action)
        components_history.append(get_named_scores(reward_obj))

        if done:
            break
    return {"rewards": rewards, "components": components_history}
def run_agent_episode(task_id: str = "stabilize_economy") -> dict:
    """Play one episode with the orchestrator choosing every action.

    The orchestrator is stepped until it reports the episode as finished; at
    least one step is always executed.

    Args:
        task_id: Task identifier handed to ``Orchestrator.reset``.

    Returns:
        A dict with two parallel lists, one entry per step: ``"rewards"``
        (the reward returned by ``run_step``) and ``"components"`` (the output
        of ``get_named_scores`` for that step).
    """
    environment = CivicAIEnv()
    orchestrator = Orchestrator(environment)
    orchestrator.reset(task_id)

    step_rewards = []
    score_breakdowns = []
    while True:
        _, step_reward, finished, _ = orchestrator.run_step()
        step_rewards.append(step_reward)

        snapshot = environment.state()
        # NOTE(review): a default-constructed Action stands in for whatever the
        # orchestrator actually played. The recorded components are only
        # faithful if the named scores depend on state alone — confirm against
        # compute_reward.
        placeholder_action = Action()
        breakdown = compute_reward(snapshot, placeholder_action)
        score_breakdowns.append(get_named_scores(breakdown))

        if finished:
            break
    return {"rewards": step_rewards, "components": score_breakdowns}
# ---------------------------------------------------------------------------
# Plot 1 — Reward Curve (single task, agent vs random)
# ---------------------------------------------------------------------------
def _centered_moving_average(values, window: int = 5):
    """Return ``(x, y)`` for a moving average aligned with the raw series.

    ``np.convolve(..., mode="valid")`` yields ``len(values) - window + 1``
    points, where point ``i`` averages ``values[i : i + window]``. The returned
    x positions are shifted by ``(window - 1) / 2`` so each average is drawn at
    the centre of the span it summarises rather than at its left edge.

    A series shorter than ``window`` is returned unsmoothed: in that case
    ``np.convolve`` would swap its arguments (or raise on an empty input) and
    the result would not be a moving average of the data.
    """
    series = np.asarray(values, dtype=float)
    if window <= 1 or series.size < window:
        return np.arange(series.size), series
    smoothed = np.convolve(series, np.ones(window) / window, mode="valid")
    return np.arange(smoothed.size) + (window - 1) / 2, smoothed


def plot_reward_curve() -> None:
    """Plot per-turn reward for the random and rule agents on one task.

    Runs one random episode (seed 7) and one orchestrator episode on the
    ``stabilize_economy`` task, draws each raw reward series faintly with its
    5-turn moving average on top, and saves the figure to
    ``assets/reward_curve.png``.
    """
    print(" Generating reward_curve.png ...")
    random_ep = run_random_episode("stabilize_economy", seed=7)
    agent_ep = run_agent_episode("stabilize_economy")

    fig, ax = plt.subplots(figsize=(11, 5))
    fig.patch.set_facecolor(DARK_BG)
    ax.set_facecolor(PANEL_BG)

    series = [
        ("Random Agent", random_ep["rewards"], COLORS["random"]),
        ("Rule Agent", agent_ep["rewards"], COLORS["agent"]),
    ]
    smoothed_curves = []
    for name, rewards, color in series:
        xs, ys = _centered_moving_average(rewards)
        smoothed_curves.append((xs, ys, color))
        # Faint raw trace underneath, bold smoothed trace (with legend) on top.
        ax.plot(range(len(rewards)), rewards, color=color, alpha=0.25, linewidth=1)
        ax.plot(xs, ys, color=color, linewidth=2,
                label=f"{name} (avg={np.mean(rewards):.3f})")
    # Shade under the smoothed curves after all lines have been added.
    for xs, ys, color in smoothed_curves:
        ax.fill_between(xs, ys, alpha=0.08, color=color)

    ax.set_ylim(0, 1.05)
    ax.set_xlabel("Turn (Quarter)", color=MUTED_COL, fontsize=11)
    ax.set_ylabel("Step Reward [0–1]", color=MUTED_COL, fontsize=11)
    ax.set_title("CivicAI: Reward Curve — Economic Stability Task",
                 color=TEXT_COL, fontsize=14, fontweight="bold", pad=12)
    ax.tick_params(colors=MUTED_COL)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    for spine in ax.spines.values():
        spine.set_edgecolor(GRID_COL)
    ax.grid(axis="y", color=GRID_COL, linewidth=0.5, linestyle="--")
    ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=10)

    fig.tight_layout()
    fig.savefig("assets/reward_curve.png", dpi=150, facecolor=DARK_BG)
    plt.close(fig)
    print(" Saved: assets/reward_curve.png")
# ---------------------------------------------------------------------------
# Plot 2 — Comparison Chart (3 tasks, agent vs random)
# ---------------------------------------------------------------------------
def _annotate_bars(ax, bars, errors, color) -> None:
    """Write each bar's height, to 3 decimals, just above its error bar."""
    for bar, err in zip(bars, errors):
        height = bar.get_height()
        # Offset by the error so the text clears the upper error-bar cap.
        ax.text(bar.get_x() + bar.get_width() / 2, height + err + 0.02,
                f"{height:.3f}", ha="center", color=color,
                fontsize=9, fontweight="bold")


def plot_comparison_chart() -> None:
    """Compare mean step reward of the random and rule agents on three tasks.

    For each task, ``n_ep`` random episodes (seeds ``0..n_ep-1``) and ``n_ep``
    orchestrator episodes are run. The per-episode mean rewards are summarised
    as mean and standard deviation, drawn as grouped bars with error bars, and
    saved to ``assets/comparison_chart.png``. The same numbers, plus the
    agent-minus-random improvement, are written to
    ``assets/evaluation_results.json``.
    """
    print(" Generating comparison_chart.png ...")
    tasks = ["stabilize_economy", "manage_pandemic", "control_crisis"]
    labels = ["Economic\nStability", "Pandemic\nManagement", "Social\nCrisis"]
    n_ep = 3

    agent_means, agent_stds = [], []
    random_means, random_stds = [], []
    for task_id in tasks:
        a_rewards, r_rewards = [], []
        for seed in range(n_ep):
            r_ep = run_random_episode(task_id, seed=seed)
            # NOTE(review): run_agent_episode takes no seed, so these repeats
            # only differ if the environment/orchestrator is itself stochastic
            # — confirm.
            a_ep = run_agent_episode(task_id)
            r_rewards.append(float(np.mean(r_ep["rewards"])))
            a_rewards.append(float(np.mean(a_ep["rewards"])))
        agent_means.append(float(np.mean(a_rewards)))
        agent_stds.append(float(np.std(a_rewards)))
        random_means.append(float(np.mean(r_rewards)))
        random_stds.append(float(np.std(r_rewards)))

    x = np.arange(len(tasks))
    w = 0.35  # bar width; the two bars of a group sit either side of x

    fig, ax = plt.subplots(figsize=(10, 6))
    fig.patch.set_facecolor(DARK_BG)
    ax.set_facecolor(PANEL_BG)

    bars_r = ax.bar(x - w / 2, random_means, w, yerr=random_stds,
                    label="Random Agent", color=COLORS["random"],
                    alpha=0.85, capsize=5,
                    error_kw={"ecolor": "#fca5a5", "linewidth": 1.5})
    bars_a = ax.bar(x + w / 2, agent_means, w, yerr=agent_stds,
                    label="Rule-Based Agent", color=COLORS["agent"],
                    alpha=0.85, capsize=5,
                    error_kw={"ecolor": "#67e8f9", "linewidth": 1.5})

    # Value labels on bars
    _annotate_bars(ax, bars_r, random_stds, COLORS["random"])
    _annotate_bars(ax, bars_a, agent_stds, COLORS["agent"])

    ax.set_xticks(x)
    ax.set_xticklabels(labels, color=TEXT_COL, fontsize=11)
    ax.set_ylim(0, 1.10)
    ax.set_ylabel("Avg Step Reward [0–1]", color=MUTED_COL, fontsize=11)
    ax.set_title("CivicAI: Before vs After — Agent vs Random Baseline",
                 color=TEXT_COL, fontsize=14, fontweight="bold", pad=12)
    ax.tick_params(colors=MUTED_COL)
    for spine in ax.spines.values():
        spine.set_edgecolor(GRID_COL)
    ax.grid(axis="y", color=GRID_COL, linewidth=0.5, linestyle="--")
    ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=10)

    fig.tight_layout()
    fig.savefig("assets/comparison_chart.png", dpi=150, facecolor=DARK_BG)
    plt.close(fig)
    print(" Saved: assets/comparison_chart.png")

    # Save JSON results
    results = {
        task: {
            "agent_mean": round(a_mean, 4),
            "agent_std": round(a_std, 4),
            "random_mean": round(r_mean, 4),
            "random_std": round(r_std, 4),
            "improvement": round(a_mean - r_mean, 4),
        }
        for task, a_mean, a_std, r_mean, r_std in zip(
            tasks, agent_means, agent_stds, random_means, random_stds
        )
    }
    # Explicit encoding so the file does not depend on the platform default.
    with open("assets/evaluation_results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print(" Saved: assets/evaluation_results.json")
# ---------------------------------------------------------------------------
# Plot 3 — Named Component Scores (economic/health/satisfaction/crime)
# ---------------------------------------------------------------------------
def plot_component_scores() -> None:
    """Plot the four named reward components for random vs rule agent.

    Runs one random episode (seed 13) and one orchestrator episode on the
    ``control_crisis`` task, then draws a 2x2 grid with one panel per component
    (economic, health, satisfaction, crime). Each panel shows the raw series
    faintly with a 5-turn moving average on top. The figure is saved to
    ``assets/component_scores.png``.
    """
    print(" Generating component_scores.png ...")
    random_ep = run_random_episode("control_crisis", seed=13)
    agent_ep = run_agent_episode("control_crisis")

    window = 5

    def _smooth(values):
        """Return ``(x, y)`` of the moving average, centred on its window.

        Series shorter than the window are returned unsmoothed, because a
        "valid" convolution is not a moving average of the data in that case.
        """
        series = np.asarray(values, dtype=float)
        if series.size < window:
            return np.arange(series.size), series
        averaged = np.convolve(series, np.ones(window) / window, mode="valid")
        # averaged[i] covers values[i : i + window]; draw it at the centre of
        # that span so it lines up with the raw trace.
        return np.arange(averaged.size) + (window - 1) / 2, averaged

    fig = plt.figure(figsize=(14, 9))
    fig.patch.set_facecolor(DARK_BG)
    gs = gridspec.GridSpec(2, 2, figure=fig, hspace=0.45, wspace=0.35)

    # (key in the per-step components dict, panel title, agent line colour)
    component_info = [
        ("economic_score", "Economic Score", COLORS["economic"]),
        ("health_score", "Health Score", COLORS["health"]),
        ("satisfaction_score", "Satisfaction Score", COLORS["sat"]),
        ("crime_score", "Crime Score", COLORS["crime"]),
    ]
    for idx, (key, label, color) in enumerate(component_info):
        row, col = divmod(idx, 2)
        ax = fig.add_subplot(gs[row, col])
        ax.set_facecolor(PANEL_BG)

        r_vals = [c[key] for c in random_ep["components"]]
        a_vals = [c[key] for c in agent_ep["components"]]
        r_x, r_s = _smooth(r_vals)
        a_x, a_s = _smooth(a_vals)

        ax.plot(r_vals, color=COLORS["random"], alpha=0.20, linewidth=0.8)
        ax.plot(r_x, r_s, color=COLORS["random"], linewidth=1.8,
                label=f"Random (avg={np.mean(r_vals):.2f})")
        ax.plot(a_vals, color=color, alpha=0.20, linewidth=0.8)
        ax.plot(a_x, a_s, color=color, linewidth=1.8,
                label=f"Agent (avg={np.mean(a_vals):.2f})")
        # Only the agent curve is shaded.
        ax.fill_between(a_x, a_s, alpha=0.10, color=color)

        ax.set_ylim(0, 1.05)
        ax.set_title(label, color=TEXT_COL, fontsize=12, fontweight="bold")
        ax.set_xlabel("Turn", color=MUTED_COL, fontsize=9)
        ax.set_ylabel("Score [0–1]", color=MUTED_COL, fontsize=9)
        ax.tick_params(colors=MUTED_COL, labelsize=8)
        for spine in ax.spines.values():
            spine.set_edgecolor(GRID_COL)
        ax.grid(color=GRID_COL, linewidth=0.4, linestyle="--")
        ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=8)

    fig.suptitle(
        "CivicAI: Named Reward Components — Social Crisis Task",
        color=TEXT_COL, fontsize=15, fontweight="bold", y=0.98
    )
    fig.savefig("assets/component_scores.png", dpi=150, facecolor=DARK_BG)
    plt.close(fig)
    print(" Saved: assets/component_scores.png")
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Script entry point: build every artefact in order, then list what was
    # written.
    print("\n[CivicAI] Generating Training Evidence Plots\n")
    for build_plot in (
        plot_reward_curve,
        plot_comparison_chart,
        plot_component_scores,
    ):
        build_plot()
    print("\n[CivicAI] All plots saved to assets/")
    for summary_line in (
        " assets/reward_curve.png",
        " assets/comparison_chart.png",
        " assets/component_scores.png",
        " assets/evaluation_results.json",
    ):
        print(summary_line)