Spaces:

mahammadaftab
/

CivicAI

Sleeping

App Files Files Community

CivicAI / scripts /generate_training_plots.py

mahammadaftab

Final updated

6298125 11 days ago

raw

history blame contribute delete

11.9 kB

	"""
	CivicAI — Training Evidence Generator
	======================================

	Produces three publication-quality plots, plus a JSON summary, saved to assets/:
	reward_curve.png — Per-step reward over 50 turns (multi-agent baseline)
	comparison_chart.png — Random vs Rule-Agent across all 3 tasks
	component_scores.png — Economic / Health / Satisfaction / Crime breakdown
	evaluation_results.json — Per-task mean/std reward and agent-vs-random improvement

	Run: venv/Scripts/python.exe scripts/generate_training_plots.py
	"""

	from __future__ import annotations

	import os
	import sys
	import json

	sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	import random
	import numpy as np

	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt
	import matplotlib.gridspec as gridspec
	from matplotlib.ticker import MaxNLocator

	from civicai.environment import CivicAIEnv
	from civicai.models import Action, SubsidyPolicy
	from civicai.reward import compute_reward, get_named_scores
	from agents.orchestrator import Orchestrator

	# Dark-theme palette shared by every figure in this script.
	DARK_BG = "#0f172a"    # figure background (also passed to savefig)
	PANEL_BG = "#1e293b"   # axes background and legend face
	GRID_COL = "#334155"   # grid lines, spines and legend edge
	TEXT_COL = "#e2e8f0"   # titles and legend text
	MUTED_COL = "#94a3b8"  # axis labels and tick labels

	# Series colours, keyed by the short name used at each plotting call site.
	COLORS = {
	    "random": "#ef4444",
	    "agent": "#06b6d4",
	    "economic": "#f59e0b",
	    "health": "#10b981",
	    "sat": "#a78bfa",
	    "crime": "#f97316",
	}

	# All artifacts are written under ./assets, relative to the current
	# working directory; create it up front so savefig/open never fail on it.
	os.makedirs("assets", exist_ok=True)


	# ---------------------------------------------------------------------------
	# Episode runners
	# ---------------------------------------------------------------------------

	def run_random_episode(task_id: str = "stabilize_economy", seed: int = 42) -> dict:
	    """Play one episode in which every policy lever is sampled at random.

	    Args:
	        task_id: Task name forwarded to ``CivicAIEnv.reset``.
	        seed: Seeds both the local action sampler and the environment reset.

	    Returns:
	        A dict with two parallel lists, one entry per turn played:
	        ``"rewards"`` holds the reward returned by ``env.step`` and
	        ``"components"`` holds the result of ``get_named_scores`` for the
	        state observed right after that step.
	    """
	    sampler = random.Random(seed)
	    env = CivicAIEnv()
	    env.reset(task_id=task_id, seed=seed)

	    step_rewards = []
	    score_history = []

	    # At most 50 turns; the environment may end the episode earlier.
	    for _turn in range(50):
	        # Sampling order matters for reproducibility: keep it fixed.
	        tax = sampler.uniform(0.15, 0.50)
	        healthcare = sampler.uniform(0.08, 0.35)
	        education = sampler.uniform(0.05, 0.25)
	        police = sampler.uniform(0.03, 0.18)
	        subsidy = sampler.choice(list(SubsidyPolicy))
	        action = Action(
	            tax_rate=tax,
	            healthcare_budget=healthcare,
	            education_budget=education,
	            police_budget=police,
	            subsidy_policy=subsidy,
	        )

	        _, reward, done, _ = env.step(action)
	        step_rewards.append(reward)

	        # Re-score the post-step state to obtain the named component scores.
	        breakdown = compute_reward(env.state(), action)
	        score_history.append(get_named_scores(breakdown))

	        if done:
	            break

	    return {"rewards": step_rewards, "components": score_history}


	def run_agent_episode(task_id: str = "stabilize_economy") -> dict:
	    """Play one episode driven by the ``Orchestrator`` until it reports done.

	    Args:
	        task_id: Task name forwarded to ``Orchestrator.reset``.

	    Returns:
	        A dict with two parallel lists, one entry per turn played:
	        ``"rewards"`` holds the reward returned by ``run_step`` and
	        ``"components"`` holds the result of ``get_named_scores`` for the
	        state observed right after that step.

	    NOTE(review): the component scores are computed with a default
	    ``Action()`` rather than the action the orchestrator actually took. If
	    ``compute_reward`` depends on the action, these components are not
	    directly comparable with those from ``run_random_episode`` — confirm.

	    NOTE(review): there is no turn cap here; the loop relies on
	    ``run_step`` eventually returning ``done`` — confirm the environment
	    guarantees that.
	    """
	    env = CivicAIEnv()
	    orchestrator = Orchestrator(env)
	    orchestrator.reset(task_id)

	    step_rewards = []
	    score_history = []

	    while True:
	        _, reward, finished, _ = orchestrator.run_step()
	        step_rewards.append(reward)

	        # Stand-in action: the orchestrator's real action is not exposed here.
	        breakdown = compute_reward(env.state(), Action())
	        score_history.append(get_named_scores(breakdown))

	        if finished:
	            break

	    return {"rewards": step_rewards, "components": score_history}


	# ---------------------------------------------------------------------------
	# Plot 1 — Reward Curve (single task, agent vs random)
	# ---------------------------------------------------------------------------

	def _centred_moving_average(values, window: int = 5):
	    """Return ``(x, y)`` for plotting a moving average of *values*.

	    ``np.convolve(..., mode="valid")`` yields ``len(values) - window + 1``
	    samples, where sample ``i`` averages turns ``i .. i + window - 1``. The
	    returned x positions are the centres of those windows, so the smoothed
	    curve lines up with the raw per-turn trace instead of leading it.

	    When there are not more than *window* samples, the raw series is
	    returned unchanged: ``mode="valid"`` would give at most one point, and
	    with fewer samples than the kernel ``np.convolve`` swaps its operands
	    and the result is not a moving average at all.
	    """
	    series = np.asarray(values, dtype=float)
	    if series.size <= window:
	        return np.arange(series.size), series
	    smoothed = np.convolve(series, np.ones(window) / window, mode="valid")
	    centres = np.arange(smoothed.size) + (window - 1) / 2
	    return centres, smoothed


	def plot_reward_curve() -> None:
	    """Plot per-turn reward for the random and rule agents on one task.

	    Runs one random episode (seed 7) and one orchestrator episode on
	    ``"stabilize_economy"``, draws each raw reward trace faintly with its
	    5-turn moving average on top, and writes ``assets/reward_curve.png``.
	    """
	    print(" Generating reward_curve.png ...")
	    random_ep = run_random_episode("stabilize_economy", seed=7)
	    agent_ep = run_agent_episode("stabilize_economy")

	    fig, ax = plt.subplots(figsize=(11, 5))
	    fig.patch.set_facecolor(DARK_BG)
	    ax.set_facecolor(PANEL_BG)

	    series = (
	        ("Random Agent", random_ep["rewards"], COLORS["random"]),
	        ("Rule Agent", agent_ep["rewards"], COLORS["agent"]),
	    )
	    for name, rewards, color in series:
	        smooth_x, smooth_y = _centred_moving_average(rewards)
	        # Faint raw trace, then the bold smoothed curve with a light fill.
	        ax.plot(range(len(rewards)), rewards, color=color, alpha=0.25, linewidth=1)
	        ax.plot(smooth_x, smooth_y, color=color, linewidth=2,
	                label=f"{name} (avg={np.mean(rewards):.3f})")
	        ax.fill_between(smooth_x, smooth_y, alpha=0.08, color=color)

	    ax.set_ylim(0, 1.05)
	    ax.set_xlabel("Turn (Quarter)", color=MUTED_COL, fontsize=11)
	    ax.set_ylabel("Step Reward [0–1]", color=MUTED_COL, fontsize=11)
	    ax.set_title("CivicAI: Reward Curve — Economic Stability Task",
	                 color=TEXT_COL, fontsize=14, fontweight="bold", pad=12)
	    ax.tick_params(colors=MUTED_COL)
	    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
	    for spine in ax.spines.values():
	        spine.set_edgecolor(GRID_COL)
	    ax.grid(axis="y", color=GRID_COL, linewidth=0.5, linestyle="--")
	    ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=10)

	    plt.tight_layout()
	    fig.savefig("assets/reward_curve.png", dpi=150, facecolor=DARK_BG)
	    plt.close(fig)
	    print(" Saved: assets/reward_curve.png")


	# ---------------------------------------------------------------------------
	# Plot 2 — Comparison Chart (3 tasks, agent vs random)
	# ---------------------------------------------------------------------------

	def plot_comparison_chart() -> None:
	    """Compare mean step reward of the random and rule agents on three tasks.

	    For each task, runs ``n_ep`` random episodes (seeds ``0 .. n_ep - 1``)
	    and ``n_ep`` orchestrator episodes, averages each episode's rewards, and
	    draws grouped bars (mean with std error bars) to
	    ``assets/comparison_chart.png``. The same statistics, rounded to 4
	    decimals, are written to ``assets/evaluation_results.json``.

	    NOTE(review): ``run_agent_episode`` takes no seed, so the agent's std
	    only reflects whatever nondeterminism the orchestrator/environment has
	    on its own — confirm that is intended.
	    """
	    print(" Generating comparison_chart.png ...")
	    tasks = ["stabilize_economy", "manage_pandemic", "control_crisis"]
	    labels = ["Economic\nStability", "Pandemic\nManagement", "Social\nCrisis"]
	    n_ep = 3

	    agent_means, agent_stds = [], []
	    random_means, random_stds = [], []

	    for task_id in tasks:
	        # (random_mean, agent_mean) per seed; runs stay interleaved per seed.
	        per_seed = []
	        for seed in range(n_ep):
	            random_run = run_random_episode(task_id, seed=seed)
	            agent_run = run_agent_episode(task_id)
	            random_avg = float(np.mean(random_run["rewards"]))
	            agent_avg = float(np.mean(agent_run["rewards"]))
	            per_seed.append((random_avg, agent_avg))

	        r_rewards = [pair[0] for pair in per_seed]
	        a_rewards = [pair[1] for pair in per_seed]
	        agent_means.append(float(np.mean(a_rewards)))
	        agent_stds.append(float(np.std(a_rewards)))
	        random_means.append(float(np.mean(r_rewards)))
	        random_stds.append(float(np.std(r_rewards)))

	    x = np.arange(len(tasks))
	    w = 0.35

	    fig, ax = plt.subplots(figsize=(10, 6))
	    fig.patch.set_facecolor(DARK_BG)
	    ax.set_facecolor(PANEL_BG)

	    # (x offset, means, stds, legend label, bar colour, error-bar colour)
	    bar_specs = (
	        (-w / 2, random_means, random_stds, "Random Agent", COLORS["random"], "#fca5a5"),
	        (w / 2, agent_means, agent_stds, "Rule-Based Agent", COLORS["agent"], "#67e8f9"),
	    )
	    drawn = []
	    for offset, means, stds, legend_label, face, err_colour in bar_specs:
	        bars = ax.bar(
	            x + offset, means, w, yerr=stds,
	            label=legend_label, color=face,
	            alpha=0.85, capsize=5,
	            error_kw={"ecolor": err_colour, "linewidth": 1.5},
	        )
	        drawn.append((bars, face))

	    # Value labels just above each bar, in the bar's own colour.
	    for bars, face in drawn:
	        for bar in bars:
	            height = bar.get_height()
	            ax.text(
	                bar.get_x() + bar.get_width() / 2, height + 0.02,
	                f"{height:.3f}", ha="center", color=face,
	                fontsize=9, fontweight="bold",
	            )

	    ax.set_xticks(x)
	    ax.set_xticklabels(labels, color=TEXT_COL, fontsize=11)
	    ax.set_ylim(0, 1.10)
	    ax.set_ylabel("Avg Step Reward [0–1]", color=MUTED_COL, fontsize=11)
	    ax.set_title("CivicAI: Before vs After — Agent vs Random Baseline",
	                 color=TEXT_COL, fontsize=14, fontweight="bold", pad=12)
	    ax.tick_params(colors=MUTED_COL)
	    for spine in ax.spines.values():
	        spine.set_edgecolor(GRID_COL)
	    ax.grid(axis="y", color=GRID_COL, linewidth=0.5, linestyle="--")
	    ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=10)

	    plt.tight_layout()
	    plt.savefig("assets/comparison_chart.png", dpi=150, facecolor=DARK_BG)
	    plt.close()
	    print(" Saved: assets/comparison_chart.png")

	    # Persist the numbers behind the chart.
	    results = {}
	    stats = zip(tasks, agent_means, agent_stds, random_means, random_stds)
	    for task_id, a_mean, a_std, r_mean, r_std in stats:
	        results[task_id] = {
	            "agent_mean": round(a_mean, 4),
	            "agent_std": round(a_std, 4),
	            "random_mean": round(r_mean, 4),
	            "random_std": round(r_std, 4),
	            "improvement": round(a_mean - r_mean, 4),
	        }
	    with open("assets/evaluation_results.json", "w") as f:
	        json.dump(results, f, indent=2)
	    print(" Saved: assets/evaluation_results.json")


	# ---------------------------------------------------------------------------
	# Plot 3 — Named Component Scores (economic/health/satisfaction/crime)
	# ---------------------------------------------------------------------------

	def plot_component_scores() -> None:
	    """Plot the four named reward components for the social-crisis task.

	    Runs one random episode (seed 13) and one orchestrator episode on
	    ``"control_crisis"`` and draws a 2x2 grid, one panel per component
	    (economic, health, satisfaction, crime). Each panel shows the faint raw
	    per-turn trace plus a 5-turn moving average for both agents. The figure
	    is written to ``assets/component_scores.png``.
	    """
	    print(" Generating component_scores.png ...")
	    random_ep = run_random_episode("control_crisis", seed=13)
	    agent_ep = run_agent_episode("control_crisis")

	    fig = plt.figure(figsize=(14, 9))
	    fig.patch.set_facecolor(DARK_BG)
	    gs = gridspec.GridSpec(2, 2, figure=fig, hspace=0.45, wspace=0.35)

	    component_info = [
	        ("economic_score", "Economic Score", COLORS["economic"]),
	        ("health_score", "Health Score", COLORS["health"]),
	        ("satisfaction_score", "Satisfaction Score", COLORS["sat"]),
	        ("crime_score", "Crime Score", COLORS["crime"]),
	    ]

	    window = 5
	    kernel = np.ones(window) / window

	    def smoothed_xy(values):
	        """Return (x, y) of the moving average, or the raw series if too short."""
	        if len(values) <= window:
	            return range(len(values)), values
	        averaged = np.convolve(values, kernel, mode="valid")
	        # Sample i averages turns i..i+window-1; plot it at the window centre
	        # so the smoothed curve lines up with the raw trace.
	        return np.arange(len(averaged)) + (window - 1) / 2, averaged

	    for idx, (key, label, color) in enumerate(component_info):
	        row, col = divmod(idx, 2)
	        ax = fig.add_subplot(gs[row, col])
	        ax.set_facecolor(PANEL_BG)

	        r_vals = [c[key] for c in random_ep["components"]]
	        a_vals = [c[key] for c in agent_ep["components"]]

	        r_x, r_s = smoothed_xy(r_vals)
	        a_x, a_s = smoothed_xy(a_vals)

	        ax.plot(r_vals, color=COLORS["random"], alpha=0.20, linewidth=0.8)
	        ax.plot(r_x, r_s, color=COLORS["random"], linewidth=1.8,
	                label=f"Random (avg={np.mean(r_vals):.2f})")
	        ax.plot(a_vals, color=color, alpha=0.20, linewidth=0.8)
	        ax.plot(a_x, a_s, color=color, linewidth=1.8,
	                label=f"Agent (avg={np.mean(a_vals):.2f})")

	        ax.fill_between(a_x, a_s, alpha=0.10, color=color)

	        ax.set_ylim(0, 1.05)
	        ax.set_title(label, color=TEXT_COL, fontsize=12, fontweight="bold")
	        ax.set_xlabel("Turn", color=MUTED_COL, fontsize=9)
	        ax.set_ylabel("Score [0–1]", color=MUTED_COL, fontsize=9)
	        ax.tick_params(colors=MUTED_COL, labelsize=8)
	        for spine in ax.spines.values():
	            spine.set_edgecolor(GRID_COL)
	        ax.grid(color=GRID_COL, linewidth=0.4, linestyle="--")
	        ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=8)

	    fig.suptitle(
	        "CivicAI: Named Reward Components — Social Crisis Task",
	        color=TEXT_COL, fontsize=15, fontweight="bold", y=0.98
	    )

	    plt.savefig("assets/component_scores.png", dpi=150, facecolor=DARK_BG)
	    plt.close()
	    print(" Saved: assets/component_scores.png")


	# ---------------------------------------------------------------------------
	# Main
	# ---------------------------------------------------------------------------

	if __name__ == "__main__":
	    # Script entry point: build every figure, then list what was written.
	    print("\n[CivicAI] Generating Training Evidence Plots\n")
	    for make_plot in (plot_reward_curve, plot_comparison_chart, plot_component_scores):
	        make_plot()

	    print("\n[CivicAI] All plots saved to assets/")
	    for artifact in (
	        "reward_curve.png",
	        "comparison_chart.png",
	        "component_scores.png",
	        "evaluation_results.json",
	    ):
	        print(f" assets/{artifact}")