Spaces:

mahammadaftab
/

CivicAI

Sleeping

File size: 6,248 Bytes

315caa2

"""
CivicAI Evaluation & Metrics Script

Runs multiple episodes across all tasks, computes metrics,
generates reward curve plots and comparison tables.
"""

from __future__ import annotations

import json
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np

from civicai.environment import CivicAIEnv
from civicai.models import Action, SubsidyPolicy
from agents.orchestrator import Orchestrator


def run_evaluation(episodes_per_task: int = 5, max_random_steps: int = 50) -> dict:
    """Run comprehensive evaluation across all tasks and agent types.

    For each task, ``episodes_per_task`` episodes are run with the
    multi-agent ``Orchestrator`` and the same number with a baseline that
    samples every action field uniformly at random. The per-episode
    average rewards of both are then summarised.

    Args:
        episodes_per_task: Episodes to run per task for each agent type.
            Must be at least 1.
        max_random_steps: Upper bound on the number of environment steps in
            one random-baseline episode; the episode stops earlier as soon
            as the environment reports ``done``.

    Returns:
        A dict keyed by task id. Each value is a dict with the keys
        ``agent_mean``, ``agent_std``, ``random_mean``, ``random_std`` and
        ``improvement`` (agent mean minus random mean), all rounded to
        four decimals.

    Raises:
        ValueError: If ``episodes_per_task`` is smaller than 1. Without
            this guard the statistics would be taken over empty lists and
            come out as NaN.
    """
    # Function-local on purpose: the module does not import ``random`` at
    # top level. Imported once here instead of once per task.
    import random

    if episodes_per_task < 1:
        raise ValueError(
            f"episodes_per_task must be >= 1, got {episodes_per_task!r}"
        )

    tasks = ["stabilize_economy", "manage_pandemic", "control_crisis"]
    subsidy_choices = list(SubsidyPolicy)
    results = {}

    for task_id in tasks:
        print(f"\n{'='*50}")
        print(f"  Evaluating: {task_id}")
        print(f"{'='*50}")

        # Multi-agent baseline: a fresh environment for every episode.
        agent_rewards = []
        for ep in range(episodes_per_task):
            env = CivicAIEnv()
            orch = Orchestrator(env)
            result = orch.run_episode(task_id)
            agent_rewards.append(result["avg_reward"])
            print(f"    Agent ep{ep}: avg_reward={result['avg_reward']:.4f}")

        # Random baseline: uniformly sampled actions, averaged per step.
        random_rewards = []
        for _ in range(episodes_per_task):
            env = CivicAIEnv()
            env.reset(task_id)
            total_r = 0.0
            count = 0
            for _ in range(max_random_steps):
                action = Action(
                    tax_rate=random.uniform(0.1, 0.5),
                    healthcare_budget=random.uniform(0.05, 0.4),
                    education_budget=random.uniform(0.05, 0.3),
                    police_budget=random.uniform(0.03, 0.2),
                    subsidy_policy=random.choice(subsidy_choices),
                )
                _, reward, done, _ = env.step(action)
                total_r += reward
                count += 1
                if done:
                    break
            # max(1, count) avoids dividing by zero when no step was taken.
            random_rewards.append(total_r / max(1, count))

        agent_mean = float(np.mean(agent_rewards))
        random_mean = float(np.mean(random_rewards))
        results[task_id] = {
            "agent_mean": round(agent_mean, 4),
            "agent_std": round(float(np.std(agent_rewards)), 4),
            "random_mean": round(random_mean, 4),
            "random_std": round(float(np.std(random_rewards)), 4),
            # Difference of the unrounded means, rounded once at the end.
            "improvement": round(agent_mean - random_mean, 4),
        }

    return results


# Display labels for the known task ids; unknown ids are shown verbatim.
_TASK_LABELS = {
    "stabilize_economy": "Economic\nStability",
    "manage_pandemic": "Pandemic\nManagement",
    "control_crisis": "Social\nCrisis",
}


def _new_dark_axes(figsize):
    """Create a figure and axes with the dark background colours applied."""
    fig, ax = plt.subplots(figsize=figsize)
    fig.patch.set_facecolor("#0f172a")
    ax.set_facecolor("#1e293b")
    return fig, ax


def _apply_dark_theme(ax) -> None:
    """Apply the shared tick, legend and spine styling to *ax*."""
    ax.tick_params(colors="white")
    ax.legend(facecolor="#1e293b", edgecolor="#334155", labelcolor="white")
    ax.spines["bottom"].set_color("#334155")
    ax.spines["left"].set_color("#334155")
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)


def generate_plots(results: dict) -> None:
    """Generate evaluation plots.

    Writes two PNG files into the ``assets`` directory (created if it does
    not exist):

    * ``assets/comparison_chart.png`` - grouped bars of the random and
      multi-agent mean rewards per task, with standard deviations as
      error bars.
    * ``assets/reward_curve.png`` - the per-turn reward curve of one
      freshly run ``"stabilize_economy"`` episode, with its mean drawn as
      a dashed line.

    Args:
        results: Mapping of task id to a dict providing ``agent_mean``,
            ``agent_std``, ``random_mean`` and ``random_std``.
    """
    os.makedirs("assets", exist_ok=True)

    # --- Comparison Bar Chart ---
    fig, ax = _new_dark_axes((10, 6))

    tasks = list(results.keys())
    x = np.arange(len(tasks))
    width = 0.35

    agent_means = [results[t]["agent_mean"] for t in tasks]
    random_means = [results[t]["random_mean"] for t in tasks]
    agent_stds = [results[t]["agent_std"] for t in tasks]
    random_stds = [results[t]["random_std"] for t in tasks]

    ax.bar(x - width/2, random_means, width, yerr=random_stds,
           label="Random", color="#ef4444", alpha=0.8, capsize=4)
    ax.bar(x + width/2, agent_means, width, yerr=agent_stds,
           label="Multi-Agent", color="#06b6d4", alpha=0.8, capsize=4)

    ax.set_ylabel("Avg Reward", color="white", fontsize=12)
    ax.set_title("CivicAI: Agent vs Random Performance", color="white", fontsize=14, fontweight="bold")
    ax.set_xticks(x)
    # Look labels up by task id so they always match the plotted bars,
    # whatever the number or order of tasks in ``results``.
    task_labels = [_TASK_LABELS.get(t, t) for t in tasks]
    ax.set_xticklabels(task_labels, color="white")
    _apply_dark_theme(ax)
    ax.set_ylim(0, 1)

    fig.tight_layout()
    fig.savefig("assets/comparison_chart.png", dpi=150, facecolor="#0f172a")
    plt.close(fig)
    print("  Saved: assets/comparison_chart.png")

    # --- Reward Curve ---
    fig, ax = _new_dark_axes((10, 5))

    # The curve comes from a new episode, not from ``results``.
    env = CivicAIEnv()
    orch = Orchestrator(env)
    result = orch.run_episode("stabilize_economy")
    curve = result["reward_curve"]
    mean_reward = np.mean(curve)

    ax.plot(curve, color="#06b6d4", linewidth=2, label="Multi-Agent Reward")
    ax.fill_between(range(len(curve)), curve, alpha=0.15, color="#06b6d4")
    ax.axhline(y=mean_reward, color="#a855f7", linestyle="--", alpha=0.7, label=f"Mean: {mean_reward:.3f}")

    ax.set_xlabel("Turn", color="white", fontsize=12)
    ax.set_ylabel("Reward", color="white", fontsize=12)
    ax.set_title("CivicAI: Reward Curve (Economic Stability)", color="white", fontsize=14, fontweight="bold")
    _apply_dark_theme(ax)

    fig.tight_layout()
    fig.savefig("assets/reward_curve.png", dpi=150, facecolor="#0f172a")
    plt.close(fig)
    print("  Saved: assets/reward_curve.png")


def print_results_table(results: dict) -> None:
    """Write the per-task comparison table to stdout.

    Args:
        results: Mapping of task id to a dict providing ``random_mean``,
            ``agent_mean`` and ``improvement``. One row is printed per
            entry, in the mapping's iteration order.
    """
    banner = "=" * 70
    print(f"\n{banner}")
    print(f"  {'Task':<25} {'Random':>10} {'Agent':>10} {'Improve':>10}")
    print(f"  {'-'*55}")
    for name, stats in results.items():
        baseline = stats["random_mean"]
        agent = stats["agent_mean"]
        delta = stats["improvement"]
        # The improvement column always carries an explicit sign.
        print(f"  {name:<25} {baseline:>10.4f} {agent:>10.4f} {delta:>+10.4f}")
    print(banner)


if __name__ == "__main__":
    # Episodes per task: first command-line argument if given, otherwise 3.
    episodes = 3
    if len(sys.argv) > 1:
        episodes = int(sys.argv[1])

    print("\n[CivicAI] Evaluation Suite\n")

    # Evaluate, print the table, then render the plots.
    results = run_evaluation(episodes)
    print_results_table(results)
    generate_plots(results)

    # Store the raw metrics as JSON in the assets directory.
    os.makedirs("assets", exist_ok=True)
    with open("assets/evaluation_results.json", "w") as out_file:
        json.dump(results, out_file, indent=2)
    print("\n  Results saved to assets/evaluation_results.json")