"""
CivicAI — Training Evidence Generator
======================================

Produces three publication-quality plots and a JSON summary, saved to assets/:
  reward_curve.png         — Per-step reward over 50 turns (multi-agent baseline)
  comparison_chart.png     — Random vs Rule-Agent across all 3 tasks
  component_scores.png     — Economic / Health / Satisfaction / Crime breakdown
  evaluation_results.json  — Per-task mean/std reward behind comparison_chart.png

Run: venv/Scripts/python.exe scripts/generate_training_plots.py
"""

from __future__ import annotations

import os
import sys
import json

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import random
import numpy as np

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.ticker import MaxNLocator

from civicai.environment import CivicAIEnv
from civicai.models import Action, SubsidyPolicy
from civicai.reward import compute_reward, get_named_scores
from agents.orchestrator import Orchestrator

# Dark-theme palette shared by every figure in this script.
DARK_BG = "#0f172a"    # figure background (also passed to savefig)
PANEL_BG = "#1e293b"   # axes and legend background
GRID_COL = "#334155"   # grid lines, spines, legend border
TEXT_COL = "#e2e8f0"   # titles, category tick labels, legend text
MUTED_COL = "#94a3b8"  # axis labels and tick marks

# Series colours, keyed by baseline name or reward component.
COLORS = dict(
    random="#ef4444",
    agent="#06b6d4",
    economic="#f59e0b",
    health="#10b981",
    sat="#a78bfa",
    crime="#f97316",
)

# Output directory; the path is relative to the current working directory.
os.makedirs("assets", exist_ok=True)


# ---------------------------------------------------------------------------
# Episode runners
# ---------------------------------------------------------------------------

def run_random_episode(
    task_id: str = "stabilize_economy",
    seed: int = 42,
    max_turns: int = 50,
) -> dict:
    """Run one episode driven by uniformly random actions.

    Args:
        task_id: Task identifier passed to ``CivicAIEnv.reset``.
        seed: Seeds both the local action sampler and the environment reset.
        max_turns: Upper bound on the number of steps taken. The episode
            also ends as soon as ``env.step`` reports ``done``.

    Returns:
        A dict with two parallel per-step lists: ``"rewards"`` holds the
        reward returned by ``env.step``; ``"components"`` holds
        ``get_named_scores`` applied to the reward recomputed from the
        post-step state and the action that was just taken.
    """
    rng = random.Random(seed)
    env = CivicAIEnv()
    env.reset(task_id=task_id, seed=seed)
    # The set of policy choices never changes, so enumerate it once.
    policies = list(SubsidyPolicy)
    rewards, components_history = [], []

    for _ in range(max_turns):
        # Keyword arguments are evaluated top to bottom; keeping this order
        # means a given seed keeps producing the same action sequence.
        action = Action(
            tax_rate=rng.uniform(0.15, 0.50),
            healthcare_budget=rng.uniform(0.08, 0.35),
            education_budget=rng.uniform(0.05, 0.25),
            police_budget=rng.uniform(0.03, 0.18),
            subsidy_policy=rng.choice(policies),
        )
        _, reward, done, _ = env.step(action)
        rewards.append(reward)
        reward_obj = compute_reward(env.state(), action)
        components_history.append(get_named_scores(reward_obj))
        if done:
            break

    return {"rewards": rewards, "components": components_history}


def run_agent_episode(
    task_id: str = "stabilize_economy",
    max_turns: int | None = None,
) -> dict:
    """Run one episode driven by the ``Orchestrator``.

    Args:
        task_id: Task identifier passed to ``Orchestrator.reset``.
        max_turns: Optional safety cap on the number of steps. ``None``
            (the default) runs until ``run_step`` reports ``done``.

    Returns:
        A dict with two parallel per-step lists: ``"rewards"`` holds the
        reward returned by ``orch.run_step``; ``"components"`` holds
        ``get_named_scores`` applied to a reward recomputed from the
        post-step state and a default-constructed ``Action``.
    """
    env = CivicAIEnv()
    orch = Orchestrator(env)
    orch.reset(task_id)
    rewards, components_history = [], []

    done = False
    while not done:
        # Opt-in guard so a run that never signals `done` cannot spin forever.
        if max_turns is not None and len(rewards) >= max_turns:
            break
        _, reward, done, _ = orch.run_step()
        rewards.append(reward)
        state = env.state()
        # The applied action is not read back from the orchestrator here, so
        # a default Action stands in (original note: components come from
        # state).
        # NOTE(review): if compute_reward depends on the action, these
        # components can differ from the true per-step values — confirm.
        action = Action()
        reward_obj = compute_reward(state, action)
        components_history.append(get_named_scores(reward_obj))

    return {"rewards": rewards, "components": components_history}


# ---------------------------------------------------------------------------
# Plot 1 — Reward Curve (single task, agent vs random)
# ---------------------------------------------------------------------------

def plot_reward_curve() -> None:
    """Plot per-step reward for the random and rule agents on one task.

    Runs one random episode (seed 7) and one agent episode on
    ``stabilize_economy``, draws the raw reward series faintly and a
    5-turn moving average on top, and saves the figure to
    ``assets/reward_curve.png``.
    """
    print("  Generating reward_curve.png ...")
    window = 5
    # (colour, legend prefix, episode) — the random episode runs first.
    episodes = (
        (COLORS["random"], "Random Agent  ",
         run_random_episode("stabilize_economy", seed=7)),
        (COLORS["agent"], "Rule Agent    ",
         run_agent_episode("stabilize_economy")),
    )

    fig, ax = plt.subplots(figsize=(11, 5))
    fig.patch.set_facecolor(DARK_BG)
    ax.set_facecolor(PANEL_BG)

    kernel = np.ones(window) / window
    for color, name, episode in episodes:
        rewards = np.asarray(episode["rewards"], dtype=float)
        turns = np.arange(len(rewards))

        if len(rewards) > window:
            smooth = np.convolve(rewards, kernel, mode="valid")
            # Output i averages rewards[i:i + window]; plot it at the middle
            # turn of that window so it lines up with the raw series.
            smooth_x = np.arange(len(smooth)) + (window - 1) / 2
        else:
            # With fewer samples than the kernel, "valid" mode swaps its
            # operands and no longer yields a moving average; use raw data.
            smooth, smooth_x = rewards, turns

        ax.plot(turns, rewards, color=color, alpha=0.25, linewidth=1)
        ax.plot(smooth_x, smooth, color=color, linewidth=2,
                label=f"{name}(avg={np.mean(rewards):.3f})")
        ax.fill_between(smooth_x, smooth, alpha=0.08, color=color)

    ax.set_ylim(0, 1.05)
    ax.set_xlabel("Turn (Quarter)", color=MUTED_COL, fontsize=11)
    ax.set_ylabel("Step Reward [0–1]", color=MUTED_COL, fontsize=11)
    ax.set_title("CivicAI: Reward Curve — Economic Stability Task",
                 color=TEXT_COL, fontsize=14, fontweight="bold", pad=12)
    ax.tick_params(colors=MUTED_COL)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    for spine in ax.spines.values():
        spine.set_edgecolor(GRID_COL)
    ax.grid(axis="y", color=GRID_COL, linewidth=0.5, linestyle="--")
    ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=10)

    fig.tight_layout()
    fig.savefig("assets/reward_curve.png", dpi=150, facecolor=DARK_BG)
    plt.close(fig)
    print("  Saved: assets/reward_curve.png")


# ---------------------------------------------------------------------------
# Plot 2 — Comparison Chart (3 tasks, agent vs random)
# ---------------------------------------------------------------------------

def plot_comparison_chart() -> None:
    """Compare mean step reward of the random and rule agents on three tasks.

    For each task, runs ``n_ep`` random episodes (seeds ``0..n_ep-1``) and
    ``n_ep`` agent episodes, then takes the mean and standard deviation of
    the per-episode mean rewards. Saves a grouped bar chart to
    ``assets/comparison_chart.png`` and the underlying numbers to
    ``assets/evaluation_results.json``.
    """
    print("  Generating comparison_chart.png ...")
    tasks = ["stabilize_economy", "manage_pandemic", "control_crisis"]
    labels = ["Economic\nStability", "Pandemic\nManagement", "Social\nCrisis"]
    n_ep = 3

    agent_means, agent_stds = [], []
    random_means, random_stds = [], []

    for task_id in tasks:
        a_rewards, r_rewards = [], []
        for seed in range(n_ep):
            r_ep = run_random_episode(task_id, seed=seed)
            # NOTE(review): run_agent_episode takes no seed, so these runs
            # differ only if the orchestrator/environment is itself
            # non-deterministic — confirm that agent_std is meaningful.
            a_ep = run_agent_episode(task_id)
            r_rewards.append(float(np.mean(r_ep["rewards"])))
            a_rewards.append(float(np.mean(a_ep["rewards"])))
        agent_means.append(float(np.mean(a_rewards)))
        agent_stds.append(float(np.std(a_rewards)))
        random_means.append(float(np.mean(r_rewards)))
        random_stds.append(float(np.std(r_rewards)))

    x = np.arange(len(tasks))
    w = 0.35

    fig, ax = plt.subplots(figsize=(10, 6))
    fig.patch.set_facecolor(DARK_BG)
    ax.set_facecolor(PANEL_BG)

    # (x offset, means, stds, legend label, bar colour, error-bar colour)
    series = (
        (-w / 2, random_means, random_stds, "Random Agent",
         COLORS["random"], "#fca5a5"),
        (w / 2, agent_means, agent_stds, "Rule-Based Agent",
         COLORS["agent"], "#67e8f9"),
    )
    for offset, means, stds, name, color, err_color in series:
        bars = ax.bar(x + offset, means, w, yerr=stds,
                      label=name, color=color,
                      alpha=0.85, capsize=5,
                      error_kw={"ecolor": err_color, "linewidth": 1.5})
        # Put each value label above the top of its error bar so the two
        # never overlap when the spread exceeds the 0.02 padding.
        for bar, std in zip(bars, stds):
            ax.text(bar.get_x() + bar.get_width() / 2,
                    bar.get_height() + std + 0.02,
                    f"{bar.get_height():.3f}", ha="center", color=color,
                    fontsize=9, fontweight="bold")

    ax.set_xticks(x)
    ax.set_xticklabels(labels, color=TEXT_COL, fontsize=11)
    ax.set_ylim(0, 1.10)
    ax.set_ylabel("Avg Step Reward [0–1]", color=MUTED_COL, fontsize=11)
    ax.set_title("CivicAI: Before vs After — Agent vs Random Baseline",
                 color=TEXT_COL, fontsize=14, fontweight="bold", pad=12)
    ax.tick_params(colors=MUTED_COL)
    for spine in ax.spines.values():
        spine.set_edgecolor(GRID_COL)
    ax.grid(axis="y", color=GRID_COL, linewidth=0.5, linestyle="--")
    ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=10)

    fig.tight_layout()
    fig.savefig("assets/comparison_chart.png", dpi=150, facecolor=DARK_BG)
    plt.close(fig)
    print("  Saved: assets/comparison_chart.png")

    # Save JSON results, one entry per task, rounded to 4 decimals.
    results = {
        task: {
            "agent_mean": round(a_mean, 4),
            "agent_std":  round(a_std, 4),
            "random_mean": round(r_mean, 4),
            "random_std":  round(r_std, 4),
            "improvement": round(a_mean - r_mean, 4),
        }
        for task, a_mean, a_std, r_mean, r_std in zip(
            tasks, agent_means, agent_stds, random_means, random_stds
        )
    }
    with open("assets/evaluation_results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print("  Saved: assets/evaluation_results.json")


# ---------------------------------------------------------------------------
# Plot 3 — Named Component Scores (economic/health/satisfaction/crime)
# ---------------------------------------------------------------------------

def plot_component_scores() -> None:
    """Plot the four named reward components on the ``control_crisis`` task.

    Runs one random episode (seed 13) and one agent episode, then draws a
    2x2 grid with one panel per component (economic, health, satisfaction,
    crime). Each panel shows the raw per-turn series faintly and a 5-turn
    moving average on top. Saves to ``assets/component_scores.png``.
    """
    print("  Generating component_scores.png ...")
    random_ep = run_random_episode("control_crisis", seed=13)
    agent_ep  = run_agent_episode("control_crisis")

    window = 5
    kernel = np.ones(window) / window

    def smooth(values):
        """Return ``(x, y)`` for the moving average of *values*."""
        if len(values) > window:
            averaged = np.convolve(values, kernel, mode="valid")
            # Output i averages values[i:i + window]; place it at the middle
            # turn of that window so it lines up with the raw series.
            return np.arange(len(averaged)) + (window - 1) / 2, averaged
        # Too few points to smooth: fall back to the raw series.
        return range(len(values)), values

    fig = plt.figure(figsize=(14, 9))
    fig.patch.set_facecolor(DARK_BG)
    gs = gridspec.GridSpec(2, 2, figure=fig, hspace=0.45, wspace=0.35)

    component_info = [
        ("economic_score",     "Economic Score",     COLORS["economic"]),
        ("health_score",       "Health Score",       COLORS["health"]),
        ("satisfaction_score", "Satisfaction Score", COLORS["sat"]),
        ("crime_score",        "Crime Score",        COLORS["crime"]),
    ]

    for idx, (key, label, color) in enumerate(component_info):
        row, col = divmod(idx, 2)
        ax = fig.add_subplot(gs[row, col])
        ax.set_facecolor(PANEL_BG)

        r_vals = [c[key] for c in random_ep["components"]]
        a_vals = [c[key] for c in agent_ep["components"]]

        r_x, r_s = smooth(r_vals)
        a_x, a_s = smooth(a_vals)

        ax.plot(r_vals, color=COLORS["random"], alpha=0.20, linewidth=0.8)
        ax.plot(r_x, r_s, color=COLORS["random"], linewidth=1.8,
                label=f"Random (avg={np.mean(r_vals):.2f})")
        ax.plot(a_vals, color=color, alpha=0.20, linewidth=0.8)
        ax.plot(a_x, a_s, color=color, linewidth=1.8,
                label=f"Agent  (avg={np.mean(a_vals):.2f})")

        # Only the agent curve is shaded.
        ax.fill_between(a_x, a_s, alpha=0.10, color=color)

        ax.set_ylim(0, 1.05)
        ax.set_title(label, color=TEXT_COL, fontsize=12, fontweight="bold")
        ax.set_xlabel("Turn", color=MUTED_COL, fontsize=9)
        ax.set_ylabel("Score [0–1]", color=MUTED_COL, fontsize=9)
        ax.tick_params(colors=MUTED_COL, labelsize=8)
        for spine in ax.spines.values():
            spine.set_edgecolor(GRID_COL)
        ax.grid(color=GRID_COL, linewidth=0.4, linestyle="--")
        ax.legend(facecolor=PANEL_BG, edgecolor=GRID_COL, labelcolor=TEXT_COL, fontsize=8)

    fig.suptitle(
        "CivicAI: Named Reward Components — Social Crisis Task",
        color=TEXT_COL, fontsize=15, fontweight="bold", y=0.98
    )

    fig.savefig("assets/component_scores.png", dpi=150, facecolor=DARK_BG)
    plt.close(fig)
    print("  Saved: assets/component_scores.png")


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    # Script entry point: build every figure in order, then list the outputs.
    print("\n[CivicAI] Generating Training Evidence Plots\n")
    for make_plot in (plot_reward_curve, plot_comparison_chart, plot_component_scores):
        make_plot()

    print("\n[CivicAI] All plots saved to assets/")
    for artifact in (
        "reward_curve.png",
        "comparison_chart.png",
        "component_scores.png",
        "evaluation_results.json",
    ):
        print(f"  assets/{artifact}")