# Source: Hugging Face Space mahammadaftab/CivicAI (status: Sleeping)
# File size: 6,982 Bytes
# Revision: 315caa2

"""
CivicAI Baseline Inference Script

Uses OpenAI GPT-4o-mini to make policy decisions.
Falls back to rule-based agent if no API key is available.
"""

from __future__ import annotations

import json
import os
import sys

# Add project root to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from civicai.environment import CivicAIEnv
from civicai.models import Action, SubsidyPolicy


def _extract_json_text(response_text: str) -> str:
    """Return the slice of *response_text* most likely to hold the JSON object."""
    text = response_text.strip()
    if "```json" in text:
        text = text.split("```json")[1].split("```")[0]
    elif "```" in text:
        text = text.split("```")[1].split("```")[0]

    # Replies are sometimes wrapped in prose, or fenced with a tag other than
    # lowercase "json"; keeping only the outermost {...} makes both parseable.
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        text = text[start:end + 1]
    return text


def _unit_interval(raw: object, default: float) -> float:
    """Coerce *raw* to a float clamped to [0, 1]; return *default* if unusable."""
    import math

    try:
        value = float(raw)
    except (TypeError, ValueError, OverflowError):
        return default
    # NaN compares false with everything, so a plain min/max clamp would turn
    # it into 1 (the maximum rate) instead of rejecting it.
    if math.isnan(value):
        return default
    return max(0.0, min(1.0, value))


def parse_action(response_text: str) -> Action:
    """Parse an LLM reply into an Action, degrading gracefully on bad input.

    The reply may be bare JSON, JSON inside a Markdown code fence, or a JSON
    object surrounded by other text. Each numeric field is converted and
    clamped to [0, 1] independently, so one malformed field falls back to
    that field's default (0.25 / 0.20 / 0.15 / 0.10) without discarding the
    rest of the reply. An unrecognised ``subsidy_policy`` falls back to
    ``"none"``.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The parsed Action, or ``Action()`` (constructor defaults) when the
        reply contains no usable JSON object or the Action cannot be built.
    """
    try:
        data = json.loads(_extract_json_text(response_text))
        if not isinstance(data, dict):
            return Action()

        try:
            subsidy = SubsidyPolicy(data.get("subsidy_policy", "none"))
        except (ValueError, TypeError):
            subsidy = SubsidyPolicy("none")

        return Action(
            tax_rate=_unit_interval(data.get("tax_rate", 0.25), 0.25),
            healthcare_budget=_unit_interval(data.get("healthcare_budget", 0.20), 0.20),
            education_budget=_unit_interval(data.get("education_budget", 0.15), 0.15),
            police_budget=_unit_interval(data.get("police_budget", 0.10), 0.10),
            subsidy_policy=subsidy,
            # Passed through unchanged; if Action rejects it, the handler
            # below falls back to the default action.
            emergency_response=data.get("emergency_response"),
        )
    except Exception:
        # Deliberate best-effort boundary: a bad reply must never abort the
        # episode, so anything unexpected yields the default action.
        return Action()


def build_prompt(obs_dict: dict) -> str:
    """Render an observation dict as the policy-advisor prompt for the LLM.

    Args:
        obs_dict: Observation mapping. The keys read are ``turn``,
            ``population``, ``employment_rate``, ``inflation``,
            ``public_satisfaction``, ``health_index``, ``crime_rate``,
            ``gdp``, ``budget_balance``, ``resources`` and ``active_events``.

    Returns:
        The prompt text: an instruction line, the formatted current state,
        and the JSON schema the model is asked to reply with.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    # Entries are formatted top to bottom, so errors surface in field order.
    state_lines = [
        f"- Turn: {obs_dict['turn']}/50",
        f"- Population: {obs_dict['population']:,}",
        f"- Employment Rate: {obs_dict['employment_rate']:.1%}",
        f"- Inflation: {obs_dict['inflation']:.1%}",
        f"- Public Satisfaction: {obs_dict['public_satisfaction']:.1%}",
        f"- Health Index: {obs_dict['health_index']:.1%}",
        f"- Crime Rate: {obs_dict['crime_rate']:.1%}",
        f"- GDP: ${obs_dict['gdp']:.1f}B",
        f"- Budget Balance: {obs_dict['budget_balance']:.1%}",
        f"- Resources: {json.dumps(obs_dict['resources'], indent=2)}",
        f"- Active Events: {obs_dict['active_events']}",
    ]

    # Literal reply template shown to the model (not itself valid JSON).
    reply_schema = [
        "{",
        '    "tax_rate": 0.0-1.0,',
        '    "healthcare_budget": 0.0-1.0,',
        '    "education_budget": 0.0-1.0,',
        '    "police_budget": 0.0-1.0,',
        '    "subsidy_policy": "none|agriculture|industry|technology",',
        '    "emergency_response": null or "lockdown|stimulus|open"',
        "}",
    ]

    sections = [
        "You are an AI policy advisor managing a society. Analyze the current state and decide on policy actions.",
        "",
        "CURRENT STATE:",
        *state_lines,
        "",
        "Respond with ONLY a JSON object (no other text):",
        *reply_schema,
    ]
    return "\n".join(sections)


def run_llm_agent(task_id: str = "stabilize_economy") -> dict:
    """Play one episode with GPT-4o-mini choosing the action each turn.

    For up to 50 turns the current observation is rendered with
    ``build_prompt``, sent to ``gpt-4o-mini`` (temperature 0.3), and the
    reply is converted to an action with ``parse_action`` before stepping
    the environment. Progress is printed per turn, followed by a summary.

    Args:
        task_id: Task identifier passed to ``CivicAIEnv.reset``.

    Returns:
        A dict with ``task_id``, ``total_reward``, ``avg_reward``, ``steps``,
        ``reward_curve`` (per-turn rewards) and ``step_details`` (per-turn
        action, reward and resulting observation dumps).
    """
    from openai import OpenAI

    llm = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    env = CivicAIEnv()
    obs = env.reset(task_id)

    rule = "=" * 60
    reward_curve = []
    step_details = []
    total_reward = 0.0

    print(f"\n{rule}")
    print(f"  CivicAI Baseline — Task: {task_id}")
    print("  Model: GPT-4o-mini")
    print(f"{rule}\n")

    for turn in range(50):
        completion = llm.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": build_prompt(obs.model_dump())}],
            temperature=0.3,
        )
        # The message content can be None; treat that as an empty reply.
        reply = completion.choices[0].message.content or ""
        action = parse_action(reply)

        obs, reward, done, info = env.step(action)

        total_reward += reward
        reward_curve.append(reward)
        step_details.append(
            {
                "turn": turn,
                "action": action.model_dump(),
                "reward": reward,
                "obs": obs.model_dump(),
            }
        )

        status_fields = [
            f"  Turn {turn:2d}",
            f"Reward: {reward:.3f}",
            f"Emp: {obs.employment_rate:.1%}",
            f"Inf: {obs.inflation:.1%}",
            f"Sat: {obs.public_satisfaction:.1%}",
            f"Crime: {obs.crime_rate:.1%}",
        ]
        print(" | ".join(status_fields))

        if not done:
            continue
        print(f"\n  Episode ended: {info.get('termination_reason', 'max_steps')}")
        break

    # At least one turn has always been recorded by the time we get here.
    n_steps = len(reward_curve)
    avg_reward = total_reward / n_steps

    print(f"\n{rule}")
    print(f"  Total Reward: {total_reward:.4f}")
    print(f"  Avg Reward:   {avg_reward:.4f}")
    print(f"  Steps:        {n_steps}")
    print(f"{rule}\n")

    return {
        "task_id": task_id,
        "total_reward": total_reward,
        "avg_reward": avg_reward,
        "steps": n_steps,
        "reward_curve": reward_curve,
        "step_details": step_details,
    }


def run_rule_agent(task_id: str = "stabilize_economy") -> dict:
    """Run one episode with the multi-agent rule-based system (no API key needed).

    Delegates the whole episode to ``Orchestrator.run_episode`` and prints the
    per-turn rewards, the totals, and any emergent insights or patterns the
    orchestrator reported.

    Args:
        task_id: Task identifier passed to ``Orchestrator.run_episode``.

    Returns:
        The result dict produced by the orchestrator, unchanged. This function
        reads its ``reward_curve``, ``total_reward``, ``avg_reward`` and
        ``steps`` keys, plus the optional ``emergent_summary``.
    """
    from agents.orchestrator import Orchestrator

    env = CivicAIEnv()
    orch = Orchestrator(env)

    print(f"\n{'='*60}")
    print(f"  CivicAI Baseline — Task: {task_id}")
    print("  Model: Multi-Agent Rule System")
    print(f"{'='*60}\n")

    result = orch.run_episode(task_id)

    # Only the reward is reported per turn; the step log is not consulted.
    for i, r in enumerate(result["reward_curve"]):
        print(f"  Turn {i:2d} | Reward: {r:.3f}")

    print(f"\n{'='*60}")
    print(f"  Total Reward: {result['total_reward']:.4f}")
    print(f"  Avg Reward:   {result['avg_reward']:.4f}")
    print(f"  Steps:        {result['steps']}")
    print(f"{'='*60}\n")

    # Emergent insights are optional; print each section only when non-empty.
    summary = result.get("emergent_summary", {})
    if summary.get("key_insights"):
        print("  🧠 Emergent Insights:")
        for insight in summary["key_insights"]:
            print(f"    → {insight}")
    if summary.get("patterns"):
        print("  📊 Patterns Detected:")
        for p in summary["patterns"]:
            print(f"    → {p}")

    return result


def run_random_agent(task_id: str = "stabilize_economy", seed: int | None = None) -> dict:
    """Run one episode of up to 50 turns with uniformly random actions.

    Args:
        task_id: Task identifier passed to ``CivicAIEnv.reset``.
        seed: Optional seed for the action sampler. When given, the sequence
            of sampled actions is reproducible; when ``None`` the shared
            ``random`` module state is used, as before.
            NOTE(review): this seeds only the action sampling here — whether
            the environment has its own randomness is not visible from this
            file.

    Returns:
        A dict with ``task_id``, ``total_reward``, ``avg_reward``, ``steps``
        and ``reward_curve`` (per-turn rewards).
    """
    import random

    # A private generator keeps a seeded run from disturbing global state.
    rng = random if seed is None else random.Random(seed)

    env = CivicAIEnv()
    env.reset(task_id)
    total_reward = 0.0
    rewards = []
    policies = list(SubsidyPolicy)  # loop-invariant, so built once

    print(f"\n{'='*60}")
    print(f"  CivicAI Baseline — Task: {task_id}")
    print("  Model: Random Agent")
    print(f"{'='*60}\n")

    for _ in range(50):
        action = Action(
            tax_rate=rng.uniform(0.1, 0.5),
            healthcare_budget=rng.uniform(0.05, 0.4),
            education_budget=rng.uniform(0.05, 0.3),
            police_budget=rng.uniform(0.03, 0.2),
            subsidy_policy=rng.choice(policies),
        )
        _obs, reward, done, _info = env.step(action)
        total_reward += reward
        rewards.append(reward)

        if done:
            break

    # Guard the division so an empty reward list can never raise.
    avg_reward = total_reward / max(1, len(rewards))

    print(f"  Total Reward: {total_reward:.4f}")
    print(f"  Avg Reward:   {avg_reward:.4f}")

    return {
        "task_id": task_id,
        "total_reward": total_reward,
        "avg_reward": avg_reward,
        "steps": len(rewards),
        "reward_curve": rewards,
    }


if __name__ == "__main__":
    # The first command-line argument, when present, selects the task.
    task = sys.argv[1] if sys.argv[1:] else "stabilize_economy"

    # Without an API key (unset or empty) use the rule-based agent instead.
    if not os.getenv("OPENAI_API_KEY"):
        print("  No OPENAI_API_KEY found. Running rule-based agent.\n")
        run_rule_agent(task)
    else:
        run_llm_agent(task)