Spaces:

mahammadaftab
/

CivicAI

Sleeping

App Files Files Community

CivicAI / scripts /baseline_inference.py

mahammadaftab

Initial Update

315caa2 11 days ago

raw

history blame contribute delete

6.98 kB

	"""
	CivicAI Baseline Inference Script

	Uses OpenAI GPT-4o-mini to make policy decisions.
	Falls back to rule-based agent if no API key is available.
	"""

	from __future__ import annotations

	import json
	import os
	import sys

	# Add project root to path
	sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	from civicai.environment import CivicAIEnv
	from civicai.models import Action, SubsidyPolicy


	def parse_action(response_text: str) -> Action:
	"""Parse LLM response into an Action. Falls back to defaults."""
	try:
	# Try JSON parse
	text = response_text.strip()
	if "```json" in text:
	text = text.split("```json")[1].split("```")[0]
	elif "```" in text:
	text = text.split("```")[1].split("```")[0]

	data = json.loads(text)
	return Action(
	tax_rate=max(0, min(1, float(data.get("tax_rate", 0.25)))),
	healthcare_budget=max(0, min(1, float(data.get("healthcare_budget", 0.20)))),
	education_budget=max(0, min(1, float(data.get("education_budget", 0.15)))),
	police_budget=max(0, min(1, float(data.get("police_budget", 0.10)))),
	subsidy_policy=SubsidyPolicy(data.get("subsidy_policy", "none")),
	emergency_response=data.get("emergency_response"),
	)
	except Exception:
	return Action() # Use defaults


	def build_prompt(obs_dict: dict) -> str:
	"""Build a structured prompt for the LLM."""
	return f"""You are an AI policy advisor managing a society. Analyze the current state and decide on policy actions.

	CURRENT STATE:
	- Turn: {obs_dict['turn']}/50
	- Population: {obs_dict['population']:,}
	- Employment Rate: {obs_dict['employment_rate']:.1%}
	- Inflation: {obs_dict['inflation']:.1%}
	- Public Satisfaction: {obs_dict['public_satisfaction']:.1%}
	- Health Index: {obs_dict['health_index']:.1%}
	- Crime Rate: {obs_dict['crime_rate']:.1%}
	- GDP: ${obs_dict['gdp']:.1f}B
	- Budget Balance: {obs_dict['budget_balance']:.1%}
	- Resources: {json.dumps(obs_dict['resources'], indent=2)}
	- Active Events: {obs_dict['active_events']}

	Respond with ONLY a JSON object (no other text):
	{{
	"tax_rate": 0.0-1.0,
	"healthcare_budget": 0.0-1.0,
	"education_budget": 0.0-1.0,
	"police_budget": 0.0-1.0,
	"subsidy_policy": "none\|agriculture\|industry\|technology",
	"emergency_response": null or "lockdown\|stimulus\|open"
	}}"""


	def run_llm_agent(task_id: str = "stabilize_economy") -> dict:
	"""Run baseline with OpenAI GPT-4o-mini."""
	from openai import OpenAI

	client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
	env = CivicAIEnv()
	obs = env.reset(task_id)
	total_reward = 0.0
	rewards = []
	steps = []

	print(f"\n{'='*60}")
	print(f" CivicAI Baseline — Task: {task_id}")
	print(f" Model: GPT-4o-mini")
	print(f"{'='*60}\n")

	for turn in range(50):
	prompt = build_prompt(obs.model_dump())

	response = client.chat.completions.create(
	model="gpt-4o-mini",
	messages=[{"role": "user", "content": prompt}],
	temperature=0.3,
	)

	action = parse_action(response.choices[0].message.content or "")
	obs, reward, done, info = env.step(action)

	total_reward += reward
	rewards.append(reward)
	steps.append({
	"turn": turn,
	"action": action.model_dump(),
	"reward": reward,
	"obs": obs.model_dump(),
	})

	print(f" Turn {turn:2d} \| Reward: {reward:.3f} \| "
	f"Emp: {obs.employment_rate:.1%} \| Inf: {obs.inflation:.1%} \| "
	f"Sat: {obs.public_satisfaction:.1%} \| Crime: {obs.crime_rate:.1%}")

	if done:
	print(f"\n Episode ended: {info.get('termination_reason', 'max_steps')}")
	break

	print(f"\n{'='*60}")
	print(f" Total Reward: {total_reward:.4f}")
	print(f" Avg Reward: {total_reward / len(rewards):.4f}")
	print(f" Steps: {len(rewards)}")
	print(f"{'='*60}\n")

	return {
	"task_id": task_id,
	"total_reward": total_reward,
	"avg_reward": total_reward / len(rewards),
	"steps": len(rewards),
	"reward_curve": rewards,
	"step_details": steps,
	}


	def run_rule_agent(task_id: str = "stabilize_economy") -> dict:
	"""Run baseline with multi-agent rule-based system (no API key needed)."""
	from agents.orchestrator import Orchestrator

	env = CivicAIEnv()
	orch = Orchestrator(env)

	print(f"\n{'='*60}")
	print(f" CivicAI Baseline — Task: {task_id}")
	print(f" Model: Multi-Agent Rule System")
	print(f"{'='*60}\n")

	result = orch.run_episode(task_id)

	for i, r in enumerate(result["reward_curve"]):
	obs = result.get("step_log", [{}])[i] if i < len(result.get("step_log", [])) else {}
	print(f" Turn {i:2d} \| Reward: {r:.3f}")

	print(f"\n{'='*60}")
	print(f" Total Reward: {result['total_reward']:.4f}")
	print(f" Avg Reward: {result['avg_reward']:.4f}")
	print(f" Steps: {result['steps']}")
	print(f"{'='*60}\n")

	# Emergent insights
	summary = result.get("emergent_summary", {})
	if summary.get("key_insights"):
	print(" 🧠 Emergent Insights:")
	for insight in summary["key_insights"]:
	print(f" → {insight}")
	if summary.get("patterns"):
	print(" 📊 Patterns Detected:")
	for p in summary["patterns"]:
	print(f" → {p}")

	return result


	def run_random_agent(task_id: str = "stabilize_economy") -> dict:
	"""Run baseline with random actions."""
	import random

	env = CivicAIEnv()
	obs = env.reset(task_id)
	total_reward = 0.0
	rewards = []

	print(f"\n{'='*60}")
	print(f" CivicAI Baseline — Task: {task_id}")
	print(f" Model: Random Agent")
	print(f"{'='*60}\n")

	for turn in range(50):
	action = Action(
	tax_rate=random.uniform(0.1, 0.5),
	healthcare_budget=random.uniform(0.05, 0.4),
	education_budget=random.uniform(0.05, 0.3),
	police_budget=random.uniform(0.03, 0.2),
	subsidy_policy=random.choice(list(SubsidyPolicy)),
	)
	obs, reward, done, info = env.step(action)
	total_reward += reward
	rewards.append(reward)

	if done:
	break

	print(f" Total Reward: {total_reward:.4f}")
	print(f" Avg Reward: {total_reward / max(1, len(rewards)):.4f}")

	return {
	"task_id": task_id,
	"total_reward": total_reward,
	"avg_reward": total_reward / max(1, len(rewards)),
	"steps": len(rewards),
	"reward_curve": rewards,
	}


	if __name__ == "__main__":
	task = sys.argv[1] if len(sys.argv) > 1 else "stabilize_economy"

	if os.getenv("OPENAI_API_KEY"):
	run_llm_agent(task)
	else:
	print(" No OPENAI_API_KEY found. Running rule-based agent.\n")
	run_rule_agent(task)