""" Rule-based heuristic agent. Uses a priority-driven strategy: 1. Complete high-priority tasks first. 2. Reply to urgent messages. 3. Schedule medium-priority tasks. 4. Reply to normal messages. 5. Defer or reject low-priority tasks. """ from typing import Dict, Tuple class RuleBasedAgent: """Heuristic agent that follows a priority-based decision tree. This agent serves as a strong baseline — better than random, and provides a performance reference for the RL agent to surpass. """ def act(self, state: Dict) -> Tuple[str, int]: """Choose an action based on priority heuristics. Args: state: Observation dict from the environment. Returns: (action_type, target_id) tuple. """ tasks = state.get("tasks", []) inbox = state.get("inbox", []) pending = [t for t in tasks if t["status"] == "pending"] unreplied = [m for m in inbox if not m.get("replied", False)] # Priority 1: Complete high-priority pending tasks high_tasks = [t for t in pending if t["priority"] == "high"] if high_tasks: # Pick the earliest one target = min(high_tasks, key=lambda t: t["time"]) return ("complete_task", target["id"]) # Priority 2: Reply to urgent messages urgent_msgs = [m for m in unreplied if m.get("urgency") == "high"] if urgent_msgs: return ("send_reply", urgent_msgs[0]["id"]) # Priority 3: Schedule medium-priority tasks medium_tasks = [t for t in pending if t["priority"] == "medium"] if medium_tasks: target = min(medium_tasks, key=lambda t: t["time"]) return ("schedule_task", target["id"]) # Priority 4: Reply to remaining messages if unreplied: return ("send_reply", unreplied[0]["id"]) # Priority 5: Complete low-priority tasks low_tasks = [t for t in pending if t["priority"] == "low"] if low_tasks: return ("complete_task", low_tasks[0]["id"]) # Fallback: Defer anything pending if pending: return ("defer_task", pending[0]["id"]) return ("defer_task", 0) def __repr__(self): return "RuleBasedAgent()"