mahammadaftab's picture
clean initial commit
62851e9
"""
Rule-based heuristic agent.
Uses a priority-driven strategy:
1. Complete high-priority tasks first.
2. Reply to urgent messages.
3. Schedule medium-priority tasks.
4. Reply to normal messages.
5. Complete low-priority tasks; defer any other pending task.
"""
from typing import Dict, Tuple
class RuleBasedAgent:
    """Heuristic agent that follows a priority-based decision tree.

    This agent serves as a strong baseline — better than random,
    and provides a performance reference for the RL agent to surpass.

    Decision order (the first rule that matches wins):
        1. Complete the earliest high-priority pending task.
        2. Reply to the first unreplied message whose urgency is "high".
        3. Schedule the earliest medium-priority pending task.
        4. Reply to the first remaining unreplied message.
        5. Complete the first low-priority pending task.
        6. Defer the first pending task of any other priority.
        7. Otherwise return ``("defer_task", 0)``.
    """

    @staticmethod
    def _time_key(task: Dict) -> Tuple[bool, object]:
        """Return a sort key that orders tasks by their "time" value.

        Tasks with no "time" value sort after every task that has one,
        so a partially filled task never raises and never outranks a
        properly timed task.
        """
        when = task.get("time")
        # The leading flag decides the comparison whenever exactly one side
        # lacks a time, so None is never compared against a real value.
        return (when is None, when)

    def act(self, state: Dict) -> Tuple[str, int]:
        """Choose an action based on priority heuristics.

        Args:
            state: Observation dict from the environment. The "tasks" and
                "inbox" entries are read; either may be missing or None,
                in which case it is treated as empty.

        Returns:
            (action_type, target_id) tuple. ``("defer_task", 0)`` is
            returned when there is nothing to act on.
        """
        # `or []` also covers a key that is present but set to None, which
        # a plain .get(key, []) default would pass through unchanged.
        tasks = state.get("tasks") or []
        inbox = state.get("inbox") or []

        # Tasks are read with .get (like messages) so an entry that lacks
        # "status" or "priority" is skipped instead of raising KeyError.
        pending = [t for t in tasks if t.get("status") == "pending"]
        unreplied = [m for m in inbox if not m.get("replied", False)]

        def with_priority(level: str) -> list:
            return [t for t in pending if t.get("priority") == level]

        # Priority 1: complete the earliest high-priority pending task.
        high_tasks = with_priority("high")
        if high_tasks:
            target = min(high_tasks, key=self._time_key)
            return ("complete_task", target["id"])

        # Priority 2: reply to urgent messages, in inbox order.
        urgent_msgs = [m for m in unreplied if m.get("urgency") == "high"]
        if urgent_msgs:
            return ("send_reply", urgent_msgs[0]["id"])

        # Priority 3: schedule the earliest medium-priority pending task.
        medium_tasks = with_priority("medium")
        if medium_tasks:
            target = min(medium_tasks, key=self._time_key)
            return ("schedule_task", target["id"])

        # Priority 4: reply to any remaining message, in inbox order.
        if unreplied:
            return ("send_reply", unreplied[0]["id"])

        # Priority 5: complete low-priority tasks, in list order.
        low_tasks = with_priority("low")
        if low_tasks:
            return ("complete_task", low_tasks[0]["id"])

        # Fallback: anything still pending has a priority other than
        # high/medium/low (or none at all); defer it.
        if pending:
            return ("defer_task", pending[0]["id"])

        # Nothing to act on.
        return ("defer_task", 0)

    def __repr__(self) -> str:
        return "RuleBasedAgent()"