| """ |
| Random baseline agent. |
| |
| Selects uniformly random actions from the valid action set. |
| Uses action masking to ensure only legal actions are chosen. |
| """ |
|
|
import random
from typing import Dict, Optional, Tuple
|
|
|
|
class RandomAgent:
    """Baseline agent that takes random valid actions.

    This agent serves as the performance lower bound.

    When a seed is supplied the agent draws from its own private
    ``random.Random`` generator, so constructing it does not reseed the
    module-level ``random`` generator and other users of that generator
    cannot perturb this agent's action stream. Without a seed the agent
    keeps drawing from the module-level generator.
    """

    def __init__(self, seed: Optional[int] = None):
        """Create the agent.

        Args:
            seed: Seed for a private generator. ``None`` means "use the
                module-level ``random`` generator".
        """
        if seed is None:
            self._choice = random.choice
        else:
            # A private generator seeded with ``seed`` yields the same
            # stream that ``random.seed(seed)`` would, without touching
            # global state shared with other agents.
            self._choice = random.Random(seed).choice

    def act(self, state: Dict) -> Tuple[str, int]:
        """Choose a random valid action.

        Args:
            state: Observation dict from the environment. The keys read
                are ``"valid_actions"``, ``"tasks"`` and ``"inbox"``; all
                are optional and a ``None`` value is treated as empty.

        Returns:
            (action_type, target_id) tuple. If ``"valid_actions"`` is
            non-empty one of its entries is returned as-is. Otherwise the
            candidates are derived from pending tasks and unreplied inbox
            messages, and ``("defer_task", 0)`` is returned when there are
            no candidates at all.

        Raises:
            KeyError: If a pending task or an unreplied message has no
                ``"id"`` entry.
        """
        valid_actions = state.get("valid_actions")
        if valid_actions:
            return self._choice(valid_actions)

        # No action mask supplied: rebuild the candidate set from the raw
        # observation. The order (task pairs first, then replies) is kept
        # stable so a seeded agent stays reproducible.
        candidates = []
        for task in state.get("tasks") or []:
            # A task without a status is simply not pending.
            if task.get("status") == "pending":
                candidates.append(("complete_task", task["id"]))
                candidates.append(("defer_task", task["id"]))
        candidates.extend(
            ("send_reply", message["id"])
            for message in state.get("inbox") or []
            if not message.get("replied", False)
        )

        if candidates:
            return self._choice(candidates)

        # Nothing to act on: fall back to a fixed placeholder action.
        return ("defer_task", 0)

    def __repr__(self):
        return "RandomAgent()"
|
|