mahammadaftab's picture
clean initial commit
62851e9
"""
Random baseline agent.
Selects uniformly random actions from the valid action set.
Uses action masking to ensure only legal actions are chosen.
"""
import random
from typing import Dict, Optional, Tuple
class RandomAgent:
    """Baseline agent that takes uniformly random valid actions.

    This agent serves as the performance lower bound.

    When a seed is supplied the agent draws from its own private
    ``random.Random`` generator, so constructing it does not reseed the
    process-wide ``random`` module, and other agents or unrelated ``random``
    calls cannot disturb its action sequence. Without a seed it draws from
    the shared module-level generator.
    """

    # Randomness source for unseeded agents: the shared module-level
    # generator (the ``random`` module itself exposes ``choice``). Kept as a
    # class attribute so ``act`` works even if ``__init__`` was bypassed.
    _rng = random

    def __init__(self, seed: Optional[int] = None):
        """Create the agent.

        Args:
            seed: Optional seed for a reproducible action sequence. ``None``
                leaves the agent on the shared module-level generator.
        """
        if seed is not None:
            # Private generator: same sequence for a given seed, but isolated
            # from (and harmless to) the global ``random`` state.
            self._rng = random.Random(seed)

    def act(self, state: Dict) -> Tuple[str, int]:
        """Choose a random valid action.

        If the observation carries a non-empty ``"valid_actions"`` entry, one
        of its elements is returned as-is. Otherwise candidate actions are
        derived from ``"tasks"`` (complete/defer every pending task) and
        ``"inbox"`` (reply to every message not yet replied to).

        Args:
            state: Observation dict from the environment.

        Returns:
            (action_type, target_id) tuple. ``("defer_task", 0)`` is returned
            when no candidate action can be found.
        """
        valid_actions = state.get("valid_actions")
        if valid_actions:
            return self._rng.choice(valid_actions)

        # Fallback: build the candidate list from tasks and messages.
        # ``or []`` tolerates keys that are present but set to None.
        tasks = state.get("tasks") or []
        inbox = state.get("inbox") or []

        actions = []
        for task in tasks:
            # A task without a "status" field is treated as not pending.
            if task.get("status") == "pending":
                actions.append(("complete_task", task["id"]))
                actions.append(("defer_task", task["id"]))
        actions.extend(
            ("send_reply", message["id"])
            for message in inbox
            if not message.get("replied", False)
        )

        if actions:
            return self._rng.choice(actions)

        # Nothing actionable: emit a placeholder action.
        # NOTE(review): presumably the environment tolerates target id 0 here
        # - confirm against the environment's action handling.
        return ("defer_task", 0)

    def __repr__(self):
        """Return a short, constant description of the agent."""
        return "RandomAgent()"