Spaces:
Sleeping
Sleeping
| from .models import Action, Email, State | |
| from .reward import compute_dense_reward | |
def grade_easy(action: Action, email: Email) -> float:
    """Score one action taken on an easy-tier email.

    Each known easy email accepts exactly one action type: ``"e1"`` expects
    ``"reply"`` and ``"e2"`` expects ``"mark_spam"``.

    Args:
        action: The action to grade; only ``action.action_type`` is read.
        email: The email being handled; only ``email.id`` is read.

    Returns:
        ``compute_dense_reward(True)`` when the action type matches the one
        accepted for this email, otherwise ``compute_dense_reward(False)``.
        Unrecognised email ids always take the ``False`` path.
    """
    accepted_action = {"e1": "reply", "e2": "mark_spam"}.get(email.id)
    # Unknown email ids have no accepted action, so nothing can match and
    # the action itself is never inspected.
    is_match = accepted_action is not None and action.action_type == accepted_action
    return compute_dense_reward(is_match)
def grade_medium(action: Action, email: Email) -> float:
    """Score one action taken on a medium-tier email.

    Rubric, keyed by ``email.id``:

    * ``"m1"`` (order id is missing, so the ideal move is to ask for it):
      ``request_info`` earns full credit. A ``reply`` with non-empty text
      earns full credit when the lower-cased text contains both ``"order"``
      and ``"id"``, otherwise half credit. A ``forward`` whose target
      contains ``"support"`` earns 0.3 of full credit. Anything else is
      graded as incorrect.
    * ``"m2"``: ``mark_spam`` or ``archive`` is correct; anything else is not.
    * ``"m3"``: a ``reply`` whose text is longer than five characters earns
      full credit; any other ``reply`` earns half credit; anything else is
      incorrect.
    * Any other id is graded as incorrect.

    Args:
        action: The action to grade. Reads ``action_type`` and, depending on
            the branch, ``response_text`` or ``forward_to``.
        email: The email being handled; only ``email.id`` is read.

    Returns:
        ``compute_dense_reward(True)``, optionally scaled by the partial
        credit factor above, or ``compute_dense_reward(False)``.
    """
    email_id = email.id

    if email_id == "m1":
        kind = action.action_type
        if kind == "request_info":
            return compute_dense_reward(True)
        if kind == "reply" and action.response_text:
            lowered = action.response_text.lower()
            # NOTE(review): these are plain substring checks, so "id" also
            # matches inside words such as "did" or "provide" - confirm that
            # this looseness is intended.
            asks_for_order_id = "order" in lowered and "id" in lowered
            full_credit = compute_dense_reward(True)
            return full_credit if asks_for_order_id else full_credit * 0.5
        if kind == "forward" and action.forward_to and "support" in action.forward_to.lower():
            # Escalating an incomplete ticket instead of asking for the
            # missing details only earns partial credit.
            return compute_dense_reward(True) * 0.3
        return compute_dense_reward(False)

    if email_id == "m2":
        dismissed = action.action_type in ("mark_spam", "archive")
        return compute_dense_reward(dismissed)

    if email_id == "m3":
        if action.action_type != "reply":
            return compute_dense_reward(False)
        reply_text = action.response_text
        if reply_text and len(reply_text) > 5:
            return compute_dense_reward(True)
        # A reply with missing or very short text still earns half credit.
        return compute_dense_reward(True) * 0.5

    return compute_dense_reward(False)
def grade_hard(action: Action, email: Email) -> float:
    """Score one action taken on a hard-tier email.

    Rubric, keyed by ``email.id`` (factors multiply
    ``compute_dense_reward(True)``):

    * ``"h1"`` (VIP issue): ``reply`` scores 1.0 when the lower-cased text
      contains ``"investigat"``, else 0.5. ``escalate`` scores 1.0 at
      ``"urgent"`` priority, else 0.7. Any other action scores 0.0.
    * ``"h2"`` (alert): ``escalate`` scores 1.0 at ``"urgent"`` priority,
      else 0.7. ``forward`` to a target containing ``"eng"`` scores 1.0 at
      ``"urgent"`` priority, else 0.5. Anything else is incorrect.
    * ``"h3"`` (compliance): ``reply`` scores 1.0 when the text contains
      ``"acknowledge"``, else 0.4. ``forward`` to a target containing
      ``"legal"`` scores 1.0. Anything else is incorrect.
    * ``"h4"`` (CEO phishing attempt): ``mark_spam`` scores 1.0; ``archive``
      or ``escalate`` scores 0.5. Anything else is incorrect.
    * ``"h5"`` (intern project): ``reply`` is correct; anything else is not.
    * Any other id is graded as incorrect.

    "Incorrect" means ``compute_dense_reward(False)`` is returned, except for
    ``"h1"`` where a wrong action yields ``compute_dense_reward(True) * 0.0``.

    Args:
        action: The action to grade. Reads ``action_type`` and, depending on
            the branch, ``response_text``, ``priority_level`` or
            ``forward_to``.
        email: The email being handled; only ``email.id`` is read.

    Returns:
        The (possibly scaled) value produced by ``compute_dense_reward``.
    """
    email_id = email.id

    if email_id == "h1":  # VIP issue
        kind = action.action_type
        if kind == "reply":
            reply_text = action.response_text
            # NOTE(review): only the stem "investigat" is checked. An earlier
            # comment on this branch also listed "downtime" as acceptable,
            # which the code does not test for - confirm the intended rubric.
            weight = 1.0 if reply_text and "investigat" in reply_text.lower() else 0.5
        elif kind == "escalate":
            weight = 1.0 if action.priority_level == "urgent" else 0.7
        else:
            weight = 0.0
        # NOTE(review): a wrong action here is the True reward scaled to
        # zero rather than compute_dense_reward(False) as in the other
        # branches - verify this asymmetry is deliberate.
        return compute_dense_reward(True) * weight

    if email_id == "h2":  # Alert
        kind = action.action_type
        if kind == "escalate":
            if action.priority_level != "urgent":
                return compute_dense_reward(True) * 0.7
            return compute_dense_reward(True)
        if kind == "forward" and action.forward_to and "eng" in action.forward_to.lower():
            if action.priority_level != "urgent":
                return compute_dense_reward(True) * 0.5
            return compute_dense_reward(True)
        return compute_dense_reward(False)

    if email_id == "h3":  # Compliance
        kind = action.action_type
        if kind == "reply":
            reply_text = action.response_text
            if not (reply_text and "acknowledge" in reply_text.lower()):
                return compute_dense_reward(True) * 0.4
            return compute_dense_reward(True)
        if kind == "forward" and action.forward_to and "legal" in action.forward_to.lower():
            return compute_dense_reward(True)
        return compute_dense_reward(False)

    if email_id == "h4":  # CEO phishing attempt
        kind = action.action_type
        if kind == "mark_spam":
            return compute_dense_reward(True)
        if kind in ("archive", "escalate"):
            return compute_dense_reward(True) * 0.5
        return compute_dense_reward(False)

    if email_id == "h5":  # Intern project
        replied = action.action_type == "reply"
        return compute_dense_reward(replied)

    return compute_dense_reward(False)
def grade_action(task_level: str, action: Action, email: Email, state: State) -> float:
    """Grade one action and scale the score by the task level's size.

    The level-specific grader (``grade_easy``, ``grade_medium`` or
    ``grade_hard``) produces a raw score, which is divided by the size
    configured for that level (2, 3 and 5 respectively) and clamped to
    ``[0.0, 1.0]``. An unrecognised ``task_level`` gets a raw score of 0.0
    and a size of 1, so the result is 0.0.

    Args:
        task_level: ``"easy"``, ``"medium"`` or ``"hard"``.
        action: The action to grade; passed through to the level grader.
        email: The email the action applies to; passed through as well.
        state: Not used by this function.

    Returns:
        The per-step reward, always within ``[0.0, 1.0]``.
    """
    # Each entry pairs the level's size with a deferred call to its grader,
    # so only the grader for the selected level is ever invoked.
    # The sizes equal the number of email ids each grader recognises;
    # presumably this keeps a full episode's summed reward at or below 1.0
    # when every email is graded once - verify against the caller.
    level_table = {
        "easy": (2, lambda: grade_easy(action, email)),
        "medium": (3, lambda: grade_medium(action, email)),
        "hard": (5, lambda: grade_hard(action, email)),
    }
    divisor, run_grader = level_table.get(task_level, (1, lambda: 0.0))
    raw_score = run_grader()

    # Clamp so that a single step can never fall outside [0.0, 1.0].
    return max(0.0, min(1.0, raw_score / divisor))