File size: 730 Bytes
1b64cba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from app.state import EnvironmentState


def compute_reward(state: EnvironmentState, action_type: str, info: dict) -> float:
    """Return the scalar reward for a single step.

    The reward is the sum of these components, applied in this order:

    * ``+0.2`` when ``info["correct_action"]`` is truthy.
    * ``-0.05`` when ``action_type`` is ``"request_info"``; otherwise
      ``-0.2`` when ``info["incorrect_action"]`` is truthy.
    * ``+0.2`` when ``info["task_progress"]`` is truthy.
    * ``-0.01`` unconditionally on every call.
    * ``-0.5`` for every entry of ``state.hidden_email_states`` whose
      ``deadline`` is truthy and smaller than ``state.timestep``.

    Missing ``info`` keys count as falsy.

    NOTE(review): a ``"request_info"`` action is never charged the
    incorrect-action penalty, even if ``info["incorrect_action"]`` is
    set -- confirm this is the intended tradeoff.
    NOTE(review): a ``deadline`` of ``0`` is falsy and so is treated like
    "no deadline" -- confirm against how deadlines are assigned.
    """
    # Correctness bonus.
    total = 0.2 if info.get("correct_action") else 0.0

    # Exactly one of: query cost, incorrect-action penalty, or nothing.
    if action_type == "request_info":
        deduction = 0.05
    elif info.get("incorrect_action"):
        deduction = 0.2
    else:
        deduction = 0.0
    total -= deduction

    # Progress bonus.
    if info.get("task_progress"):
        total += 0.2

    # Flat per-step charge.
    total -= 0.01

    # One penalty per email whose deadline has already passed.
    for email in state.hidden_email_states:
        due = email.deadline
        if not due:
            continue
        if state.timestep > due:
            total -= 0.5

    return total