File size: 678 Bytes
d9175ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def compute_grade(action_taken, correct_action, output, correct_answer):
    """Score a single attempt as a weighted blend of three components.

    Components and their weights:
      * action match (0.4): 1.0 when ``action_taken == correct_action``.
      * answer match (0.5): 1.0 when ``output == correct_answer``.
      * efficiency  (0.1): 0.5 when the action is ``"use_calculator"`` or
        ``"use_search"``, otherwise 1.0.

    The efficiency component is granted regardless of whether the action
    or the answer was right, so the lowest possible result is 0.05.

    Returns:
        The weighted sum, rounded to two decimal places.
    """
    # Did the caller pick the expected action?
    action_points = 0.0
    if action_taken == correct_action:
        action_points = 1.0

    # Did the produced output match the reference answer exactly?
    answer_points = 0.0
    if output == correct_answer:
        answer_points = 1.0

    # Tool calls carry a cost; anything else counts as fully efficient.
    used_tool = action_taken in ("use_calculator", "use_search")
    efficiency_points = 0.5 if used_tool else 1.0

    weighted_total = (
        0.4 * action_points
        + 0.5 * answer_points
        + 0.1 * efficiency_points
    )
    return round(weighted_total, 2)