# Spaces:
# Sleeping
# Sleeping
def compute_grade(
    action_taken: str,
    correct_action: str,
    output: object,
    correct_answer: object,
) -> float:
    """Grade a single step as a weighted sum of three components.

    The score combines:

    * action correctness (weight 0.4): 1.0 when ``action_taken`` equals
      ``correct_action``, otherwise 0.0;
    * answer correctness (weight 0.5): 1.0 when ``output`` equals
      ``correct_answer``, otherwise 0.0;
    * efficiency (weight 0.1): 0.5 when the action is one of the tool
      actions ``"use_calculator"`` / ``"use_search"``, otherwise 1.0.

    Both correctness checks use plain ``==``; no normalization (case,
    whitespace, numeric tolerance) is applied to either operand.

    Args:
        action_taken: The action that was chosen.
        correct_action: The action that was expected.
        output: The produced answer.
        correct_answer: The expected answer.

    Returns:
        The weighted score, rounded to two decimal places, in the range
        0.05 to 1.0.
    """
    # 1. Action correctness
    action_correct = 1.0 if action_taken == correct_action else 0.0

    # 2. Answer correctness
    answer_correct = 1.0 if output == correct_answer else 0.0

    # 3. Efficiency (simple version): using a tool carries a cost, any other
    #    action counts as fully efficient.
    # NOTE(review): this term is granted even when both the action and the
    # answer are wrong, so the minimum score is 0.05 rather than 0.0 --
    # confirm that this is the intended scoring contract.
    if action_taken in ("use_calculator", "use_search"):
        efficiency = 0.5
    else:
        efficiency = 1.0

    # Final score: the weights sum to 1.0, so a perfect direct answer scores 1.0.
    score = (
        0.4 * action_correct
        + 0.5 * answer_correct
        + 0.1 * efficiency
    )
    return round(score, 2)