| import json |
| from collections import defaultdict |
|
|
| |
# Maps each question-type key to the short label shown in the printed
# score table. Insertion order matters to readers of the table: the first
# four keys form the first printed group, the last four the second.
QUESTION_TYPES = {
    # First report group.
    "object_rel_distance": "Rel. Dist",
    "object_rel_direction": "Rel. Dir",
    "route_planning": "Route Plan",
    "obj_appearance_order": "Appr. Order",
    # Second report group.
    "object_counting": "Obj. Count",
    "object_abs_distance": "Abs. Dist",
    "object_size_estimation": "Obj. Size",
    "room_size_estimation": "Room Size",
}
|
|
def merge_difficulty_levels(type_scores):
    """Pool scores of difficulty variants under their base question type.

    A question type ending in ``_easy``, ``_medium`` or ``_hard`` is merged
    into the type name with that suffix removed; any other type is kept
    under its own name.

    Args:
        type_scores: Mapping of question-type name to a list of scores.

    Returns:
        A ``defaultdict(list)`` mapping each base question type to the
        concatenation of the score lists of all its variants, in the
        iteration order of ``type_scores``.
    """
    difficulty_suffixes = ('_easy', '_medium', '_hard')
    merged_scores = defaultdict(list)
    for question_type, scores in type_scores.items():
        base_type = question_type
        for suffix in difficulty_suffixes:
            # Only strip a trailing marker. A substring match would truncate
            # names that merely contain one (e.g. "object_hardness").
            if question_type.endswith(suffix):
                base_type = question_type[:-len(suffix)]
                break
        merged_scores[base_type].extend(scores)
    return merged_scores
|
|
def _print_score_group(qtypes, merged_scores, type_averages):
    """Print one table row per type in ``qtypes`` that has scores.

    The mean of every printed type is stored in ``type_averages`` (mutated
    in place) so the caller can derive the overall score.
    """
    for qtype in qtypes:
        # .get() avoids inserting empty entries into a defaultdict.
        scores = merged_scores.get(qtype)
        if not scores:
            continue
        avg_score = sum(scores) / len(scores)
        type_averages[qtype] = avg_score
        print(f"{QUESTION_TYPES[qtype]:<14} {avg_score:.3f} {len(scores)}")


def calculate_average_scores_vsibench(data):
    """Print a per-question-type score table followed by an overall score.

    Scores are grouped by ``original_question_type``, difficulty variants
    are pooled via ``merge_difficulty_levels``, and the mean score and
    sample count of each known question type are printed in two groups
    separated by a rule.

    The "Overall" score is the unweighted mean of the per-type means (not
    the mean of all individual scores). The "Overall" count is the total
    number of items in ``data``, including items whose type is not part of
    either printed group. When no known type has any score, the overall
    score is reported as ``nan`` instead of raising ``ZeroDivisionError``.

    Args:
        data: Iterable of mappings, each providing the keys
            ``'original_question_type'`` and ``'score'``.

    Raises:
        KeyError: If an item lacks one of the required keys.
    """
    type_scores = defaultdict(list)
    all_scores = []

    for item in data:
        question_type = item['original_question_type']
        score = item['score']
        type_scores[question_type].append(score)
        all_scores.append(score)

    merged_scores = merge_difficulty_levels(type_scores)

    print("\nType Score Count")
    print("-" * 35)
    type_averages = {}

    first_group = ('object_rel_distance', 'object_rel_direction',
                   'route_planning', 'obj_appearance_order')
    _print_score_group(first_group, merged_scores, type_averages)

    print("-" * 35)

    second_group = ('object_counting', 'object_abs_distance',
                    'object_size_estimation', 'room_size_estimation')
    _print_score_group(second_group, merged_scores, type_averages)

    if type_averages:
        overall_score = sum(type_averages.values()) / len(type_averages)
    else:
        # Nothing to average: report "nan" rather than dividing by zero.
        overall_score = float('nan')
    print("-" * 35)
    print(f"Overall {overall_score:.3f} {len(all_scores)}")
|
|
|
|