File size: 6,464 Bytes
b4b2877
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#!/usr/bin/env python3
"""Aggregate results from the three new benchmark experiments."""
import os
import json
import glob
import numpy as np

# Root directory that holds the per-experiment result folders.
# The path is written with a shell-style ``${PULSE_ROOT}`` placeholder, which
# Python does not expand on its own; without expansion every glob below would
# search a directory literally named "${PULSE_ROOT}". os.path.expandvars
# substitutes the environment variable when it is set and leaves the
# placeholder untouched when it is not, so the previous value is preserved in
# that case.
ROOT = os.path.expandvars('${PULSE_ROOT}/results/exp_new')


def load_results(pattern):
    """Load every JSON file whose path matches a glob pattern.

    Args:
        pattern: Glob pattern handed to ``glob.glob``.

    Returns:
        A list with one decoded JSON document per readable file, ordered by
        sorted file path. Files that cannot be opened or parsed are reported
        on stdout and skipped, so the list may be shorter than the number of
        matching paths (and is empty when nothing matches).
    """
    results = []
    for path in sorted(glob.glob(pattern)):
        try:
            # Context manager closes the handle deterministically; the former
            # json.load(open(...)) left closing to the garbage collector.
            with open(path, encoding="utf-8") as fh:
                results.append(json.load(fh))
        except Exception as e:
            # Deliberately best-effort: one unreadable or corrupt result file
            # must not abort the aggregation of the others.
            print(f"  ERR: {path}: {e}")
    return results


def aggregate_expA():
    """Print the missing-modality table (experiment A).

    For each of the ``expA_missing`` and ``expA_baseline`` result folders,
    every ``*/results.json`` file is loaded and its ``eval_configs`` entries
    are pooled by config name. The F1 and accuracy values of each config are
    then summarised as mean ± std (``np.std`` default, i.e. population std)
    across the loaded files.

    Only configs named ``full`` or starting with ``drop_`` / ``only_`` are
    printed, in that order; the two prefixed groups are sorted by name. The
    "active modalities" column comes from the first result file that
    mentions the config. Nothing is returned.
    """
    banner = "=" * 70
    print("\n" + banner)
    print("EXP A: Missing-modality robustness")
    print(banner)

    for subdir in ('expA_missing', 'expA_baseline'):
        runs = load_results(f'{ROOT}/{subdir}/*/results.json')
        if not runs:
            print(f"  No results yet for {subdir}")
            continue
        print(f"\n-- {subdir} (n seeds = {len(runs)}) --")

        # Pool the per-seed scores under their eval-config name.
        per_config = {}
        for run in runs:
            if 'eval_configs' in run:
                for cfg, info in run['eval_configs'].items():
                    bucket = per_config.setdefault(
                        cfg, {'f1': [], 'acc': [], 'active': info['active']})
                    bucket['f1'].append(info['f1'])
                    bucket['acc'].append(info['acc'])

        # Display order: full model, then leave-one-out, then single-modality.
        ordered = (
            [cfg for cfg in per_config if cfg == 'full']
            + sorted(cfg for cfg in per_config if cfg.startswith('drop_'))
            + sorted(cfg for cfg in per_config if cfg.startswith('only_'))
        )

        header = (f"  {'Config':<22s}  {'Active modalities':<42s}  "
                  f"{'F1 mean±std':<14s}  {'Acc mean±std':<14s}")
        print(header)
        print('  ' + '-' * 96)
        for cfg in ordered:
            stats = per_config[cfg]
            f1_mean = np.mean(stats['f1'])
            f1_std = np.std(stats['f1'])
            acc_mean = np.mean(stats['acc'])
            acc_std = np.std(stats['acc'])
            modalities = ','.join(stats['active'])
            print(f"  {cfg:<22s}  {modalities:<42s}  "
                  f"{f1_mean:.3f}±{f1_std:.3f}    {acc_mean:.3f}±{acc_std:.3f}")


def aggregate_expB():
    """Print the grip-force regression table (experiment B).

    Loads every ``expB_grip/*/results.json`` file, keeps those that contain
    ``best_test_metrics``, and groups them by the pair
    (``backbone``, comma-joined ``modalities``). Each metric of a group is
    reduced to a (mean, std) tuple over the group's result files, using
    ``np.mean`` and ``np.std`` with their defaults.

    Rows are printed in descending order of mean ``avg_pearson_r``. The
    per-hand MAE and Pearson values are computed but only the averaged MAE,
    averaged Pearson r and the per-hand R² are displayed. Nothing is returned.
    """
    banner = "=" * 70
    print("\n" + banner)
    print("EXP B: Grip force regression")
    print(banner)
    runs = load_results(f'{ROOT}/expB_grip/*/results.json')
    if not runs:
        print("  No results yet")
        return

    def mean_std(values):
        # (mean, std) pair in the layout the print loop below indexes into.
        return (np.mean(values), np.std(values))

    # Bucket the runs that carry test metrics by (backbone, modality set).
    by_setup = {}
    for run in runs:
        if 'best_test_metrics' in run:
            setup = (run['backbone'], ','.join(run['modalities']))
            by_setup.setdefault(setup, []).append(run)

    table = []
    for (backbone, modalities), members in by_setup.items():
        metrics = [member['best_test_metrics'] for member in members]
        table.append({
            'backbone': backbone,
            'modalities': modalities,
            'n_seeds': len(members),
            'mae_R': mean_std([m['right_hand']['mae_g'] for m in metrics]),
            'mae_L': mean_std([m['left_hand']['mae_g'] for m in metrics]),
            'mae_avg': mean_std([m['avg_mae_g'] for m in metrics]),
            'r_R': mean_std([m['right_hand']['pearson_r'] for m in metrics]),
            'r_L': mean_std([m['left_hand']['pearson_r'] for m in metrics]),
            'r_avg': mean_std([m['avg_pearson_r'] for m in metrics]),
            'r2_R': mean_std([m['right_hand']['r2'] for m in metrics]),
            'r2_L': mean_std([m['left_hand']['r2'] for m in metrics]),
        })

    # Best average correlation first.
    table = sorted(table, key=lambda entry: entry['r_avg'][0], reverse=True)

    print(f"  {'Backbone':<12s}  {'Modalities':<30s}  N  "
          f"{'MAE(g) avg':<14s}  {'Pearson r avg':<14s}  {'R²(R)':<12s}  {'R²(L)':<12s}")
    print('  ' + '-' * 102)
    for entry in table:
        mae_mean, mae_std = entry['mae_avg']
        r_mean, r_std = entry['r_avg']
        r2_right_mean, r2_right_std = entry['r2_R']
        r2_left_mean, r2_left_std = entry['r2_L']
        print(f"  {entry['backbone']:<12s}  {entry['modalities']:<30s}  {entry['n_seeds']}  "
              f"{mae_mean:.1f}±{mae_std:.1f}    "
              f"{r_mean:.3f}±{r_std:.3f}    "
              f"{r2_right_mean:.3f}±{r2_right_std:.3f}    "
              f"{r2_left_mean:.3f}±{r2_left_std:.3f}")


def aggregate_expC():
    """Print the cross-modal text retrieval table (experiment C).

    Loads every ``expC_retrieval/*/results.json`` file, keeps those that
    contain ``final_avg_over_3_pool_seeds``, and groups them by their
    comma-joined ``modalities``. Recall@1/5/10 and median rank are reduced to
    (mean, std) over each group with ``np.mean`` / ``np.std`` defaults.

    ``N_test`` and ``K`` are read from the first result file of each group
    (``n_test_segments``, default 0, and ``K_pool``, default 100). Rows are
    printed in descending order of mean recall@10. Nothing is returned.
    """
    banner = "=" * 70
    print("\n" + banner)
    print("EXP C: T5 Cross-modal text retrieval")
    print(banner)
    runs = load_results(f'{ROOT}/expC_retrieval/*/results.json')
    if not runs:
        print("  No results yet")
        return

    summary_key = 'final_avg_over_3_pool_seeds'

    def mean_std(values):
        # (mean, std) pair in the layout the print loop below indexes into.
        return (np.mean(values), np.std(values))

    # Bucket the runs that carry the final summary by modality set.
    by_modalities = {}
    for run in runs:
        if summary_key in run:
            by_modalities.setdefault(','.join(run['modalities']), []).append(run)

    table = []
    for modalities, members in by_modalities.items():
        summaries = [member[summary_key] for member in members]
        first = members[0]
        table.append({
            'modalities': modalities,
            'n_seeds': len(members),
            'r1': mean_std([s['recall@1'] for s in summaries]),
            'r5': mean_std([s['recall@5'] for s in summaries]),
            'r10': mean_std([s['recall@10'] for s in summaries]),
            'medR': mean_std([s['median_rank'] for s in summaries]),
            'n_test': first.get('n_test_segments', 0),
            'K': first.get('K_pool', 100),
        })

    # Best recall@10 first.
    table = sorted(table, key=lambda entry: entry['r10'][0], reverse=True)

    print(f"  {'Modalities':<30s}  N  N_test  K  "
          f"{'R@1':<12s}  {'R@5':<12s}  {'R@10':<12s}  {'medR':<12s}")
    print('  ' + '-' * 100)
    for entry in table:
        r1_mean, r1_std = entry['r1']
        r5_mean, r5_std = entry['r5']
        r10_mean, r10_std = entry['r10']
        med_mean, med_std = entry['medR']
        print(f"  {entry['modalities']:<30s}  {entry['n_seeds']}  {entry['n_test']:<6d}  {entry['K']:<2d}  "
              f"{r1_mean:.3f}±{r1_std:.3f}  "
              f"{r5_mean:.3f}±{r5_std:.3f}  "
              f"{r10_mean:.3f}±{r10_std:.3f}  "
              f"{med_mean:.1f}±{med_std:.1f}")


def main():
    """Print the aggregated tables for experiments A, B and C, in that order."""
    for report in (aggregate_expA, aggregate_expB, aggregate_expC):
        report()


# Run the aggregation only when this file is executed as a script; importing
# it as a module defines the functions without printing anything.
if __name__ == '__main__':
    main()