| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745620551943, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.0761249989271164, |
| "tpp_threshold_2_intended_diff_only": 0.09410000443458558, |
| "tpp_threshold_2_unintended_diff_only": 0.017975005507469177, |
| "tpp_threshold_5_total_metric": 0.16362500488758086, |
| "tpp_threshold_5_intended_diff_only": 0.20040000677108766, |
| "tpp_threshold_5_unintended_diff_only": 0.036775001883506776, |
| "tpp_threshold_10_total_metric": 0.20382500290870664, |
| "tpp_threshold_10_intended_diff_only": 0.26760001182556153, |
| "tpp_threshold_10_unintended_diff_only": 0.06377500891685486, |
| "tpp_threshold_20_total_metric": 0.2339750036597252, |
| "tpp_threshold_20_intended_diff_only": 0.3255000114440918, |
| "tpp_threshold_20_unintended_diff_only": 0.0915250077843666, |
| "tpp_threshold_50_total_metric": 0.25427500903606415, |
| "tpp_threshold_50_intended_diff_only": 0.39020001888275146, |
| "tpp_threshold_50_unintended_diff_only": 0.13592500984668732, |
| "tpp_threshold_100_total_metric": 0.25915002077817917, |
| "tpp_threshold_100_intended_diff_only": 0.4279000282287598, |
| "tpp_threshold_100_unintended_diff_only": 0.16875000745058058, |
| "tpp_threshold_500_total_metric": 0.21675003468990328, |
| "tpp_threshold_500_intended_diff_only": 0.4487000465393066, |
| "tpp_threshold_500_unintended_diff_only": 0.2319500118494034 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.13334999084472657, |
| "tpp_threshold_2_intended_diff_only": 0.1527999997138977, |
| "tpp_threshold_2_unintended_diff_only": 0.019450008869171143, |
| "tpp_threshold_5_total_metric": 0.2514500081539154, |
| "tpp_threshold_5_intended_diff_only": 0.30380001068115237, |
| "tpp_threshold_5_unintended_diff_only": 0.05235000252723694, |
| "tpp_threshold_10_total_metric": 0.28190000653266906, |
| "tpp_threshold_10_intended_diff_only": 0.3820000171661377, |
| "tpp_threshold_10_unintended_diff_only": 0.10010001063346863, |
| "tpp_threshold_20_total_metric": 0.28990001380443575, |
| "tpp_threshold_20_intended_diff_only": 0.4414000153541565, |
| "tpp_threshold_20_unintended_diff_only": 0.15150000154972076, |
| "tpp_threshold_50_total_metric": 0.2311500132083893, |
| "tpp_threshold_50_intended_diff_only": 0.46360002756118773, |
| "tpp_threshold_50_unintended_diff_only": 0.23245001435279847, |
| "tpp_threshold_100_total_metric": 0.17330000698566436, |
| "tpp_threshold_100_intended_diff_only": 0.4660000205039978, |
| "tpp_threshold_100_unintended_diff_only": 0.2927000135183334, |
| "tpp_threshold_500_total_metric": 0.10585003197193146, |
| "tpp_threshold_500_intended_diff_only": 0.4666000485420227, |
| "tpp_threshold_500_unintended_diff_only": 0.36075001657009126 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.018900007009506226, |
| "tpp_threshold_2_intended_diff_only": 0.035400009155273436, |
| "tpp_threshold_2_unintended_diff_only": 0.01650000214576721, |
| "tpp_threshold_5_total_metric": 0.07580000162124634, |
| "tpp_threshold_5_intended_diff_only": 0.09700000286102295, |
| "tpp_threshold_5_unintended_diff_only": 0.02120000123977661, |
| "tpp_threshold_10_total_metric": 0.12574999928474426, |
| "tpp_threshold_10_intended_diff_only": 0.15320000648498536, |
| "tpp_threshold_10_unintended_diff_only": 0.02745000720024109, |
| "tpp_threshold_20_total_metric": 0.17804999351501466, |
| "tpp_threshold_20_intended_diff_only": 0.2096000075340271, |
| "tpp_threshold_20_unintended_diff_only": 0.03155001401901245, |
| "tpp_threshold_50_total_metric": 0.277400004863739, |
| "tpp_threshold_50_intended_diff_only": 0.3168000102043152, |
| "tpp_threshold_50_unintended_diff_only": 0.03940000534057617, |
| "tpp_threshold_100_total_metric": 0.34500003457069395, |
| "tpp_threshold_100_intended_diff_only": 0.38980003595352175, |
| "tpp_threshold_100_unintended_diff_only": 0.04480000138282776, |
| "tpp_threshold_500_total_metric": 0.32765003740787507, |
| "tpp_threshold_500_intended_diff_only": 0.4308000445365906, |
| "tpp_threshold_500_unintended_diff_only": 0.10315000712871551 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.23.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.23.hook_resid_post", |
| "hook_layer": 23, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.19550004601478577, |
| "tpp_threshold_2_intended_diff_only": 0.21900004148483276, |
| "tpp_threshold_2_unintended_diff_only": 0.023499995470046997, |
| "tpp_threshold_5_total_metric": 0.20075003802776337, |
| "tpp_threshold_5_intended_diff_only": 0.2560000419616699, |
| "tpp_threshold_5_unintended_diff_only": 0.055250003933906555, |
| "tpp_threshold_10_total_metric": 0.22700005769729614, |
| "tpp_threshold_10_intended_diff_only": 0.3280000686645508, |
| "tpp_threshold_10_unintended_diff_only": 0.10100001096725464, |
| "tpp_threshold_20_total_metric": 0.257250040769577, |
| "tpp_threshold_20_intended_diff_only": 0.4270000457763672, |
| "tpp_threshold_20_unintended_diff_only": 0.16975000500679016, |
| "tpp_threshold_50_total_metric": 0.19175001978874207, |
| "tpp_threshold_50_intended_diff_only": 0.44700002670288086, |
| "tpp_threshold_50_unintended_diff_only": 0.2552500069141388, |
| "tpp_threshold_100_total_metric": 0.16925005614757538, |
| "tpp_threshold_100_intended_diff_only": 0.4530000686645508, |
| "tpp_threshold_100_unintended_diff_only": 0.2837500125169754, |
| "tpp_threshold_500_total_metric": 0.06925004720687866, |
| "tpp_threshold_500_intended_diff_only": 0.4530000686645508, |
| "tpp_threshold_500_unintended_diff_only": 0.3837500214576721 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.17299996316432953, |
| "tpp_threshold_2_intended_diff_only": 0.2149999737739563, |
| "tpp_threshold_2_unintended_diff_only": 0.04200001060962677, |
| "tpp_threshold_5_total_metric": 0.2099999636411667, |
| "tpp_threshold_5_intended_diff_only": 0.2849999666213989, |
| "tpp_threshold_5_unintended_diff_only": 0.07500000298023224, |
| "tpp_threshold_10_total_metric": 0.2634999603033066, |
| "tpp_threshold_10_intended_diff_only": 0.35899996757507324, |
| "tpp_threshold_10_unintended_diff_only": 0.09550000727176666, |
| "tpp_threshold_20_total_metric": 0.2812499850988388, |
| "tpp_threshold_20_intended_diff_only": 0.43199998140335083, |
| "tpp_threshold_20_unintended_diff_only": 0.15074999630451202, |
| "tpp_threshold_50_total_metric": 0.1860000044107437, |
| "tpp_threshold_50_intended_diff_only": 0.4570000171661377, |
| "tpp_threshold_50_unintended_diff_only": 0.271000012755394, |
| "tpp_threshold_100_total_metric": 0.12849995493888855, |
| "tpp_threshold_100_intended_diff_only": 0.4599999785423279, |
| "tpp_threshold_100_unintended_diff_only": 0.33150002360343933, |
| "tpp_threshold_500_total_metric": 0.070250004529953, |
| "tpp_threshold_500_intended_diff_only": 0.4610000252723694, |
| "tpp_threshold_500_unintended_diff_only": 0.3907500207424164 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.15200002491474152, |
| "tpp_threshold_2_intended_diff_only": 0.15900003910064697, |
| "tpp_threshold_2_unintended_diff_only": 0.0070000141859054565, |
| "tpp_threshold_5_total_metric": 0.30650000274181366, |
| "tpp_threshold_5_intended_diff_only": 0.3460000157356262, |
| "tpp_threshold_5_unintended_diff_only": 0.03950001299381256, |
| "tpp_threshold_10_total_metric": 0.3255000114440918, |
| "tpp_threshold_10_intended_diff_only": 0.3920000195503235, |
| "tpp_threshold_10_unintended_diff_only": 0.06650000810623169, |
| "tpp_threshold_20_total_metric": 0.35200005769729614, |
| "tpp_threshold_20_intended_diff_only": 0.440000057220459, |
| "tpp_threshold_20_unintended_diff_only": 0.08799999952316284, |
| "tpp_threshold_50_total_metric": 0.27000004053115845, |
| "tpp_threshold_50_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_50_unintended_diff_only": 0.18000000715255737, |
| "tpp_threshold_100_total_metric": 0.1807500123977661, |
| "tpp_threshold_100_intended_diff_only": 0.4520000219345093, |
| "tpp_threshold_100_unintended_diff_only": 0.27125000953674316, |
| "tpp_threshold_500_total_metric": 0.06800004839897156, |
| "tpp_threshold_500_intended_diff_only": 0.4530000686645508, |
| "tpp_threshold_500_unintended_diff_only": 0.3850000202655792 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.11524996161460876, |
| "tpp_threshold_2_intended_diff_only": 0.12699997425079346, |
| "tpp_threshold_2_unintended_diff_only": 0.011750012636184692, |
| "tpp_threshold_5_total_metric": 0.3265000432729721, |
| "tpp_threshold_5_intended_diff_only": 0.3630000352859497, |
| "tpp_threshold_5_unintended_diff_only": 0.0364999920129776, |
| "tpp_threshold_10_total_metric": 0.372749999165535, |
| "tpp_threshold_10_intended_diff_only": 0.4710000157356262, |
| "tpp_threshold_10_unintended_diff_only": 0.09825001657009125, |
| "tpp_threshold_20_total_metric": 0.33900000154972076, |
| "tpp_threshold_20_intended_diff_only": 0.49000000953674316, |
| "tpp_threshold_20_unintended_diff_only": 0.1510000079870224, |
| "tpp_threshold_50_total_metric": 0.27900002896785736, |
| "tpp_threshold_50_intended_diff_only": 0.4920000433921814, |
| "tpp_threshold_50_unintended_diff_only": 0.21300001442432404, |
| "tpp_threshold_100_total_metric": 0.21124999225139618, |
| "tpp_threshold_100_intended_diff_only": 0.4909999966621399, |
| "tpp_threshold_100_unintended_diff_only": 0.2797500044107437, |
| "tpp_threshold_500_total_metric": 0.1990000307559967, |
| "tpp_threshold_500_intended_diff_only": 0.4920000433921814, |
| "tpp_threshold_500_unintended_diff_only": 0.2930000126361847 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.030999958515167236, |
| "tpp_threshold_2_intended_diff_only": 0.04399996995925903, |
| "tpp_threshold_2_unintended_diff_only": 0.013000011444091797, |
| "tpp_threshold_5_total_metric": 0.2134999930858612, |
| "tpp_threshold_5_intended_diff_only": 0.26899999380111694, |
| "tpp_threshold_5_unintended_diff_only": 0.05550000071525574, |
| "tpp_threshold_10_total_metric": 0.22075000405311584, |
| "tpp_threshold_10_intended_diff_only": 0.36000001430511475, |
| "tpp_threshold_10_unintended_diff_only": 0.1392500102519989, |
| "tpp_threshold_20_total_metric": 0.2199999839067459, |
| "tpp_threshold_20_intended_diff_only": 0.4179999828338623, |
| "tpp_threshold_20_unintended_diff_only": 0.1979999989271164, |
| "tpp_threshold_50_total_metric": 0.22899997234344482, |
| "tpp_threshold_50_intended_diff_only": 0.47200000286102295, |
| "tpp_threshold_50_unintended_diff_only": 0.24300003051757812, |
| "tpp_threshold_100_total_metric": 0.17675001919269562, |
| "tpp_threshold_100_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_100_unintended_diff_only": 0.29725001752376556, |
| "tpp_threshold_500_total_metric": 0.12275002896785736, |
| "tpp_threshold_500_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_500_unintended_diff_only": 0.3512500077486038 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.006750032305717468, |
| "tpp_threshold_2_intended_diff_only": 0.021000027656555176, |
| "tpp_threshold_2_unintended_diff_only": 0.014249995350837708, |
| "tpp_threshold_5_total_metric": 0.058999985456466675, |
| "tpp_threshold_5_intended_diff_only": 0.07400000095367432, |
| "tpp_threshold_5_unintended_diff_only": 0.015000015497207642, |
| "tpp_threshold_10_total_metric": 0.08449997007846832, |
| "tpp_threshold_10_intended_diff_only": 0.10699999332427979, |
| "tpp_threshold_10_unintended_diff_only": 0.022500023245811462, |
| "tpp_threshold_20_total_metric": 0.15725001692771912, |
| "tpp_threshold_20_intended_diff_only": 0.17800003290176392, |
| "tpp_threshold_20_unintended_diff_only": 0.0207500159740448, |
| "tpp_threshold_50_total_metric": 0.3165000379085541, |
| "tpp_threshold_50_intended_diff_only": 0.34400004148483276, |
| "tpp_threshold_50_unintended_diff_only": 0.027500003576278687, |
| "tpp_threshold_100_total_metric": 0.39350003004074097, |
| "tpp_threshold_100_intended_diff_only": 0.4180000424385071, |
| "tpp_threshold_100_unintended_diff_only": 0.024500012397766113, |
| "tpp_threshold_500_total_metric": 0.4050000309944153, |
| "tpp_threshold_500_intended_diff_only": 0.45600003004074097, |
| "tpp_threshold_500_unintended_diff_only": 0.050999999046325684 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.030249983072280884, |
| "tpp_threshold_2_intended_diff_only": 0.06499999761581421, |
| "tpp_threshold_2_unintended_diff_only": 0.034750014543533325, |
| "tpp_threshold_5_total_metric": 0.14499996602535248, |
| "tpp_threshold_5_intended_diff_only": 0.18799996376037598, |
| "tpp_threshold_5_unintended_diff_only": 0.0429999977350235, |
| "tpp_threshold_10_total_metric": 0.23799997568130493, |
| "tpp_threshold_10_intended_diff_only": 0.2879999876022339, |
| "tpp_threshold_10_unintended_diff_only": 0.050000011920928955, |
| "tpp_threshold_20_total_metric": 0.29649995267391205, |
| "tpp_threshold_20_intended_diff_only": 0.35899996757507324, |
| "tpp_threshold_20_unintended_diff_only": 0.0625000149011612, |
| "tpp_threshold_50_total_metric": 0.34449996054172516, |
| "tpp_threshold_50_intended_diff_only": 0.4229999780654907, |
| "tpp_threshold_50_unintended_diff_only": 0.07850001752376556, |
| "tpp_threshold_100_total_metric": 0.35600002110004425, |
| "tpp_threshold_100_intended_diff_only": 0.43800002336502075, |
| "tpp_threshold_100_unintended_diff_only": 0.0820000022649765, |
| "tpp_threshold_500_total_metric": 0.34675002098083496, |
| "tpp_threshold_500_intended_diff_only": 0.43800002336502075, |
| "tpp_threshold_500_unintended_diff_only": 0.09125000238418579 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": 0.0037500113248825073, |
| "tpp_threshold_2_intended_diff_only": 0.0040000081062316895, |
| "tpp_threshold_2_unintended_diff_only": 0.00024999678134918213, |
| "tpp_threshold_5_total_metric": -0.0007499903440475464, |
| "tpp_threshold_5_intended_diff_only": 0.0040000081062316895, |
| "tpp_threshold_5_unintended_diff_only": 0.004749998450279236, |
| "tpp_threshold_10_total_metric": 0.02199999988079071, |
| "tpp_threshold_10_intended_diff_only": 0.03200000524520874, |
| "tpp_threshold_10_unintended_diff_only": 0.01000000536441803, |
| "tpp_threshold_20_total_metric": 0.07174995541572571, |
| "tpp_threshold_20_intended_diff_only": 0.08499997854232788, |
| "tpp_threshold_20_unintended_diff_only": 0.013250023126602173, |
| "tpp_threshold_50_total_metric": 0.15949998795986176, |
| "tpp_threshold_50_intended_diff_only": 0.1769999861717224, |
| "tpp_threshold_50_unintended_diff_only": 0.017499998211860657, |
| "tpp_threshold_100_total_metric": 0.27275000512599945, |
| "tpp_threshold_100_intended_diff_only": 0.3009999990463257, |
| "tpp_threshold_100_unintended_diff_only": 0.028249993920326233, |
| "tpp_threshold_500_total_metric": 0.2707500159740448, |
| "tpp_threshold_500_intended_diff_only": 0.4320000410079956, |
| "tpp_threshold_500_unintended_diff_only": 0.1612500250339508 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.029249995946884155, |
| "tpp_threshold_2_intended_diff_only": 0.03299999237060547, |
| "tpp_threshold_2_unintended_diff_only": 0.0037499964237213135, |
| "tpp_threshold_5_total_metric": 0.12550000846385956, |
| "tpp_threshold_5_intended_diff_only": 0.13899999856948853, |
| "tpp_threshold_5_unintended_diff_only": 0.013499990105628967, |
| "tpp_threshold_10_total_metric": 0.1860000491142273, |
| "tpp_threshold_10_intended_diff_only": 0.20500004291534424, |
| "tpp_threshold_10_unintended_diff_only": 0.018999993801116943, |
| "tpp_threshold_20_total_metric": 0.23925001919269562, |
| "tpp_threshold_20_intended_diff_only": 0.26200002431869507, |
| "tpp_threshold_20_unintended_diff_only": 0.02275000512599945, |
| "tpp_threshold_50_total_metric": 0.34925001859664917, |
| "tpp_threshold_50_intended_diff_only": 0.37300002574920654, |
| "tpp_threshold_50_unintended_diff_only": 0.023750007152557373, |
| "tpp_threshold_100_total_metric": 0.38550005853176117, |
| "tpp_threshold_100_intended_diff_only": 0.41200006008148193, |
| "tpp_threshold_100_unintended_diff_only": 0.026500001549720764, |
| "tpp_threshold_500_total_metric": 0.2977500557899475, |
| "tpp_threshold_500_intended_diff_only": 0.43500006198883057, |
| "tpp_threshold_500_unintended_diff_only": 0.13725000619888306 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.024500012397766113, |
| "tpp_threshold_2_intended_diff_only": 0.054000020027160645, |
| "tpp_threshold_2_unintended_diff_only": 0.02950000762939453, |
| "tpp_threshold_5_total_metric": 0.050250038504600525, |
| "tpp_threshold_5_intended_diff_only": 0.08000004291534424, |
| "tpp_threshold_5_unintended_diff_only": 0.029750004410743713, |
| "tpp_threshold_10_total_metric": 0.09825000166893005, |
| "tpp_threshold_10_intended_diff_only": 0.1340000033378601, |
| "tpp_threshold_10_unintended_diff_only": 0.035750001668930054, |
| "tpp_threshold_20_total_metric": 0.12550002336502075, |
| "tpp_threshold_20_intended_diff_only": 0.1640000343322754, |
| "tpp_threshold_20_unintended_diff_only": 0.03850001096725464, |
| "tpp_threshold_50_total_metric": 0.2172500193119049, |
| "tpp_threshold_50_intended_diff_only": 0.2670000195503235, |
| "tpp_threshold_50_unintended_diff_only": 0.04975000023841858, |
| "tpp_threshold_100_total_metric": 0.317250058054924, |
| "tpp_threshold_100_intended_diff_only": 0.3800000548362732, |
| "tpp_threshold_100_unintended_diff_only": 0.06274999678134918, |
| "tpp_threshold_500_total_metric": 0.31800006330013275, |
| "tpp_threshold_500_intended_diff_only": 0.393000066280365, |
| "tpp_threshold_500_unintended_diff_only": 0.07500000298023224 |
| } |
| } |
| } |
| } |