| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752142040, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.03477500677108764, |
| "tpp_threshold_2_intended_diff_only": 0.05070000886917114, |
| "tpp_threshold_2_unintended_diff_only": 0.015925002098083497, |
| "tpp_threshold_5_total_metric": 0.08220000118017197, |
| "tpp_threshold_5_intended_diff_only": 0.13479999899864198, |
| "tpp_threshold_5_unintended_diff_only": 0.05259999781847, |
| "tpp_threshold_10_total_metric": 0.12745000869035722, |
| "tpp_threshold_10_intended_diff_only": 0.20530000925064085, |
| "tpp_threshold_10_unintended_diff_only": 0.07785000056028366, |
| "tpp_threshold_20_total_metric": 0.1848000019788742, |
| "tpp_threshold_20_intended_diff_only": 0.29010000824928284, |
| "tpp_threshold_20_unintended_diff_only": 0.10530000627040863, |
| "tpp_threshold_50_total_metric": 0.24222500920295714, |
| "tpp_threshold_50_intended_diff_only": 0.3974000155925751, |
| "tpp_threshold_50_unintended_diff_only": 0.1551750063896179, |
| "tpp_threshold_100_total_metric": 0.24040002822875978, |
| "tpp_threshold_100_intended_diff_only": 0.4265000343322754, |
| "tpp_threshold_100_unintended_diff_only": 0.18610000610351562, |
| "tpp_threshold_500_total_metric": 0.1902750343084335, |
| "tpp_threshold_500_intended_diff_only": 0.43320004343986507, |
| "tpp_threshold_500_unintended_diff_only": 0.24292500913143159 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.0628500074148178, |
| "tpp_threshold_2_intended_diff_only": 0.09020000696182251, |
| "tpp_threshold_2_unintended_diff_only": 0.0273499995470047, |
| "tpp_threshold_5_total_metric": 0.1393500030040741, |
| "tpp_threshold_5_intended_diff_only": 0.23519999980926515, |
| "tpp_threshold_5_unintended_diff_only": 0.09584999680519105, |
| "tpp_threshold_10_total_metric": 0.17985000014305114, |
| "tpp_threshold_10_intended_diff_only": 0.3194000005722046, |
| "tpp_threshold_10_unintended_diff_only": 0.13955000042915344, |
| "tpp_threshold_20_total_metric": 0.22805000543594361, |
| "tpp_threshold_20_intended_diff_only": 0.4182000160217285, |
| "tpp_threshold_20_unintended_diff_only": 0.1901500105857849, |
| "tpp_threshold_50_total_metric": 0.18135001063346862, |
| "tpp_threshold_50_intended_diff_only": 0.45540001392364504, |
| "tpp_threshold_50_unintended_diff_only": 0.27405000329017637, |
| "tpp_threshold_100_total_metric": 0.13180003464221954, |
| "tpp_threshold_100_intended_diff_only": 0.4618000388145447, |
| "tpp_threshold_100_unintended_diff_only": 0.3300000041723251, |
| "tpp_threshold_500_total_metric": 0.07050002515316009, |
| "tpp_threshold_500_intended_diff_only": 0.462600040435791, |
| "tpp_threshold_500_unintended_diff_only": 0.39210001528263094 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.006700006127357483, |
| "tpp_threshold_2_intended_diff_only": 0.011200010776519775, |
| "tpp_threshold_2_unintended_diff_only": 0.004500004649162293, |
| "tpp_threshold_5_total_metric": 0.025049999356269836, |
| "tpp_threshold_5_intended_diff_only": 0.034399998188018796, |
| "tpp_threshold_5_unintended_diff_only": 0.009349998831748963, |
| "tpp_threshold_10_total_metric": 0.07505001723766327, |
| "tpp_threshold_10_intended_diff_only": 0.09120001792907714, |
| "tpp_threshold_10_unintended_diff_only": 0.01615000069141388, |
| "tpp_threshold_20_total_metric": 0.1415499985218048, |
| "tpp_threshold_20_intended_diff_only": 0.16200000047683716, |
| "tpp_threshold_20_unintended_diff_only": 0.020450001955032347, |
| "tpp_threshold_50_total_metric": 0.30310000777244567, |
| "tpp_threshold_50_intended_diff_only": 0.33940001726150515, |
| "tpp_threshold_50_unintended_diff_only": 0.03630000948905945, |
| "tpp_threshold_100_total_metric": 0.3490000218153, |
| "tpp_threshold_100_intended_diff_only": 0.3912000298500061, |
| "tpp_threshold_100_unintended_diff_only": 0.04220000803470612, |
| "tpp_threshold_500_total_metric": 0.31005004346370696, |
| "tpp_threshold_500_intended_diff_only": 0.4038000464439392, |
| "tpp_threshold_500_unintended_diff_only": 0.09375000298023224 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.2.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.2.hook_resid_post", |
| "hook_layer": 2, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.0015000253915786743, |
| "tpp_threshold_2_intended_diff_only": 0.0040000081062316895, |
| "tpp_threshold_2_unintended_diff_only": 0.002499982714653015, |
| "tpp_threshold_5_total_metric": 0.13375000655651093, |
| "tpp_threshold_5_intended_diff_only": 0.3240000009536743, |
| "tpp_threshold_5_unintended_diff_only": 0.1902499943971634, |
| "tpp_threshold_10_total_metric": 0.14675001800060272, |
| "tpp_threshold_10_intended_diff_only": 0.36900001764297485, |
| "tpp_threshold_10_unintended_diff_only": 0.22224999964237213, |
| "tpp_threshold_20_total_metric": 0.15550000965595245, |
| "tpp_threshold_20_intended_diff_only": 0.41100001335144043, |
| "tpp_threshold_20_unintended_diff_only": 0.255500003695488, |
| "tpp_threshold_50_total_metric": 0.14350000023841858, |
| "tpp_threshold_50_intended_diff_only": 0.4259999990463257, |
| "tpp_threshold_50_unintended_diff_only": 0.2824999988079071, |
| "tpp_threshold_100_total_metric": 0.09800004959106445, |
| "tpp_threshold_100_intended_diff_only": 0.4410000443458557, |
| "tpp_threshold_100_unintended_diff_only": 0.34299999475479126, |
| "tpp_threshold_500_total_metric": 0.05625005066394806, |
| "tpp_threshold_500_intended_diff_only": 0.4450000524520874, |
| "tpp_threshold_500_unintended_diff_only": 0.38875000178813934 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.08274997770786285, |
| "tpp_threshold_2_intended_diff_only": 0.1119999885559082, |
| "tpp_threshold_2_unintended_diff_only": 0.02925001084804535, |
| "tpp_threshold_5_total_metric": 0.1704999804496765, |
| "tpp_threshold_5_intended_diff_only": 0.24699997901916504, |
| "tpp_threshold_5_unintended_diff_only": 0.07649999856948853, |
| "tpp_threshold_10_total_metric": 0.20374996960163116, |
| "tpp_threshold_10_intended_diff_only": 0.30799996852874756, |
| "tpp_threshold_10_unintended_diff_only": 0.1042499989271164, |
| "tpp_threshold_20_total_metric": 0.19849999248981476, |
| "tpp_threshold_20_intended_diff_only": 0.41100001335144043, |
| "tpp_threshold_20_unintended_diff_only": 0.21250002086162567, |
| "tpp_threshold_50_total_metric": 0.19599996507167816, |
| "tpp_threshold_50_intended_diff_only": 0.4599999785423279, |
| "tpp_threshold_50_unintended_diff_only": 0.2640000134706497, |
| "tpp_threshold_100_total_metric": 0.15550000965595245, |
| "tpp_threshold_100_intended_diff_only": 0.4610000252723694, |
| "tpp_threshold_100_unintended_diff_only": 0.30550001561641693, |
| "tpp_threshold_500_total_metric": 0.06925000250339508, |
| "tpp_threshold_500_intended_diff_only": 0.4610000252723694, |
| "tpp_threshold_500_unintended_diff_only": 0.3917500227689743 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.06725001335144043, |
| "tpp_threshold_2_intended_diff_only": 0.11400002241134644, |
| "tpp_threshold_2_unintended_diff_only": 0.046750009059906006, |
| "tpp_threshold_5_total_metric": 0.11875000596046448, |
| "tpp_threshold_5_intended_diff_only": 0.16200000047683716, |
| "tpp_threshold_5_unintended_diff_only": 0.04324999451637268, |
| "tpp_threshold_10_total_metric": 0.17650003731250763, |
| "tpp_threshold_10_intended_diff_only": 0.2890000343322754, |
| "tpp_threshold_10_unintended_diff_only": 0.11249999701976776, |
| "tpp_threshold_20_total_metric": 0.21000000834465027, |
| "tpp_threshold_20_intended_diff_only": 0.36500000953674316, |
| "tpp_threshold_20_unintended_diff_only": 0.1550000011920929, |
| "tpp_threshold_50_total_metric": 0.1445000320672989, |
| "tpp_threshold_50_intended_diff_only": 0.43400001525878906, |
| "tpp_threshold_50_unintended_diff_only": 0.2894999831914902, |
| "tpp_threshold_100_total_metric": 0.10350005328655243, |
| "tpp_threshold_100_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_100_unintended_diff_only": 0.3464999943971634, |
| "tpp_threshold_500_total_metric": 0.04075004160404205, |
| "tpp_threshold_500_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_500_unintended_diff_only": 0.40925000607967377 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.0040000081062316895, |
| "tpp_threshold_2_intended_diff_only": 0.0040000081062316895, |
| "tpp_threshold_2_unintended_diff_only": 0.0, |
| "tpp_threshold_5_total_metric": 0.0832500010728836, |
| "tpp_threshold_5_intended_diff_only": 0.1850000023841858, |
| "tpp_threshold_5_unintended_diff_only": 0.10175000131130219, |
| "tpp_threshold_10_total_metric": 0.13499996066093445, |
| "tpp_threshold_10_intended_diff_only": 0.24299997091293335, |
| "tpp_threshold_10_unintended_diff_only": 0.1080000102519989, |
| "tpp_threshold_20_total_metric": 0.2659999877214432, |
| "tpp_threshold_20_intended_diff_only": 0.4440000057220459, |
| "tpp_threshold_20_unintended_diff_only": 0.17800001800060272, |
| "tpp_threshold_50_total_metric": 0.2070000171661377, |
| "tpp_threshold_50_intended_diff_only": 0.4880000352859497, |
| "tpp_threshold_50_unintended_diff_only": 0.281000018119812, |
| "tpp_threshold_100_total_metric": 0.1507500261068344, |
| "tpp_threshold_100_intended_diff_only": 0.4880000352859497, |
| "tpp_threshold_100_unintended_diff_only": 0.3372500091791153, |
| "tpp_threshold_500_total_metric": 0.10725000500679016, |
| "tpp_threshold_500_intended_diff_only": 0.4880000352859497, |
| "tpp_threshold_500_unintended_diff_only": 0.38075003027915955 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.1587500125169754, |
| "tpp_threshold_2_intended_diff_only": 0.21700000762939453, |
| "tpp_threshold_2_unintended_diff_only": 0.05824999511241913, |
| "tpp_threshold_5_total_metric": 0.19050002098083496, |
| "tpp_threshold_5_intended_diff_only": 0.2580000162124634, |
| "tpp_threshold_5_unintended_diff_only": 0.06749999523162842, |
| "tpp_threshold_10_total_metric": 0.23725001513957977, |
| "tpp_threshold_10_intended_diff_only": 0.3880000114440918, |
| "tpp_threshold_10_unintended_diff_only": 0.15074999630451202, |
| "tpp_threshold_20_total_metric": 0.31025002896785736, |
| "tpp_threshold_20_intended_diff_only": 0.46000003814697266, |
| "tpp_threshold_20_unintended_diff_only": 0.1497500091791153, |
| "tpp_threshold_50_total_metric": 0.21575003862380981, |
| "tpp_threshold_50_intended_diff_only": 0.46900004148483276, |
| "tpp_threshold_50_unintended_diff_only": 0.25325000286102295, |
| "tpp_threshold_100_total_metric": 0.15125003457069397, |
| "tpp_threshold_100_intended_diff_only": 0.46900004148483276, |
| "tpp_threshold_100_unintended_diff_only": 0.3177500069141388, |
| "tpp_threshold_500_total_metric": 0.07900002598762512, |
| "tpp_threshold_500_intended_diff_only": 0.46900004148483276, |
| "tpp_threshold_500_unintended_diff_only": 0.39000001549720764 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.0005000382661819458, |
| "tpp_threshold_2_intended_diff_only": 0.006000041961669922, |
| "tpp_threshold_2_unintended_diff_only": 0.005500003695487976, |
| "tpp_threshold_5_total_metric": 0.039750054478645325, |
| "tpp_threshold_5_intended_diff_only": 0.04300004243850708, |
| "tpp_threshold_5_unintended_diff_only": 0.0032499879598617554, |
| "tpp_threshold_10_total_metric": 0.07875004410743713, |
| "tpp_threshold_10_intended_diff_only": 0.09400004148483276, |
| "tpp_threshold_10_unintended_diff_only": 0.01524999737739563, |
| "tpp_threshold_20_total_metric": 0.15825000405311584, |
| "tpp_threshold_20_intended_diff_only": 0.17500001192092896, |
| "tpp_threshold_20_unintended_diff_only": 0.01675000786781311, |
| "tpp_threshold_50_total_metric": 0.3187500387430191, |
| "tpp_threshold_50_intended_diff_only": 0.3450000286102295, |
| "tpp_threshold_50_unintended_diff_only": 0.026249989867210388, |
| "tpp_threshold_100_total_metric": 0.3982500284910202, |
| "tpp_threshold_100_intended_diff_only": 0.4190000295639038, |
| "tpp_threshold_100_unintended_diff_only": 0.020750001072883606, |
| "tpp_threshold_500_total_metric": 0.35325007140636444, |
| "tpp_threshold_500_intended_diff_only": 0.43000006675720215, |
| "tpp_threshold_500_unintended_diff_only": 0.07674999535083771 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.028750047087669373, |
| "tpp_threshold_2_intended_diff_only": 0.030000030994415283, |
| "tpp_threshold_2_unintended_diff_only": 0.0012499839067459106, |
| "tpp_threshold_5_total_metric": 0.042750000953674316, |
| "tpp_threshold_5_intended_diff_only": 0.0690000057220459, |
| "tpp_threshold_5_unintended_diff_only": 0.026250004768371582, |
| "tpp_threshold_10_total_metric": 0.08200006186962128, |
| "tpp_threshold_10_intended_diff_only": 0.10700005292892456, |
| "tpp_threshold_10_unintended_diff_only": 0.024999991059303284, |
| "tpp_threshold_20_total_metric": 0.17275004088878632, |
| "tpp_threshold_20_intended_diff_only": 0.20100003480911255, |
| "tpp_threshold_20_unintended_diff_only": 0.028249993920326233, |
| "tpp_threshold_50_total_metric": 0.32625000178813934, |
| "tpp_threshold_50_intended_diff_only": 0.3880000114440918, |
| "tpp_threshold_50_unintended_diff_only": 0.061750009655952454, |
| "tpp_threshold_100_total_metric": 0.340750053524971, |
| "tpp_threshold_100_intended_diff_only": 0.42100006341934204, |
| "tpp_threshold_100_unintended_diff_only": 0.08025000989437103, |
| "tpp_threshold_500_total_metric": 0.27125006914138794, |
| "tpp_threshold_500_intended_diff_only": 0.42100006341934204, |
| "tpp_threshold_500_unintended_diff_only": 0.1497499942779541 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.010250017046928406, |
| "tpp_threshold_2_intended_diff_only": -0.0040000081062316895, |
| "tpp_threshold_2_unintended_diff_only": 0.006250008940696716, |
| "tpp_threshold_5_total_metric": -0.012500032782554626, |
| "tpp_threshold_5_intended_diff_only": -0.012000024318695068, |
| "tpp_threshold_5_unintended_diff_only": 0.0005000084638595581, |
| "tpp_threshold_10_total_metric": 0.04099997878074646, |
| "tpp_threshold_10_intended_diff_only": 0.04799997806549072, |
| "tpp_threshold_10_unintended_diff_only": 0.006999999284744263, |
| "tpp_threshold_20_total_metric": 0.08899997174739838, |
| "tpp_threshold_20_intended_diff_only": 0.10399997234344482, |
| "tpp_threshold_20_unintended_diff_only": 0.015000000596046448, |
| "tpp_threshold_50_total_metric": 0.37675000727176666, |
| "tpp_threshold_50_intended_diff_only": 0.38700002431869507, |
| "tpp_threshold_50_unintended_diff_only": 0.010250017046928406, |
| "tpp_threshold_100_total_metric": 0.38850003480911255, |
| "tpp_threshold_100_intended_diff_only": 0.40000003576278687, |
| "tpp_threshold_100_unintended_diff_only": 0.011500000953674316, |
| "tpp_threshold_500_total_metric": 0.3317500352859497, |
| "tpp_threshold_500_intended_diff_only": 0.40000003576278687, |
| "tpp_threshold_500_unintended_diff_only": 0.06825000047683716 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.010250002145767212, |
| "tpp_threshold_2_intended_diff_only": 0.013000011444091797, |
| "tpp_threshold_2_unintended_diff_only": 0.002750009298324585, |
| "tpp_threshold_5_total_metric": 0.010000020265579224, |
| "tpp_threshold_5_intended_diff_only": 0.013000011444091797, |
| "tpp_threshold_5_unintended_diff_only": 0.0029999911785125732, |
| "tpp_threshold_10_total_metric": 0.046250030398368835, |
| "tpp_threshold_10_intended_diff_only": 0.057000041007995605, |
| "tpp_threshold_10_unintended_diff_only": 0.01075001060962677, |
| "tpp_threshold_20_total_metric": 0.08349999785423279, |
| "tpp_threshold_20_intended_diff_only": 0.09799998998641968, |
| "tpp_threshold_20_unintended_diff_only": 0.01449999213218689, |
| "tpp_threshold_50_total_metric": 0.19300000369548798, |
| "tpp_threshold_50_intended_diff_only": 0.23500001430511475, |
| "tpp_threshold_50_unintended_diff_only": 0.04200001060962677, |
| "tpp_threshold_100_total_metric": 0.3107500374317169, |
| "tpp_threshold_100_intended_diff_only": 0.3580000400543213, |
| "tpp_threshold_100_unintended_diff_only": 0.04725000262260437, |
| "tpp_threshold_500_total_metric": 0.3112500458955765, |
| "tpp_threshold_500_intended_diff_only": 0.40800005197525024, |
| "tpp_threshold_500_unintended_diff_only": 0.09675000607967377 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.00424996018409729, |
| "tpp_threshold_2_intended_diff_only": 0.010999977588653564, |
| "tpp_threshold_2_unintended_diff_only": 0.006750017404556274, |
| "tpp_threshold_5_total_metric": 0.045249953866004944, |
| "tpp_threshold_5_intended_diff_only": 0.05899995565414429, |
| "tpp_threshold_5_unintended_diff_only": 0.013750001788139343, |
| "tpp_threshold_10_total_metric": 0.12724997103214264, |
| "tpp_threshold_10_intended_diff_only": 0.1499999761581421, |
| "tpp_threshold_10_unintended_diff_only": 0.02275000512599945, |
| "tpp_threshold_20_total_metric": 0.20424997806549072, |
| "tpp_threshold_20_intended_diff_only": 0.23199999332427979, |
| "tpp_threshold_20_unintended_diff_only": 0.027750015258789062, |
| "tpp_threshold_50_total_metric": 0.3007499873638153, |
| "tpp_threshold_50_intended_diff_only": 0.34200000762939453, |
| "tpp_threshold_50_unintended_diff_only": 0.041250020265579224, |
| "tpp_threshold_100_total_metric": 0.30674995481967926, |
| "tpp_threshold_100_intended_diff_only": 0.3579999804496765, |
| "tpp_threshold_100_unintended_diff_only": 0.05125002562999725, |
| "tpp_threshold_500_total_metric": 0.2827499955892563, |
| "tpp_threshold_500_intended_diff_only": 0.36000001430511475, |
| "tpp_threshold_500_unintended_diff_only": 0.07725001871585846 |
| } |
| } |
| } |
| } |