| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745618394683, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.0765000119805336, |
| "tpp_threshold_2_intended_diff_only": 0.12230001091957092, |
| "tpp_threshold_2_unintended_diff_only": 0.04579999893903732, |
| "tpp_threshold_5_total_metric": 0.10167500376701355, |
| "tpp_threshold_5_intended_diff_only": 0.19480000138282777, |
| "tpp_threshold_5_unintended_diff_only": 0.0931249976158142, |
| "tpp_threshold_10_total_metric": 0.11500000208616257, |
| "tpp_threshold_10_intended_diff_only": 0.23920000195503233, |
| "tpp_threshold_10_unintended_diff_only": 0.12419999986886979, |
| "tpp_threshold_20_total_metric": 0.13050001859664917, |
| "tpp_threshold_20_intended_diff_only": 0.28660001754760744, |
| "tpp_threshold_20_unintended_diff_only": 0.15609999895095825, |
| "tpp_threshold_50_total_metric": 0.18260000348091127, |
| "tpp_threshold_50_intended_diff_only": 0.3718000054359436, |
| "tpp_threshold_50_unintended_diff_only": 0.18920000195503234, |
| "tpp_threshold_100_total_metric": 0.2084250032901764, |
| "tpp_threshold_100_intended_diff_only": 0.412500011920929, |
| "tpp_threshold_100_unintended_diff_only": 0.20407500863075256, |
| "tpp_threshold_500_total_metric": 0.2076750323176384, |
| "tpp_threshold_500_intended_diff_only": 0.4441000342369079, |
| "tpp_threshold_500_unintended_diff_only": 0.23642500191926955 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.13225001692771912, |
| "tpp_threshold_2_intended_diff_only": 0.22060000896453857, |
| "tpp_threshold_2_unintended_diff_only": 0.08834999203681945, |
| "tpp_threshold_5_total_metric": 0.15930000245571135, |
| "tpp_threshold_5_intended_diff_only": 0.3381999969482422, |
| "tpp_threshold_5_unintended_diff_only": 0.17889999449253083, |
| "tpp_threshold_10_total_metric": 0.15724999606609344, |
| "tpp_threshold_10_intended_diff_only": 0.3937999963760376, |
| "tpp_threshold_10_unintended_diff_only": 0.23655000030994416, |
| "tpp_threshold_20_total_metric": 0.13380002081394196, |
| "tpp_threshold_20_intended_diff_only": 0.4248000144958496, |
| "tpp_threshold_20_unintended_diff_only": 0.29099999368190765, |
| "tpp_threshold_50_total_metric": 0.10550000369548798, |
| "tpp_threshold_50_intended_diff_only": 0.454200005531311, |
| "tpp_threshold_50_unintended_diff_only": 0.34870000183582306, |
| "tpp_threshold_100_total_metric": 0.0913000077009201, |
| "tpp_threshold_100_intended_diff_only": 0.4626000165939331, |
| "tpp_threshold_100_unintended_diff_only": 0.371300008893013, |
| "tpp_threshold_500_total_metric": 0.06810002028942108, |
| "tpp_threshold_500_intended_diff_only": 0.46760002374649046, |
| "tpp_threshold_500_unintended_diff_only": 0.3995000034570694 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.020750007033348082, |
| "tpp_threshold_2_intended_diff_only": 0.024000012874603273, |
| "tpp_threshold_2_unintended_diff_only": 0.003250005841255188, |
| "tpp_threshold_5_total_metric": 0.04405000507831573, |
| "tpp_threshold_5_intended_diff_only": 0.05140000581741333, |
| "tpp_threshold_5_unintended_diff_only": 0.007350000739097595, |
| "tpp_threshold_10_total_metric": 0.0727500081062317, |
| "tpp_threshold_10_intended_diff_only": 0.0846000075340271, |
| "tpp_threshold_10_unintended_diff_only": 0.01184999942779541, |
| "tpp_threshold_20_total_metric": 0.12720001637935638, |
| "tpp_threshold_20_intended_diff_only": 0.14840002059936525, |
| "tpp_threshold_20_unintended_diff_only": 0.02120000422000885, |
| "tpp_threshold_50_total_metric": 0.2597000032663345, |
| "tpp_threshold_50_intended_diff_only": 0.2894000053405762, |
| "tpp_threshold_50_unintended_diff_only": 0.029700002074241637, |
| "tpp_threshold_100_total_metric": 0.3255499988794327, |
| "tpp_threshold_100_intended_diff_only": 0.3624000072479248, |
| "tpp_threshold_100_unintended_diff_only": 0.03685000836849213, |
| "tpp_threshold_500_total_metric": 0.3472500443458557, |
| "tpp_threshold_500_intended_diff_only": 0.42060004472732543, |
| "tpp_threshold_500_unintended_diff_only": 0.07335000038146973 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.11.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.11.hook_resid_post", |
| "hook_layer": 11, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.1692500114440918, |
| "tpp_threshold_2_intended_diff_only": 0.28200000524520874, |
| "tpp_threshold_2_unintended_diff_only": 0.11274999380111694, |
| "tpp_threshold_5_total_metric": 0.18074999749660492, |
| "tpp_threshold_5_intended_diff_only": 0.33399999141693115, |
| "tpp_threshold_5_unintended_diff_only": 0.15324999392032623, |
| "tpp_threshold_10_total_metric": 0.13349996507167816, |
| "tpp_threshold_10_intended_diff_only": 0.37699997425079346, |
| "tpp_threshold_10_unintended_diff_only": 0.2435000091791153, |
| "tpp_threshold_20_total_metric": 0.08249999582767487, |
| "tpp_threshold_20_intended_diff_only": 0.40799999237060547, |
| "tpp_threshold_20_unintended_diff_only": 0.3254999965429306, |
| "tpp_threshold_50_total_metric": 0.06825003027915955, |
| "tpp_threshold_50_intended_diff_only": 0.4240000247955322, |
| "tpp_threshold_50_unintended_diff_only": 0.3557499945163727, |
| "tpp_threshold_100_total_metric": 0.07075002789497375, |
| "tpp_threshold_100_intended_diff_only": 0.43800002336502075, |
| "tpp_threshold_100_unintended_diff_only": 0.367249995470047, |
| "tpp_threshold_500_total_metric": 0.041250020265579224, |
| "tpp_threshold_500_intended_diff_only": 0.44700002670288086, |
| "tpp_threshold_500_unintended_diff_only": 0.40575000643730164 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.1445000022649765, |
| "tpp_threshold_2_intended_diff_only": 0.2459999918937683, |
| "tpp_threshold_2_unintended_diff_only": 0.10149998962879181, |
| "tpp_threshold_5_total_metric": 0.15825000405311584, |
| "tpp_threshold_5_intended_diff_only": 0.296999990940094, |
| "tpp_threshold_5_unintended_diff_only": 0.13874998688697815, |
| "tpp_threshold_10_total_metric": 0.1652500182390213, |
| "tpp_threshold_10_intended_diff_only": 0.35600000619888306, |
| "tpp_threshold_10_unintended_diff_only": 0.19074998795986176, |
| "tpp_threshold_20_total_metric": 0.11925004422664642, |
| "tpp_threshold_20_intended_diff_only": 0.38700002431869507, |
| "tpp_threshold_20_unintended_diff_only": 0.26774998009204865, |
| "tpp_threshold_50_total_metric": 0.09349997341632843, |
| "tpp_threshold_50_intended_diff_only": 0.44599997997283936, |
| "tpp_threshold_50_unintended_diff_only": 0.3525000065565109, |
| "tpp_threshold_100_total_metric": 0.07325001060962677, |
| "tpp_threshold_100_intended_diff_only": 0.4610000252723694, |
| "tpp_threshold_100_unintended_diff_only": 0.3877500146627426, |
| "tpp_threshold_500_total_metric": 0.05925002694129944, |
| "tpp_threshold_500_intended_diff_only": 0.4650000333786011, |
| "tpp_threshold_500_unintended_diff_only": 0.40575000643730164 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.09100005030632019, |
| "tpp_threshold_2_intended_diff_only": 0.21500003337860107, |
| "tpp_threshold_2_unintended_diff_only": 0.12399998307228088, |
| "tpp_threshold_5_total_metric": 0.09900003671646118, |
| "tpp_threshold_5_intended_diff_only": 0.32600003480911255, |
| "tpp_threshold_5_unintended_diff_only": 0.22699999809265137, |
| "tpp_threshold_10_total_metric": 0.09549997746944427, |
| "tpp_threshold_10_intended_diff_only": 0.37599998712539673, |
| "tpp_threshold_10_unintended_diff_only": 0.28050000965595245, |
| "tpp_threshold_20_total_metric": 0.09925003349781036, |
| "tpp_threshold_20_intended_diff_only": 0.41100001335144043, |
| "tpp_threshold_20_unintended_diff_only": 0.31174997985363007, |
| "tpp_threshold_50_total_metric": 0.07325002551078796, |
| "tpp_threshold_50_intended_diff_only": 0.44300001859664917, |
| "tpp_threshold_50_unintended_diff_only": 0.3697499930858612, |
| "tpp_threshold_100_total_metric": 0.07049998641014099, |
| "tpp_threshold_100_intended_diff_only": 0.45399999618530273, |
| "tpp_threshold_100_unintended_diff_only": 0.38350000977516174, |
| "tpp_threshold_500_total_metric": 0.046750038862228394, |
| "tpp_threshold_500_intended_diff_only": 0.45500004291534424, |
| "tpp_threshold_500_unintended_diff_only": 0.40825000405311584 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.14174999296665192, |
| "tpp_threshold_2_intended_diff_only": 0.20399999618530273, |
| "tpp_threshold_2_unintended_diff_only": 0.06225000321865082, |
| "tpp_threshold_5_total_metric": 0.22974999248981476, |
| "tpp_threshold_5_intended_diff_only": 0.3579999804496765, |
| "tpp_threshold_5_unintended_diff_only": 0.12824998795986176, |
| "tpp_threshold_10_total_metric": 0.2695000022649765, |
| "tpp_threshold_10_intended_diff_only": 0.4449999928474426, |
| "tpp_threshold_10_unintended_diff_only": 0.17549999058246613, |
| "tpp_threshold_20_total_metric": 0.2395000010728836, |
| "tpp_threshold_20_intended_diff_only": 0.4710000157356262, |
| "tpp_threshold_20_unintended_diff_only": 0.23150001466274261, |
| "tpp_threshold_50_total_metric": 0.1600000113248825, |
| "tpp_threshold_50_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_50_unintended_diff_only": 0.3200000077486038, |
| "tpp_threshold_100_total_metric": 0.1262499988079071, |
| "tpp_threshold_100_intended_diff_only": 0.48100000619888306, |
| "tpp_threshold_100_unintended_diff_only": 0.35475000739097595, |
| "tpp_threshold_500_total_metric": 0.10799998044967651, |
| "tpp_threshold_500_intended_diff_only": 0.4919999837875366, |
| "tpp_threshold_500_unintended_diff_only": 0.3840000033378601 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.11475002765655518, |
| "tpp_threshold_2_intended_diff_only": 0.156000018119812, |
| "tpp_threshold_2_unintended_diff_only": 0.041249990463256836, |
| "tpp_threshold_5_total_metric": 0.12874998152256012, |
| "tpp_threshold_5_intended_diff_only": 0.37599998712539673, |
| "tpp_threshold_5_unintended_diff_only": 0.2472500056028366, |
| "tpp_threshold_10_total_metric": 0.12250001728534698, |
| "tpp_threshold_10_intended_diff_only": 0.4150000214576721, |
| "tpp_threshold_10_unintended_diff_only": 0.29250000417232513, |
| "tpp_threshold_20_total_metric": 0.12850002944469452, |
| "tpp_threshold_20_intended_diff_only": 0.44700002670288086, |
| "tpp_threshold_20_unintended_diff_only": 0.31849999725818634, |
| "tpp_threshold_50_total_metric": 0.13249997794628143, |
| "tpp_threshold_50_intended_diff_only": 0.4779999852180481, |
| "tpp_threshold_50_unintended_diff_only": 0.34550000727176666, |
| "tpp_threshold_100_total_metric": 0.1157500147819519, |
| "tpp_threshold_100_intended_diff_only": 0.4790000319480896, |
| "tpp_threshold_100_unintended_diff_only": 0.3632500171661377, |
| "tpp_threshold_500_total_metric": 0.08525003492832184, |
| "tpp_threshold_500_intended_diff_only": 0.4790000319480896, |
| "tpp_threshold_500_unintended_diff_only": 0.39374999701976776 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.013000011444091797, |
| "tpp_threshold_2_intended_diff_only": 0.013000011444091797, |
| "tpp_threshold_2_unintended_diff_only": 0.0, |
| "tpp_threshold_5_total_metric": 0.010749995708465576, |
| "tpp_threshold_5_intended_diff_only": 0.013999998569488525, |
| "tpp_threshold_5_unintended_diff_only": 0.0032500028610229492, |
| "tpp_threshold_10_total_metric": 0.011999979615211487, |
| "tpp_threshold_10_intended_diff_only": 0.02499997615814209, |
| "tpp_threshold_10_unintended_diff_only": 0.012999996542930603, |
| "tpp_threshold_20_total_metric": 0.0492500364780426, |
| "tpp_threshold_20_intended_diff_only": 0.07600003480911255, |
| "tpp_threshold_20_unintended_diff_only": 0.026749998331069946, |
| "tpp_threshold_50_total_metric": 0.19025000929832458, |
| "tpp_threshold_50_intended_diff_only": 0.22100001573562622, |
| "tpp_threshold_50_unintended_diff_only": 0.030750006437301636, |
| "tpp_threshold_100_total_metric": 0.28975000977516174, |
| "tpp_threshold_100_intended_diff_only": 0.3270000219345093, |
| "tpp_threshold_100_unintended_diff_only": 0.037250012159347534, |
| "tpp_threshold_500_total_metric": 0.39500005543231964, |
| "tpp_threshold_500_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_500_unintended_diff_only": 0.05499999225139618 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.017499953508377075, |
| "tpp_threshold_2_intended_diff_only": 0.019999980926513672, |
| "tpp_threshold_2_unintended_diff_only": 0.0025000274181365967, |
| "tpp_threshold_5_total_metric": 0.026499956846237183, |
| "tpp_threshold_5_intended_diff_only": 0.030999958515167236, |
| "tpp_threshold_5_unintended_diff_only": 0.004500001668930054, |
| "tpp_threshold_10_total_metric": 0.051499977707862854, |
| "tpp_threshold_10_intended_diff_only": 0.06199997663497925, |
| "tpp_threshold_10_unintended_diff_only": 0.010499998927116394, |
| "tpp_threshold_20_total_metric": 0.08224999904632568, |
| "tpp_threshold_20_intended_diff_only": 0.11100000143051147, |
| "tpp_threshold_20_unintended_diff_only": 0.02875000238418579, |
| "tpp_threshold_50_total_metric": 0.2789999842643738, |
| "tpp_threshold_50_intended_diff_only": 0.3219999670982361, |
| "tpp_threshold_50_unintended_diff_only": 0.042999982833862305, |
| "tpp_threshold_100_total_metric": 0.3699999451637268, |
| "tpp_threshold_100_intended_diff_only": 0.41899996995925903, |
| "tpp_threshold_100_unintended_diff_only": 0.04900002479553223, |
| "tpp_threshold_500_total_metric": 0.3347500264644623, |
| "tpp_threshold_500_intended_diff_only": 0.43800002336502075, |
| "tpp_threshold_500_unintended_diff_only": 0.10324999690055847 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": 0.0017500221729278564, |
| "tpp_threshold_2_intended_diff_only": 0.003000020980834961, |
| "tpp_threshold_2_unintended_diff_only": 0.0012499988079071045, |
| "tpp_threshold_5_total_metric": 0.005250036716461182, |
| "tpp_threshold_5_intended_diff_only": 0.016000032424926758, |
| "tpp_threshold_5_unintended_diff_only": 0.010749995708465576, |
| "tpp_threshold_10_total_metric": 0.017000049352645874, |
| "tpp_threshold_10_intended_diff_only": 0.029000043869018555, |
| "tpp_threshold_10_unintended_diff_only": 0.01199999451637268, |
| "tpp_threshold_20_total_metric": 0.05550001561641693, |
| "tpp_threshold_20_intended_diff_only": 0.06800001859664917, |
| "tpp_threshold_20_unintended_diff_only": 0.012500002980232239, |
| "tpp_threshold_50_total_metric": 0.1497500091791153, |
| "tpp_threshold_50_intended_diff_only": 0.16600000858306885, |
| "tpp_threshold_50_unintended_diff_only": 0.016249999403953552, |
| "tpp_threshold_100_total_metric": 0.26200005412101746, |
| "tpp_threshold_100_intended_diff_only": 0.28300005197525024, |
| "tpp_threshold_100_unintended_diff_only": 0.020999997854232788, |
| "tpp_threshold_500_total_metric": 0.3590000867843628, |
| "tpp_threshold_500_intended_diff_only": 0.42500007152557373, |
| "tpp_threshold_500_unintended_diff_only": 0.06599998474121094 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.015000015497207642, |
| "tpp_threshold_2_intended_diff_only": 0.022000014781951904, |
| "tpp_threshold_2_unintended_diff_only": 0.006999999284744263, |
| "tpp_threshold_5_total_metric": 0.07625001668930054, |
| "tpp_threshold_5_intended_diff_only": 0.08700001239776611, |
| "tpp_threshold_5_unintended_diff_only": 0.010749995708465576, |
| "tpp_threshold_10_total_metric": 0.12475000321865082, |
| "tpp_threshold_10_intended_diff_only": 0.1380000114440918, |
| "tpp_threshold_10_unintended_diff_only": 0.013250008225440979, |
| "tpp_threshold_20_total_metric": 0.20225001871585846, |
| "tpp_threshold_20_intended_diff_only": 0.2250000238418579, |
| "tpp_threshold_20_unintended_diff_only": 0.02275000512599945, |
| "tpp_threshold_50_total_metric": 0.3435000032186508, |
| "tpp_threshold_50_intended_diff_only": 0.37300002574920654, |
| "tpp_threshold_50_unintended_diff_only": 0.029500022530555725, |
| "tpp_threshold_100_total_metric": 0.36274999380111694, |
| "tpp_threshold_100_intended_diff_only": 0.40799999237060547, |
| "tpp_threshold_100_unintended_diff_only": 0.045249998569488525, |
| "tpp_threshold_500_total_metric": 0.33650003373622894, |
| "tpp_threshold_500_intended_diff_only": 0.4140000343322754, |
| "tpp_threshold_500_unintended_diff_only": 0.07750000059604645 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.05650003254413605, |
| "tpp_threshold_2_intended_diff_only": 0.06200003623962402, |
| "tpp_threshold_2_unintended_diff_only": 0.005500003695487976, |
| "tpp_threshold_5_total_metric": 0.1015000194311142, |
| "tpp_threshold_5_intended_diff_only": 0.10900002717971802, |
| "tpp_threshold_5_unintended_diff_only": 0.007500007748603821, |
| "tpp_threshold_10_total_metric": 0.15850003063678741, |
| "tpp_threshold_10_intended_diff_only": 0.1690000295639038, |
| "tpp_threshold_10_unintended_diff_only": 0.010499998927116394, |
| "tpp_threshold_20_total_metric": 0.24675001204013824, |
| "tpp_threshold_20_intended_diff_only": 0.26200002431869507, |
| "tpp_threshold_20_unintended_diff_only": 0.015250012278556824, |
| "tpp_threshold_50_total_metric": 0.3360000103712082, |
| "tpp_threshold_50_intended_diff_only": 0.36500000953674316, |
| "tpp_threshold_50_unintended_diff_only": 0.028999999165534973, |
| "tpp_threshold_100_total_metric": 0.34324999153614044, |
| "tpp_threshold_100_intended_diff_only": 0.375, |
| "tpp_threshold_100_unintended_diff_only": 0.03175000846385956, |
| "tpp_threshold_500_total_metric": 0.3110000193119049, |
| "tpp_threshold_500_intended_diff_only": 0.3760000467300415, |
| "tpp_threshold_500_unintended_diff_only": 0.0650000274181366 |
| } |
| } |
| } |
| } |