| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745753263050, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.0927500158548355, |
| "tpp_threshold_2_intended_diff_only": 0.10910000801086425, |
| "tpp_threshold_2_unintended_diff_only": 0.01634999215602875, |
| "tpp_threshold_5_total_metric": 0.16702500581741334, |
| "tpp_threshold_5_intended_diff_only": 0.2118000030517578, |
| "tpp_threshold_5_unintended_diff_only": 0.044774997234344485, |
| "tpp_threshold_10_total_metric": 0.20147501677274704, |
| "tpp_threshold_10_intended_diff_only": 0.2745000123977661, |
| "tpp_threshold_10_unintended_diff_only": 0.07302499562501907, |
| "tpp_threshold_20_total_metric": 0.2179250165820122, |
| "tpp_threshold_20_intended_diff_only": 0.325000011920929, |
| "tpp_threshold_20_unintended_diff_only": 0.10707499533891678, |
| "tpp_threshold_50_total_metric": 0.2513500273227692, |
| "tpp_threshold_50_intended_diff_only": 0.40420002341270445, |
| "tpp_threshold_50_unintended_diff_only": 0.1528499960899353, |
| "tpp_threshold_100_total_metric": 0.2604250147938728, |
| "tpp_threshold_100_intended_diff_only": 0.4364000201225281, |
| "tpp_threshold_100_unintended_diff_only": 0.17597500532865526, |
| "tpp_threshold_500_total_metric": 0.23210002928972245, |
| "tpp_threshold_500_intended_diff_only": 0.44680003523826595, |
| "tpp_threshold_500_unintended_diff_only": 0.21470000594854355 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.1517000138759613, |
| "tpp_threshold_2_intended_diff_only": 0.17860000133514403, |
| "tpp_threshold_2_unintended_diff_only": 0.02689998745918274, |
| "tpp_threshold_5_total_metric": 0.2607500046491623, |
| "tpp_threshold_5_intended_diff_only": 0.33980000019073486, |
| "tpp_threshold_5_unintended_diff_only": 0.07904999554157258, |
| "tpp_threshold_10_total_metric": 0.27370002269744875, |
| "tpp_threshold_10_intended_diff_only": 0.40420001745224, |
| "tpp_threshold_10_unintended_diff_only": 0.13049999475479127, |
| "tpp_threshold_20_total_metric": 0.24720001816749573, |
| "tpp_threshold_20_intended_diff_only": 0.44060001373291013, |
| "tpp_threshold_20_unintended_diff_only": 0.19339999556541443, |
| "tpp_threshold_50_total_metric": 0.18795002698898317, |
| "tpp_threshold_50_intended_diff_only": 0.4600000262260437, |
| "tpp_threshold_50_unintended_diff_only": 0.2720499992370605, |
| "tpp_threshold_100_total_metric": 0.15005001723766326, |
| "tpp_threshold_100_intended_diff_only": 0.46240001916885376, |
| "tpp_threshold_100_unintended_diff_only": 0.3123500019311905, |
| "tpp_threshold_500_total_metric": 0.09020003080368041, |
| "tpp_threshold_500_intended_diff_only": 0.4630000352859497, |
| "tpp_threshold_500_unintended_diff_only": 0.3728000044822693 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.033800017833709714, |
| "tpp_threshold_2_intended_diff_only": 0.03960001468658447, |
| "tpp_threshold_2_unintended_diff_only": 0.005799996852874756, |
| "tpp_threshold_5_total_metric": 0.07330000698566437, |
| "tpp_threshold_5_intended_diff_only": 0.08380000591278076, |
| "tpp_threshold_5_unintended_diff_only": 0.010499998927116394, |
| "tpp_threshold_10_total_metric": 0.12925001084804535, |
| "tpp_threshold_10_intended_diff_only": 0.14480000734329224, |
| "tpp_threshold_10_unintended_diff_only": 0.015549996495246887, |
| "tpp_threshold_20_total_metric": 0.18865001499652861, |
| "tpp_threshold_20_intended_diff_only": 0.20940001010894777, |
| "tpp_threshold_20_unintended_diff_only": 0.02074999511241913, |
| "tpp_threshold_50_total_metric": 0.3147500276565552, |
| "tpp_threshold_50_intended_diff_only": 0.34840002059936526, |
| "tpp_threshold_50_unintended_diff_only": 0.033649992942810056, |
| "tpp_threshold_100_total_metric": 0.3708000123500824, |
| "tpp_threshold_100_intended_diff_only": 0.4104000210762024, |
| "tpp_threshold_100_unintended_diff_only": 0.039600008726119997, |
| "tpp_threshold_500_total_metric": 0.37400002777576447, |
| "tpp_threshold_500_intended_diff_only": 0.43060003519058226, |
| "tpp_threshold_500_unintended_diff_only": 0.05660000741481781 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.24.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.24.hook_resid_post", |
| "hook_layer": 24, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.1782500147819519, |
| "tpp_threshold_2_intended_diff_only": 0.20800000429153442, |
| "tpp_threshold_2_unintended_diff_only": 0.02974998950958252, |
| "tpp_threshold_5_total_metric": 0.2787500023841858, |
| "tpp_threshold_5_intended_diff_only": 0.3610000014305115, |
| "tpp_threshold_5_unintended_diff_only": 0.08224999904632568, |
| "tpp_threshold_10_total_metric": 0.2562500238418579, |
| "tpp_threshold_10_intended_diff_only": 0.3970000147819519, |
| "tpp_threshold_10_unintended_diff_only": 0.140749990940094, |
| "tpp_threshold_20_total_metric": 0.18274998664855957, |
| "tpp_threshold_20_intended_diff_only": 0.41600000858306885, |
| "tpp_threshold_20_unintended_diff_only": 0.23325002193450928, |
| "tpp_threshold_50_total_metric": 0.14399997889995575, |
| "tpp_threshold_50_intended_diff_only": 0.4309999942779541, |
| "tpp_threshold_50_unintended_diff_only": 0.28700001537799835, |
| "tpp_threshold_100_total_metric": 0.12300004065036774, |
| "tpp_threshold_100_intended_diff_only": 0.43800002336502075, |
| "tpp_threshold_100_unintended_diff_only": 0.314999982714653, |
| "tpp_threshold_500_total_metric": 0.052500009536743164, |
| "tpp_threshold_500_intended_diff_only": 0.43800002336502075, |
| "tpp_threshold_500_unintended_diff_only": 0.3855000138282776 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.15425002574920654, |
| "tpp_threshold_2_intended_diff_only": 0.23100000619888306, |
| "tpp_threshold_2_unintended_diff_only": 0.07674998044967651, |
| "tpp_threshold_5_total_metric": 0.20524999499320984, |
| "tpp_threshold_5_intended_diff_only": 0.3059999942779541, |
| "tpp_threshold_5_unintended_diff_only": 0.10074999928474426, |
| "tpp_threshold_10_total_metric": 0.2202499955892563, |
| "tpp_threshold_10_intended_diff_only": 0.3889999985694885, |
| "tpp_threshold_10_unintended_diff_only": 0.16875000298023224, |
| "tpp_threshold_20_total_metric": 0.22499999403953552, |
| "tpp_threshold_20_intended_diff_only": 0.4269999861717224, |
| "tpp_threshold_20_unintended_diff_only": 0.2019999921321869, |
| "tpp_threshold_50_total_metric": 0.16575004160404205, |
| "tpp_threshold_50_intended_diff_only": 0.45100003480911255, |
| "tpp_threshold_50_unintended_diff_only": 0.2852499932050705, |
| "tpp_threshold_100_total_metric": 0.11399997770786285, |
| "tpp_threshold_100_intended_diff_only": 0.45499998331069946, |
| "tpp_threshold_100_unintended_diff_only": 0.3410000056028366, |
| "tpp_threshold_500_total_metric": 0.06800001859664917, |
| "tpp_threshold_500_intended_diff_only": 0.45600003004074097, |
| "tpp_threshold_500_unintended_diff_only": 0.3880000114440918 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.1990000158548355, |
| "tpp_threshold_2_intended_diff_only": 0.21299999952316284, |
| "tpp_threshold_2_unintended_diff_only": 0.013999983668327332, |
| "tpp_threshold_5_total_metric": 0.28150002658367157, |
| "tpp_threshold_5_intended_diff_only": 0.3270000219345093, |
| "tpp_threshold_5_unintended_diff_only": 0.04549999535083771, |
| "tpp_threshold_10_total_metric": 0.25925005972385406, |
| "tpp_threshold_10_intended_diff_only": 0.3670000433921814, |
| "tpp_threshold_10_unintended_diff_only": 0.10774998366832733, |
| "tpp_threshold_20_total_metric": 0.26075004041194916, |
| "tpp_threshold_20_intended_diff_only": 0.4100000262260437, |
| "tpp_threshold_20_unintended_diff_only": 0.14924998581409454, |
| "tpp_threshold_50_total_metric": 0.19475005567073822, |
| "tpp_threshold_50_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_50_unintended_diff_only": 0.2552499920129776, |
| "tpp_threshold_100_total_metric": 0.15675002336502075, |
| "tpp_threshold_100_intended_diff_only": 0.4520000219345093, |
| "tpp_threshold_100_unintended_diff_only": 0.2952499985694885, |
| "tpp_threshold_500_total_metric": 0.06400005519390106, |
| "tpp_threshold_500_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_500_unintended_diff_only": 0.39000000059604645 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.042000025510787964, |
| "tpp_threshold_2_intended_diff_only": 0.046000003814697266, |
| "tpp_threshold_2_unintended_diff_only": 0.003999978303909302, |
| "tpp_threshold_5_total_metric": 0.2472500205039978, |
| "tpp_threshold_5_intended_diff_only": 0.3240000009536743, |
| "tpp_threshold_5_unintended_diff_only": 0.07674998044967651, |
| "tpp_threshold_10_total_metric": 0.33250002562999725, |
| "tpp_threshold_10_intended_diff_only": 0.44300001859664917, |
| "tpp_threshold_10_unintended_diff_only": 0.11049999296665192, |
| "tpp_threshold_20_total_metric": 0.28275005519390106, |
| "tpp_threshold_20_intended_diff_only": 0.4880000352859497, |
| "tpp_threshold_20_unintended_diff_only": 0.20524998009204865, |
| "tpp_threshold_50_total_metric": 0.21925003826618195, |
| "tpp_threshold_50_intended_diff_only": 0.4930000305175781, |
| "tpp_threshold_50_unintended_diff_only": 0.2737499922513962, |
| "tpp_threshold_100_total_metric": 0.18275003135204315, |
| "tpp_threshold_100_intended_diff_only": 0.4920000433921814, |
| "tpp_threshold_100_unintended_diff_only": 0.30925001204013824, |
| "tpp_threshold_500_total_metric": 0.1520000547170639, |
| "tpp_threshold_500_intended_diff_only": 0.4920000433921814, |
| "tpp_threshold_500_unintended_diff_only": 0.3399999886751175 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.1849999874830246, |
| "tpp_threshold_2_intended_diff_only": 0.19499999284744263, |
| "tpp_threshold_2_unintended_diff_only": 0.01000000536441803, |
| "tpp_threshold_5_total_metric": 0.29099997878074646, |
| "tpp_threshold_5_intended_diff_only": 0.38099998235702515, |
| "tpp_threshold_5_unintended_diff_only": 0.09000000357627869, |
| "tpp_threshold_10_total_metric": 0.30025000870227814, |
| "tpp_threshold_10_intended_diff_only": 0.42500001192092896, |
| "tpp_threshold_10_unintended_diff_only": 0.12475000321865082, |
| "tpp_threshold_20_total_metric": 0.2847500145435333, |
| "tpp_threshold_20_intended_diff_only": 0.4620000123977661, |
| "tpp_threshold_20_unintended_diff_only": 0.1772499978542328, |
| "tpp_threshold_50_total_metric": 0.2160000205039978, |
| "tpp_threshold_50_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_50_unintended_diff_only": 0.2590000033378601, |
| "tpp_threshold_100_total_metric": 0.17375001311302185, |
| "tpp_threshold_100_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_100_unintended_diff_only": 0.30125001072883606, |
| "tpp_threshold_500_total_metric": 0.1145000159740448, |
| "tpp_threshold_500_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_500_unintended_diff_only": 0.3605000078678131 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.014500007033348083, |
| "tpp_threshold_2_intended_diff_only": 0.023000001907348633, |
| "tpp_threshold_2_unintended_diff_only": 0.00849999487400055, |
| "tpp_threshold_5_total_metric": 0.0467500239610672, |
| "tpp_threshold_5_intended_diff_only": 0.05900001525878906, |
| "tpp_threshold_5_unintended_diff_only": 0.012249991297721863, |
| "tpp_threshold_10_total_metric": 0.09600003063678741, |
| "tpp_threshold_10_intended_diff_only": 0.11500000953674316, |
| "tpp_threshold_10_unintended_diff_only": 0.01899997889995575, |
| "tpp_threshold_20_total_metric": 0.13050000369548798, |
| "tpp_threshold_20_intended_diff_only": 0.15299999713897705, |
| "tpp_threshold_20_unintended_diff_only": 0.022499993443489075, |
| "tpp_threshold_50_total_metric": 0.30750006437301636, |
| "tpp_threshold_50_intended_diff_only": 0.33900004625320435, |
| "tpp_threshold_50_unintended_diff_only": 0.03149998188018799, |
| "tpp_threshold_100_total_metric": 0.3905000239610672, |
| "tpp_threshold_100_intended_diff_only": 0.4190000295639038, |
| "tpp_threshold_100_unintended_diff_only": 0.02850000560283661, |
| "tpp_threshold_500_total_metric": 0.40550000965595245, |
| "tpp_threshold_500_intended_diff_only": 0.4570000171661377, |
| "tpp_threshold_500_unintended_diff_only": 0.05150000751018524 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.059750065207481384, |
| "tpp_threshold_2_intended_diff_only": 0.0700000524520874, |
| "tpp_threshold_2_unintended_diff_only": 0.010249987244606018, |
| "tpp_threshold_5_total_metric": 0.1442500203847885, |
| "tpp_threshold_5_intended_diff_only": 0.16100001335144043, |
| "tpp_threshold_5_unintended_diff_only": 0.016749992966651917, |
| "tpp_threshold_10_total_metric": 0.22050006687641144, |
| "tpp_threshold_10_intended_diff_only": 0.23600006103515625, |
| "tpp_threshold_10_unintended_diff_only": 0.015499994158744812, |
| "tpp_threshold_20_total_metric": 0.30075007677078247, |
| "tpp_threshold_20_intended_diff_only": 0.3200000524520874, |
| "tpp_threshold_20_unintended_diff_only": 0.01924997568130493, |
| "tpp_threshold_50_total_metric": 0.3745000660419464, |
| "tpp_threshold_50_intended_diff_only": 0.4070000648498535, |
| "tpp_threshold_50_unintended_diff_only": 0.032499998807907104, |
| "tpp_threshold_100_total_metric": 0.4035000205039978, |
| "tpp_threshold_100_intended_diff_only": 0.437000036239624, |
| "tpp_threshold_100_unintended_diff_only": 0.03350001573562622, |
| "tpp_threshold_500_total_metric": 0.3905000686645508, |
| "tpp_threshold_500_intended_diff_only": 0.43900007009506226, |
| "tpp_threshold_500_unintended_diff_only": 0.048500001430511475 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.002250000834465027, |
| "tpp_threshold_2_intended_diff_only": 0.0, |
| "tpp_threshold_2_unintended_diff_only": 0.002250000834465027, |
| "tpp_threshold_5_total_metric": 0.001999959349632263, |
| "tpp_threshold_5_intended_diff_only": 0.006999969482421875, |
| "tpp_threshold_5_unintended_diff_only": 0.005000010132789612, |
| "tpp_threshold_10_total_metric": 0.01649998128414154, |
| "tpp_threshold_10_intended_diff_only": 0.0339999794960022, |
| "tpp_threshold_10_unintended_diff_only": 0.017499998211860657, |
| "tpp_threshold_20_total_metric": 0.07074996829032898, |
| "tpp_threshold_20_intended_diff_only": 0.0899999737739563, |
| "tpp_threshold_20_unintended_diff_only": 0.01925000548362732, |
| "tpp_threshold_50_total_metric": 0.2864999771118164, |
| "tpp_threshold_50_intended_diff_only": 0.3489999771118164, |
| "tpp_threshold_50_unintended_diff_only": 0.0625, |
| "tpp_threshold_100_total_metric": 0.33149999380111694, |
| "tpp_threshold_100_intended_diff_only": 0.41600000858306885, |
| "tpp_threshold_100_unintended_diff_only": 0.0845000147819519, |
| "tpp_threshold_500_total_metric": 0.3295000046491623, |
| "tpp_threshold_500_intended_diff_only": 0.43400001525878906, |
| "tpp_threshold_500_unintended_diff_only": 0.10450001060962677 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.00850003957748413, |
| "tpp_threshold_2_intended_diff_only": 0.012000024318695068, |
| "tpp_threshold_2_unintended_diff_only": 0.0034999847412109375, |
| "tpp_threshold_5_total_metric": 0.06149999797344208, |
| "tpp_threshold_5_intended_diff_only": 0.0690000057220459, |
| "tpp_threshold_5_unintended_diff_only": 0.007500007748603821, |
| "tpp_threshold_10_total_metric": 0.14374998211860657, |
| "tpp_threshold_10_intended_diff_only": 0.15399998426437378, |
| "tpp_threshold_10_unintended_diff_only": 0.010250002145767212, |
| "tpp_threshold_20_total_metric": 0.18575002253055573, |
| "tpp_threshold_20_intended_diff_only": 0.20200002193450928, |
| "tpp_threshold_20_unintended_diff_only": 0.016249999403953552, |
| "tpp_threshold_50_total_metric": 0.27125002443790436, |
| "tpp_threshold_50_intended_diff_only": 0.28600001335144043, |
| "tpp_threshold_50_unintended_diff_only": 0.014749988913536072, |
| "tpp_threshold_100_total_metric": 0.3720000237226486, |
| "tpp_threshold_100_intended_diff_only": 0.3960000276565552, |
| "tpp_threshold_100_unintended_diff_only": 0.024000003933906555, |
| "tpp_threshold_500_total_metric": 0.39775003492832184, |
| "tpp_threshold_500_intended_diff_only": 0.4320000410079956, |
| "tpp_threshold_500_unintended_diff_only": 0.03425000607967377 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.08849997818470001, |
| "tpp_threshold_2_intended_diff_only": 0.09299999475479126, |
| "tpp_threshold_2_unintended_diff_only": 0.0045000165700912476, |
| "tpp_threshold_5_total_metric": 0.11200003325939178, |
| "tpp_threshold_5_intended_diff_only": 0.12300002574920654, |
| "tpp_threshold_5_unintended_diff_only": 0.010999992489814758, |
| "tpp_threshold_10_total_metric": 0.16949999332427979, |
| "tpp_threshold_10_intended_diff_only": 0.1850000023841858, |
| "tpp_threshold_10_unintended_diff_only": 0.015500009059906006, |
| "tpp_threshold_20_total_metric": 0.255500003695488, |
| "tpp_threshold_20_intended_diff_only": 0.28200000524520874, |
| "tpp_threshold_20_unintended_diff_only": 0.026500001549720764, |
| "tpp_threshold_50_total_metric": 0.33400000631809235, |
| "tpp_threshold_50_intended_diff_only": 0.3610000014305115, |
| "tpp_threshold_50_unintended_diff_only": 0.02699999511241913, |
| "tpp_threshold_100_total_metric": 0.3564999997615814, |
| "tpp_threshold_100_intended_diff_only": 0.3840000033378601, |
| "tpp_threshold_100_unintended_diff_only": 0.027500003576278687, |
| "tpp_threshold_500_total_metric": 0.34675002098083496, |
| "tpp_threshold_500_intended_diff_only": 0.39100003242492676, |
| "tpp_threshold_500_unintended_diff_only": 0.0442500114440918 |
| } |
| } |
| } |
| } |