| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752865001, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.059199997782707216, |
| "tpp_threshold_2_intended_diff_only": 0.07409999370574952, |
| "tpp_threshold_2_unintended_diff_only": 0.014899995923042298, |
| "tpp_threshold_5_total_metric": 0.14160000234842302, |
| "tpp_threshold_5_intended_diff_only": 0.17550000548362732, |
| "tpp_threshold_5_unintended_diff_only": 0.03390000313520432, |
| "tpp_threshold_10_total_metric": 0.19855001121759414, |
| "tpp_threshold_10_intended_diff_only": 0.26320000290870665, |
| "tpp_threshold_10_unintended_diff_only": 0.06464999169111252, |
| "tpp_threshold_20_total_metric": 0.2088250070810318, |
| "tpp_threshold_20_intended_diff_only": 0.31790001392364503, |
| "tpp_threshold_20_unintended_diff_only": 0.10907500684261322, |
| "tpp_threshold_50_total_metric": 0.22935001403093339, |
| "tpp_threshold_50_intended_diff_only": 0.38320001363754275, |
| "tpp_threshold_50_unintended_diff_only": 0.15384999960660933, |
| "tpp_threshold_100_total_metric": 0.24152501374483107, |
| "tpp_threshold_100_intended_diff_only": 0.4228000164031982, |
| "tpp_threshold_100_unintended_diff_only": 0.18127500265836716, |
| "tpp_threshold_500_total_metric": 0.23130002915859224, |
| "tpp_threshold_500_intended_diff_only": 0.45010003447532654, |
| "tpp_threshold_500_unintended_diff_only": 0.2188000053167343 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.08704999089241028, |
| "tpp_threshold_2_intended_diff_only": 0.10579999685287475, |
| "tpp_threshold_2_unintended_diff_only": 0.01875000596046448, |
| "tpp_threshold_5_total_metric": 0.1984499990940094, |
| "tpp_threshold_5_intended_diff_only": 0.25660001039505004, |
| "tpp_threshold_5_unintended_diff_only": 0.05815001130104065, |
| "tpp_threshold_10_total_metric": 0.26495001316070554, |
| "tpp_threshold_10_intended_diff_only": 0.38120001554489136, |
| "tpp_threshold_10_unintended_diff_only": 0.11625000238418579, |
| "tpp_threshold_20_total_metric": 0.22785000801086425, |
| "tpp_threshold_20_intended_diff_only": 0.4284000277519226, |
| "tpp_threshold_20_unintended_diff_only": 0.20055001974105835, |
| "tpp_threshold_50_total_metric": 0.1801000118255615, |
| "tpp_threshold_50_intended_diff_only": 0.46040002107620237, |
| "tpp_threshold_50_unintended_diff_only": 0.28030000925064086, |
| "tpp_threshold_100_total_metric": 0.14095001220703124, |
| "tpp_threshold_100_intended_diff_only": 0.4662000298500061, |
| "tpp_threshold_100_unintended_diff_only": 0.32525001764297484, |
| "tpp_threshold_500_total_metric": 0.08530002534389496, |
| "tpp_threshold_500_intended_diff_only": 0.4676000475883484, |
| "tpp_threshold_500_unintended_diff_only": 0.3823000222444534 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.03135000467300415, |
| "tpp_threshold_2_intended_diff_only": 0.04239999055862427, |
| "tpp_threshold_2_unintended_diff_only": 0.011049985885620117, |
| "tpp_threshold_5_total_metric": 0.0847500056028366, |
| "tpp_threshold_5_intended_diff_only": 0.09440000057220459, |
| "tpp_threshold_5_unintended_diff_only": 0.00964999496936798, |
| "tpp_threshold_10_total_metric": 0.13215000927448273, |
| "tpp_threshold_10_intended_diff_only": 0.14519999027252198, |
| "tpp_threshold_10_unintended_diff_only": 0.013049980998039246, |
| "tpp_threshold_20_total_metric": 0.18980000615119935, |
| "tpp_threshold_20_intended_diff_only": 0.20740000009536744, |
| "tpp_threshold_20_unintended_diff_only": 0.017599993944168092, |
| "tpp_threshold_50_total_metric": 0.27860001623630526, |
| "tpp_threshold_50_intended_diff_only": 0.30600000619888307, |
| "tpp_threshold_50_unintended_diff_only": 0.02739998996257782, |
| "tpp_threshold_100_total_metric": 0.3421000152826309, |
| "tpp_threshold_100_intended_diff_only": 0.37940000295639037, |
| "tpp_threshold_100_unintended_diff_only": 0.03729998767375946, |
| "tpp_threshold_500_total_metric": 0.3773000329732895, |
| "tpp_threshold_500_intended_diff_only": 0.4326000213623047, |
| "tpp_threshold_500_unintended_diff_only": 0.055299988389015196 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.18.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.18.hook_resid_post", |
| "hook_layer": 18, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.1407500058412552, |
| "tpp_threshold_2_intended_diff_only": 0.17900002002716064, |
| "tpp_threshold_2_unintended_diff_only": 0.03825001418590546, |
| "tpp_threshold_5_total_metric": 0.21100002527236938, |
| "tpp_threshold_5_intended_diff_only": 0.2850000262260437, |
| "tpp_threshold_5_unintended_diff_only": 0.07400000095367432, |
| "tpp_threshold_10_total_metric": 0.26725004613399506, |
| "tpp_threshold_10_intended_diff_only": 0.3760000467300415, |
| "tpp_threshold_10_unintended_diff_only": 0.10875000059604645, |
| "tpp_threshold_20_total_metric": 0.2617500424385071, |
| "tpp_threshold_20_intended_diff_only": 0.40800005197525024, |
| "tpp_threshold_20_unintended_diff_only": 0.14625000953674316, |
| "tpp_threshold_50_total_metric": 0.2160000056028366, |
| "tpp_threshold_50_intended_diff_only": 0.4440000057220459, |
| "tpp_threshold_50_unintended_diff_only": 0.2280000001192093, |
| "tpp_threshold_100_total_metric": 0.1522500365972519, |
| "tpp_threshold_100_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_100_unintended_diff_only": 0.29775001108646393, |
| "tpp_threshold_500_total_metric": 0.098750039935112, |
| "tpp_threshold_500_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_500_unintended_diff_only": 0.3552500158548355 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.06124997138977051, |
| "tpp_threshold_2_intended_diff_only": 0.07599997520446777, |
| "tpp_threshold_2_unintended_diff_only": 0.014750003814697266, |
| "tpp_threshold_5_total_metric": 0.10599996149539948, |
| "tpp_threshold_5_intended_diff_only": 0.12699997425079346, |
| "tpp_threshold_5_unintended_diff_only": 0.021000012755393982, |
| "tpp_threshold_10_total_metric": 0.2592499852180481, |
| "tpp_threshold_10_intended_diff_only": 0.33899998664855957, |
| "tpp_threshold_10_unintended_diff_only": 0.07975000143051147, |
| "tpp_threshold_20_total_metric": 0.18024997413158417, |
| "tpp_threshold_20_intended_diff_only": 0.40299999713897705, |
| "tpp_threshold_20_unintended_diff_only": 0.22275002300739288, |
| "tpp_threshold_50_total_metric": 0.1535000205039978, |
| "tpp_threshold_50_intended_diff_only": 0.45100003480911255, |
| "tpp_threshold_50_unintended_diff_only": 0.29750001430511475, |
| "tpp_threshold_100_total_metric": 0.12275001406669617, |
| "tpp_threshold_100_intended_diff_only": 0.4650000333786011, |
| "tpp_threshold_100_unintended_diff_only": 0.3422500193119049, |
| "tpp_threshold_500_total_metric": 0.07750003039836884, |
| "tpp_threshold_500_intended_diff_only": 0.4650000333786011, |
| "tpp_threshold_500_unintended_diff_only": 0.38750000298023224 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.13425002992153168, |
| "tpp_threshold_2_intended_diff_only": 0.15000003576278687, |
| "tpp_threshold_2_unintended_diff_only": 0.015750005841255188, |
| "tpp_threshold_5_total_metric": 0.249750018119812, |
| "tpp_threshold_5_intended_diff_only": 0.2850000262260437, |
| "tpp_threshold_5_unintended_diff_only": 0.03525000810623169, |
| "tpp_threshold_10_total_metric": 0.2782500237226486, |
| "tpp_threshold_10_intended_diff_only": 0.3830000162124634, |
| "tpp_threshold_10_unintended_diff_only": 0.10474999248981476, |
| "tpp_threshold_20_total_metric": 0.2540000379085541, |
| "tpp_threshold_20_intended_diff_only": 0.40800005197525024, |
| "tpp_threshold_20_unintended_diff_only": 0.15400001406669617, |
| "tpp_threshold_50_total_metric": 0.16325001418590546, |
| "tpp_threshold_50_intended_diff_only": 0.44700002670288086, |
| "tpp_threshold_50_unintended_diff_only": 0.2837500125169754, |
| "tpp_threshold_100_total_metric": 0.12900002300739288, |
| "tpp_threshold_100_intended_diff_only": 0.45100003480911255, |
| "tpp_threshold_100_unintended_diff_only": 0.32200001180171967, |
| "tpp_threshold_500_total_metric": 0.047750040888786316, |
| "tpp_threshold_500_intended_diff_only": 0.4530000686645508, |
| "tpp_threshold_500_unintended_diff_only": 0.40525002777576447 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.046999990940093994, |
| "tpp_threshold_2_intended_diff_only": 0.06499999761581421, |
| "tpp_threshold_2_unintended_diff_only": 0.018000006675720215, |
| "tpp_threshold_5_total_metric": 0.18800000846385956, |
| "tpp_threshold_5_intended_diff_only": 0.26200002431869507, |
| "tpp_threshold_5_unintended_diff_only": 0.07400001585483551, |
| "tpp_threshold_10_total_metric": 0.27025002241134644, |
| "tpp_threshold_10_intended_diff_only": 0.4050000309944153, |
| "tpp_threshold_10_unintended_diff_only": 0.13475000858306885, |
| "tpp_threshold_20_total_metric": 0.24150003492832184, |
| "tpp_threshold_20_intended_diff_only": 0.47700005769729614, |
| "tpp_threshold_20_unintended_diff_only": 0.2355000227689743, |
| "tpp_threshold_50_total_metric": 0.1937500387430191, |
| "tpp_threshold_50_intended_diff_only": 0.487000048160553, |
| "tpp_threshold_50_unintended_diff_only": 0.2932500094175339, |
| "tpp_threshold_100_total_metric": 0.1717499941587448, |
| "tpp_threshold_100_intended_diff_only": 0.49000000953674316, |
| "tpp_threshold_100_unintended_diff_only": 0.31825001537799835, |
| "tpp_threshold_500_total_metric": 0.11825002729892731, |
| "tpp_threshold_500_intended_diff_only": 0.49100005626678467, |
| "tpp_threshold_500_unintended_diff_only": 0.37275002896785736 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.051999956369400024, |
| "tpp_threshold_2_intended_diff_only": 0.05899995565414429, |
| "tpp_threshold_2_unintended_diff_only": 0.006999999284744263, |
| "tpp_threshold_5_total_metric": 0.23749998211860657, |
| "tpp_threshold_5_intended_diff_only": 0.3240000009536743, |
| "tpp_threshold_5_unintended_diff_only": 0.08650001883506775, |
| "tpp_threshold_10_total_metric": 0.24974998831748962, |
| "tpp_threshold_10_intended_diff_only": 0.40299999713897705, |
| "tpp_threshold_10_unintended_diff_only": 0.15325000882148743, |
| "tpp_threshold_20_total_metric": 0.20174995064735413, |
| "tpp_threshold_20_intended_diff_only": 0.44599997997283936, |
| "tpp_threshold_20_unintended_diff_only": 0.24425002932548523, |
| "tpp_threshold_50_total_metric": 0.17399998009204865, |
| "tpp_threshold_50_intended_diff_only": 0.4729999899864197, |
| "tpp_threshold_50_unintended_diff_only": 0.29900000989437103, |
| "tpp_threshold_100_total_metric": 0.1289999932050705, |
| "tpp_threshold_100_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_100_unintended_diff_only": 0.3460000306367874, |
| "tpp_threshold_500_total_metric": 0.08424998819828033, |
| "tpp_threshold_500_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_500_unintended_diff_only": 0.3907500356435776 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.003999993205070496, |
| "tpp_threshold_2_intended_diff_only": 0.009999990463256836, |
| "tpp_threshold_2_unintended_diff_only": 0.00599999725818634, |
| "tpp_threshold_5_total_metric": 0.013749957084655762, |
| "tpp_threshold_5_intended_diff_only": 0.01699995994567871, |
| "tpp_threshold_5_unintended_diff_only": 0.0032500028610229492, |
| "tpp_threshold_10_total_metric": 0.01974998414516449, |
| "tpp_threshold_10_intended_diff_only": 0.030999958515167236, |
| "tpp_threshold_10_unintended_diff_only": 0.011249974370002747, |
| "tpp_threshold_20_total_metric": 0.06299996376037598, |
| "tpp_threshold_20_intended_diff_only": 0.08199995756149292, |
| "tpp_threshold_20_unintended_diff_only": 0.018999993801116943, |
| "tpp_threshold_50_total_metric": 0.22275002300739288, |
| "tpp_threshold_50_intended_diff_only": 0.24900001287460327, |
| "tpp_threshold_50_unintended_diff_only": 0.026249989867210388, |
| "tpp_threshold_100_total_metric": 0.3279999792575836, |
| "tpp_threshold_100_intended_diff_only": 0.37699997425079346, |
| "tpp_threshold_100_unintended_diff_only": 0.04899999499320984, |
| "tpp_threshold_500_total_metric": 0.3929999768733978, |
| "tpp_threshold_500_intended_diff_only": 0.4559999704360962, |
| "tpp_threshold_500_unintended_diff_only": 0.06299999356269836 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.10425002872943878, |
| "tpp_threshold_2_intended_diff_only": 0.125, |
| "tpp_threshold_2_unintended_diff_only": 0.020749971270561218, |
| "tpp_threshold_5_total_metric": 0.21700003743171692, |
| "tpp_threshold_5_intended_diff_only": 0.23900002241134644, |
| "tpp_threshold_5_unintended_diff_only": 0.021999984979629517, |
| "tpp_threshold_10_total_metric": 0.29575003683567047, |
| "tpp_threshold_10_intended_diff_only": 0.31800001859664917, |
| "tpp_threshold_10_unintended_diff_only": 0.0222499817609787, |
| "tpp_threshold_20_total_metric": 0.3450000584125519, |
| "tpp_threshold_20_intended_diff_only": 0.3720000386238098, |
| "tpp_threshold_20_unintended_diff_only": 0.026999980211257935, |
| "tpp_threshold_50_total_metric": 0.3840000629425049, |
| "tpp_threshold_50_intended_diff_only": 0.4320000410079956, |
| "tpp_threshold_50_unintended_diff_only": 0.04799997806549072, |
| "tpp_threshold_100_total_metric": 0.3850000351667404, |
| "tpp_threshold_100_intended_diff_only": 0.44300001859664917, |
| "tpp_threshold_100_unintended_diff_only": 0.05799998342990875, |
| "tpp_threshold_500_total_metric": 0.3697500675916672, |
| "tpp_threshold_500_intended_diff_only": 0.4450000524520874, |
| "tpp_threshold_500_unintended_diff_only": 0.07524998486042023 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.015250012278556824, |
| "tpp_threshold_2_intended_diff_only": -0.00700002908706665, |
| "tpp_threshold_2_unintended_diff_only": 0.008249983191490173, |
| "tpp_threshold_5_total_metric": -0.00849999487400055, |
| "tpp_threshold_5_intended_diff_only": -0.004999995231628418, |
| "tpp_threshold_5_unintended_diff_only": 0.0034999996423721313, |
| "tpp_threshold_10_total_metric": 0.0029999911785125732, |
| "tpp_threshold_10_intended_diff_only": 0.0059999823570251465, |
| "tpp_threshold_10_unintended_diff_only": 0.0029999911785125732, |
| "tpp_threshold_20_total_metric": 0.026749998331069946, |
| "tpp_threshold_20_intended_diff_only": 0.03200000524520874, |
| "tpp_threshold_20_unintended_diff_only": 0.005250006914138794, |
| "tpp_threshold_50_total_metric": 0.09849996864795685, |
| "tpp_threshold_50_intended_diff_only": 0.10499995946884155, |
| "tpp_threshold_50_unintended_diff_only": 0.006499990820884705, |
| "tpp_threshold_100_total_metric": 0.23924998939037323, |
| "tpp_threshold_100_intended_diff_only": 0.2569999694824219, |
| "tpp_threshold_100_unintended_diff_only": 0.017749980092048645, |
| "tpp_threshold_500_total_metric": 0.3902500420808792, |
| "tpp_threshold_500_intended_diff_only": 0.42900002002716064, |
| "tpp_threshold_500_unintended_diff_only": 0.03874997794628143 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.04324999451637268, |
| "tpp_threshold_2_intended_diff_only": 0.04799997806549072, |
| "tpp_threshold_2_unintended_diff_only": 0.004749983549118042, |
| "tpp_threshold_5_total_metric": 0.12900002300739288, |
| "tpp_threshold_5_intended_diff_only": 0.1380000114440918, |
| "tpp_threshold_5_unintended_diff_only": 0.008999988436698914, |
| "tpp_threshold_10_total_metric": 0.2122500091791153, |
| "tpp_threshold_10_intended_diff_only": 0.2279999852180481, |
| "tpp_threshold_10_unintended_diff_only": 0.0157499760389328, |
| "tpp_threshold_20_total_metric": 0.2952500283718109, |
| "tpp_threshold_20_intended_diff_only": 0.31800001859664917, |
| "tpp_threshold_20_unintended_diff_only": 0.022749990224838257, |
| "tpp_threshold_50_total_metric": 0.36125002801418304, |
| "tpp_threshold_50_intended_diff_only": 0.38700002431869507, |
| "tpp_threshold_50_unintended_diff_only": 0.025749996304512024, |
| "tpp_threshold_100_total_metric": 0.3997500389814377, |
| "tpp_threshold_100_intended_diff_only": 0.4280000329017639, |
| "tpp_threshold_100_unintended_diff_only": 0.028249993920326233, |
| "tpp_threshold_500_total_metric": 0.3890000432729721, |
| "tpp_threshold_500_intended_diff_only": 0.437000036239624, |
| "tpp_threshold_500_unintended_diff_only": 0.047999992966651917 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.020500019192695618, |
| "tpp_threshold_2_intended_diff_only": 0.03600001335144043, |
| "tpp_threshold_2_unintended_diff_only": 0.015499994158744812, |
| "tpp_threshold_5_total_metric": 0.07250000536441803, |
| "tpp_threshold_5_intended_diff_only": 0.08300000429153442, |
| "tpp_threshold_5_unintended_diff_only": 0.010499998927116394, |
| "tpp_threshold_10_total_metric": 0.1300000250339508, |
| "tpp_threshold_10_intended_diff_only": 0.14300000667572021, |
| "tpp_threshold_10_unintended_diff_only": 0.01299998164176941, |
| "tpp_threshold_20_total_metric": 0.218999981880188, |
| "tpp_threshold_20_intended_diff_only": 0.2329999804496765, |
| "tpp_threshold_20_unintended_diff_only": 0.013999998569488525, |
| "tpp_threshold_50_total_metric": 0.3264999985694885, |
| "tpp_threshold_50_intended_diff_only": 0.3569999933242798, |
| "tpp_threshold_50_unintended_diff_only": 0.03049999475479126, |
| "tpp_threshold_100_total_metric": 0.35850003361701965, |
| "tpp_threshold_100_intended_diff_only": 0.3920000195503235, |
| "tpp_threshold_100_unintended_diff_only": 0.03349998593330383, |
| "tpp_threshold_500_total_metric": 0.34450003504753113, |
| "tpp_threshold_500_intended_diff_only": 0.3960000276565552, |
| "tpp_threshold_500_unintended_diff_only": 0.05149999260902405 |
| } |
| } |
| } |
| } |