| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752640371, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.07780000567436218, |
| "tpp_threshold_2_intended_diff_only": 0.13410001397132873, |
| "tpp_threshold_2_unintended_diff_only": 0.05630000829696655, |
| "tpp_threshold_5_total_metric": 0.08797501027584076, |
| "tpp_threshold_5_intended_diff_only": 0.19100001454353333, |
| "tpp_threshold_5_unintended_diff_only": 0.10302500426769257, |
| "tpp_threshold_10_total_metric": 0.10245000272989273, |
| "tpp_threshold_10_intended_diff_only": 0.23010001182556153, |
| "tpp_threshold_10_unintended_diff_only": 0.1276500090956688, |
| "tpp_threshold_20_total_metric": 0.12669999450445174, |
| "tpp_threshold_20_intended_diff_only": 0.2802000045776367, |
| "tpp_threshold_20_unintended_diff_only": 0.15350001007318495, |
| "tpp_threshold_50_total_metric": 0.15980001389980317, |
| "tpp_threshold_50_intended_diff_only": 0.3496000230312347, |
| "tpp_threshold_50_unintended_diff_only": 0.18980000913143158, |
| "tpp_threshold_100_total_metric": 0.19035000801086427, |
| "tpp_threshold_100_intended_diff_only": 0.40140001773834233, |
| "tpp_threshold_100_unintended_diff_only": 0.211050009727478, |
| "tpp_threshold_500_total_metric": 0.19660003036260604, |
| "tpp_threshold_500_intended_diff_only": 0.4421000421047211, |
| "tpp_threshold_500_unintended_diff_only": 0.245500011742115 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.14475001394748688, |
| "tpp_threshold_2_intended_diff_only": 0.24960001707077026, |
| "tpp_threshold_2_unintended_diff_only": 0.10485000312328338, |
| "tpp_threshold_5_total_metric": 0.1489000141620636, |
| "tpp_threshold_5_intended_diff_only": 0.3442000150680542, |
| "tpp_threshold_5_unintended_diff_only": 0.1953000009059906, |
| "tpp_threshold_10_total_metric": 0.14525000154972076, |
| "tpp_threshold_10_intended_diff_only": 0.38640000820159914, |
| "tpp_threshold_10_unintended_diff_only": 0.24115000665187836, |
| "tpp_threshold_20_total_metric": 0.13659998774528503, |
| "tpp_threshold_20_intended_diff_only": 0.42139999866485595, |
| "tpp_threshold_20_unintended_diff_only": 0.2848000109195709, |
| "tpp_threshold_50_total_metric": 0.10574999749660492, |
| "tpp_threshold_50_intended_diff_only": 0.4520000100135803, |
| "tpp_threshold_50_unintended_diff_only": 0.3462500125169754, |
| "tpp_threshold_100_total_metric": 0.08964999616146088, |
| "tpp_threshold_100_intended_diff_only": 0.4644000053405762, |
| "tpp_threshold_100_unintended_diff_only": 0.3747500091791153, |
| "tpp_threshold_500_total_metric": 0.06530002057552338, |
| "tpp_threshold_500_intended_diff_only": 0.46700003147125246, |
| "tpp_threshold_500_unintended_diff_only": 0.40170001089572904 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.010849997401237488, |
| "tpp_threshold_2_intended_diff_only": 0.01860001087188721, |
| "tpp_threshold_2_unintended_diff_only": 0.007750013470649719, |
| "tpp_threshold_5_total_metric": 0.02705000638961792, |
| "tpp_threshold_5_intended_diff_only": 0.03780001401901245, |
| "tpp_threshold_5_unintended_diff_only": 0.010750007629394532, |
| "tpp_threshold_10_total_metric": 0.0596500039100647, |
| "tpp_threshold_10_intended_diff_only": 0.07380001544952393, |
| "tpp_threshold_10_unintended_diff_only": 0.014150011539459228, |
| "tpp_threshold_20_total_metric": 0.11680000126361847, |
| "tpp_threshold_20_intended_diff_only": 0.1390000104904175, |
| "tpp_threshold_20_unintended_diff_only": 0.02220000922679901, |
| "tpp_threshold_50_total_metric": 0.2138500303030014, |
| "tpp_threshold_50_intended_diff_only": 0.24720003604888915, |
| "tpp_threshold_50_unintended_diff_only": 0.033350005745887756, |
| "tpp_threshold_100_total_metric": 0.29105001986026763, |
| "tpp_threshold_100_intended_diff_only": 0.3384000301361084, |
| "tpp_threshold_100_unintended_diff_only": 0.04735001027584076, |
| "tpp_threshold_500_total_metric": 0.3279000401496887, |
| "tpp_threshold_500_intended_diff_only": 0.4172000527381897, |
| "tpp_threshold_500_unintended_diff_only": 0.08930001258850098 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.13.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.13.hook_resid_post", |
| "hook_layer": 13, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.19025000929832458, |
| "tpp_threshold_2_intended_diff_only": 0.3230000138282776, |
| "tpp_threshold_2_unintended_diff_only": 0.132750004529953, |
| "tpp_threshold_5_total_metric": 0.1692499965429306, |
| "tpp_threshold_5_intended_diff_only": 0.3610000014305115, |
| "tpp_threshold_5_unintended_diff_only": 0.19175000488758087, |
| "tpp_threshold_10_total_metric": 0.1652500033378601, |
| "tpp_threshold_10_intended_diff_only": 0.3880000114440918, |
| "tpp_threshold_10_unintended_diff_only": 0.2227500081062317, |
| "tpp_threshold_20_total_metric": 0.14224998652935028, |
| "tpp_threshold_20_intended_diff_only": 0.42100000381469727, |
| "tpp_threshold_20_unintended_diff_only": 0.278750017285347, |
| "tpp_threshold_50_total_metric": 0.10124997794628143, |
| "tpp_threshold_50_intended_diff_only": 0.44099998474121094, |
| "tpp_threshold_50_unintended_diff_only": 0.3397500067949295, |
| "tpp_threshold_100_total_metric": 0.08099998533725739, |
| "tpp_threshold_100_intended_diff_only": 0.44999998807907104, |
| "tpp_threshold_100_unintended_diff_only": 0.36900000274181366, |
| "tpp_threshold_500_total_metric": 0.058750003576278687, |
| "tpp_threshold_500_intended_diff_only": 0.4520000219345093, |
| "tpp_threshold_500_unintended_diff_only": 0.3932500183582306 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.1380000114440918, |
| "tpp_threshold_2_intended_diff_only": 0.26899999380111694, |
| "tpp_threshold_2_unintended_diff_only": 0.13099998235702515, |
| "tpp_threshold_5_total_metric": 0.12449999153614044, |
| "tpp_threshold_5_intended_diff_only": 0.3019999861717224, |
| "tpp_threshold_5_unintended_diff_only": 0.17749999463558197, |
| "tpp_threshold_10_total_metric": 0.13950002193450928, |
| "tpp_threshold_10_intended_diff_only": 0.3500000238418579, |
| "tpp_threshold_10_unintended_diff_only": 0.21050000190734863, |
| "tpp_threshold_20_total_metric": 0.11674998700618744, |
| "tpp_threshold_20_intended_diff_only": 0.4039999842643738, |
| "tpp_threshold_20_unintended_diff_only": 0.28724999725818634, |
| "tpp_threshold_50_total_metric": 0.0807499885559082, |
| "tpp_threshold_50_intended_diff_only": 0.4490000009536743, |
| "tpp_threshold_50_unintended_diff_only": 0.3682500123977661, |
| "tpp_threshold_100_total_metric": 0.07024997472763062, |
| "tpp_threshold_100_intended_diff_only": 0.45899999141693115, |
| "tpp_threshold_100_unintended_diff_only": 0.38875001668930054, |
| "tpp_threshold_500_total_metric": 0.052250027656555176, |
| "tpp_threshold_500_intended_diff_only": 0.46000003814697266, |
| "tpp_threshold_500_unintended_diff_only": 0.4077500104904175 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.11250004172325134, |
| "tpp_threshold_2_intended_diff_only": 0.26500004529953003, |
| "tpp_threshold_2_unintended_diff_only": 0.1525000035762787, |
| "tpp_threshold_5_total_metric": 0.13700000941753387, |
| "tpp_threshold_5_intended_diff_only": 0.34700000286102295, |
| "tpp_threshold_5_unintended_diff_only": 0.20999999344348907, |
| "tpp_threshold_10_total_metric": 0.07224999368190765, |
| "tpp_threshold_10_intended_diff_only": 0.37400001287460327, |
| "tpp_threshold_10_unintended_diff_only": 0.3017500191926956, |
| "tpp_threshold_20_total_metric": 0.09274999797344208, |
| "tpp_threshold_20_intended_diff_only": 0.406000018119812, |
| "tpp_threshold_20_unintended_diff_only": 0.31325002014636993, |
| "tpp_threshold_50_total_metric": 0.08100003004074097, |
| "tpp_threshold_50_intended_diff_only": 0.43300002813339233, |
| "tpp_threshold_50_unintended_diff_only": 0.35199999809265137, |
| "tpp_threshold_100_total_metric": 0.08050000667572021, |
| "tpp_threshold_100_intended_diff_only": 0.4520000219345093, |
| "tpp_threshold_100_unintended_diff_only": 0.37150001525878906, |
| "tpp_threshold_500_total_metric": 0.04375004768371582, |
| "tpp_threshold_500_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_500_unintended_diff_only": 0.4102500081062317 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.18399998545646667, |
| "tpp_threshold_2_intended_diff_only": 0.21799999475479126, |
| "tpp_threshold_2_unintended_diff_only": 0.034000009298324585, |
| "tpp_threshold_5_total_metric": 0.22700002789497375, |
| "tpp_threshold_5_intended_diff_only": 0.3680000305175781, |
| "tpp_threshold_5_unintended_diff_only": 0.14100000262260437, |
| "tpp_threshold_10_total_metric": 0.2827499806880951, |
| "tpp_threshold_10_intended_diff_only": 0.44599997997283936, |
| "tpp_threshold_10_unintended_diff_only": 0.16324999928474426, |
| "tpp_threshold_20_total_metric": 0.24599996209144592, |
| "tpp_threshold_20_intended_diff_only": 0.4739999771118164, |
| "tpp_threshold_20_unintended_diff_only": 0.22800001502037048, |
| "tpp_threshold_50_total_metric": 0.1642499566078186, |
| "tpp_threshold_50_intended_diff_only": 0.4819999933242798, |
| "tpp_threshold_50_unintended_diff_only": 0.3177500367164612, |
| "tpp_threshold_100_total_metric": 0.12725000083446503, |
| "tpp_threshold_100_intended_diff_only": 0.48500001430511475, |
| "tpp_threshold_100_unintended_diff_only": 0.3577500134706497, |
| "tpp_threshold_500_total_metric": 0.09924997389316559, |
| "tpp_threshold_500_intended_diff_only": 0.4919999837875366, |
| "tpp_threshold_500_unintended_diff_only": 0.39275000989437103 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.09900002181529999, |
| "tpp_threshold_2_intended_diff_only": 0.1730000376701355, |
| "tpp_threshold_2_unintended_diff_only": 0.07400001585483551, |
| "tpp_threshold_5_total_metric": 0.08675004541873932, |
| "tpp_threshold_5_intended_diff_only": 0.34300005435943604, |
| "tpp_threshold_5_unintended_diff_only": 0.2562500089406967, |
| "tpp_threshold_10_total_metric": 0.06650000810623169, |
| "tpp_threshold_10_intended_diff_only": 0.37400001287460327, |
| "tpp_threshold_10_unintended_diff_only": 0.3075000047683716, |
| "tpp_threshold_20_total_metric": 0.08525000512599945, |
| "tpp_threshold_20_intended_diff_only": 0.4020000100135803, |
| "tpp_threshold_20_unintended_diff_only": 0.31675000488758087, |
| "tpp_threshold_50_total_metric": 0.10150003433227539, |
| "tpp_threshold_50_intended_diff_only": 0.45500004291534424, |
| "tpp_threshold_50_unintended_diff_only": 0.35350000858306885, |
| "tpp_threshold_100_total_metric": 0.08925001323223114, |
| "tpp_threshold_100_intended_diff_only": 0.47600001096725464, |
| "tpp_threshold_100_unintended_diff_only": 0.3867499977350235, |
| "tpp_threshold_500_total_metric": 0.07250005006790161, |
| "tpp_threshold_500_intended_diff_only": 0.47700005769729614, |
| "tpp_threshold_500_unintended_diff_only": 0.40450000762939453 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.011000007390975952, |
| "tpp_threshold_2_intended_diff_only": 0.016000032424926758, |
| "tpp_threshold_2_unintended_diff_only": 0.005000025033950806, |
| "tpp_threshold_5_total_metric": 0.006000027060508728, |
| "tpp_threshold_5_intended_diff_only": 0.01100003719329834, |
| "tpp_threshold_5_unintended_diff_only": 0.005000010132789612, |
| "tpp_threshold_10_total_metric": 0.017750024795532227, |
| "tpp_threshold_10_intended_diff_only": 0.026000022888183594, |
| "tpp_threshold_10_unintended_diff_only": 0.008249998092651367, |
| "tpp_threshold_20_total_metric": 0.049500033259391785, |
| "tpp_threshold_20_intended_diff_only": 0.07600003480911255, |
| "tpp_threshold_20_unintended_diff_only": 0.026500001549720764, |
| "tpp_threshold_50_total_metric": 0.13950003683567047, |
| "tpp_threshold_50_intended_diff_only": 0.17800003290176392, |
| "tpp_threshold_50_unintended_diff_only": 0.038499996066093445, |
| "tpp_threshold_100_total_metric": 0.24550001323223114, |
| "tpp_threshold_100_intended_diff_only": 0.28600001335144043, |
| "tpp_threshold_100_unintended_diff_only": 0.04050000011920929, |
| "tpp_threshold_500_total_metric": 0.382250040769577, |
| "tpp_threshold_500_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_500_unintended_diff_only": 0.07175001502037048 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.016250014305114746, |
| "tpp_threshold_2_intended_diff_only": 0.022000014781951904, |
| "tpp_threshold_2_unintended_diff_only": 0.005750000476837158, |
| "tpp_threshold_5_total_metric": 0.0299999862909317, |
| "tpp_threshold_5_intended_diff_only": 0.046999990940093994, |
| "tpp_threshold_5_unintended_diff_only": 0.017000004649162292, |
| "tpp_threshold_10_total_metric": 0.06924998760223389, |
| "tpp_threshold_10_intended_diff_only": 0.08799999952316284, |
| "tpp_threshold_10_unintended_diff_only": 0.018750011920928955, |
| "tpp_threshold_20_total_metric": 0.1522500067949295, |
| "tpp_threshold_20_intended_diff_only": 0.17900002002716064, |
| "tpp_threshold_20_unintended_diff_only": 0.02675001323223114, |
| "tpp_threshold_50_total_metric": 0.2615000158548355, |
| "tpp_threshold_50_intended_diff_only": 0.30800002813339233, |
| "tpp_threshold_50_unintended_diff_only": 0.046500012278556824, |
| "tpp_threshold_100_total_metric": 0.3452500253915787, |
| "tpp_threshold_100_intended_diff_only": 0.40400004386901855, |
| "tpp_threshold_100_unintended_diff_only": 0.05875001847743988, |
| "tpp_threshold_500_total_metric": 0.3087500333786011, |
| "tpp_threshold_500_intended_diff_only": 0.4270000457763672, |
| "tpp_threshold_500_unintended_diff_only": 0.11825001239776611 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.008249983191490173, |
| "tpp_threshold_2_intended_diff_only": -0.001999974250793457, |
| "tpp_threshold_2_unintended_diff_only": 0.006250008940696716, |
| "tpp_threshold_5_total_metric": -0.003999978303909302, |
| "tpp_threshold_5_intended_diff_only": 0.003000020980834961, |
| "tpp_threshold_5_unintended_diff_only": 0.006999999284744263, |
| "tpp_threshold_10_total_metric": 0.02950000762939453, |
| "tpp_threshold_10_intended_diff_only": 0.0350000262260437, |
| "tpp_threshold_10_unintended_diff_only": 0.00550001859664917, |
| "tpp_threshold_20_total_metric": 0.03949999809265137, |
| "tpp_threshold_20_intended_diff_only": 0.06000000238418579, |
| "tpp_threshold_20_unintended_diff_only": 0.020500004291534424, |
| "tpp_threshold_50_total_metric": 0.10700003802776337, |
| "tpp_threshold_50_intended_diff_only": 0.12700003385543823, |
| "tpp_threshold_50_unintended_diff_only": 0.019999995827674866, |
| "tpp_threshold_100_total_metric": 0.21275004744529724, |
| "tpp_threshold_100_intended_diff_only": 0.2600000500679016, |
| "tpp_threshold_100_unintended_diff_only": 0.04725000262260437, |
| "tpp_threshold_500_total_metric": 0.34775006771087646, |
| "tpp_threshold_500_intended_diff_only": 0.42500007152557373, |
| "tpp_threshold_500_unintended_diff_only": 0.07725000381469727 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.020500004291534424, |
| "tpp_threshold_2_intended_diff_only": 0.027000010013580322, |
| "tpp_threshold_2_unintended_diff_only": 0.0065000057220458984, |
| "tpp_threshold_5_total_metric": 0.037250012159347534, |
| "tpp_threshold_5_intended_diff_only": 0.04900002479553223, |
| "tpp_threshold_5_unintended_diff_only": 0.011750012636184692, |
| "tpp_threshold_10_total_metric": 0.0712500512599945, |
| "tpp_threshold_10_intended_diff_only": 0.08900004625320435, |
| "tpp_threshold_10_unintended_diff_only": 0.01774999499320984, |
| "tpp_threshold_20_total_metric": 0.1367500275373459, |
| "tpp_threshold_20_intended_diff_only": 0.15500003099441528, |
| "tpp_threshold_20_unintended_diff_only": 0.018250003457069397, |
| "tpp_threshold_50_total_metric": 0.26325006783008575, |
| "tpp_threshold_50_intended_diff_only": 0.2910000681877136, |
| "tpp_threshold_50_unintended_diff_only": 0.02775000035762787, |
| "tpp_threshold_100_total_metric": 0.33775006234645844, |
| "tpp_threshold_100_intended_diff_only": 0.3750000596046448, |
| "tpp_threshold_100_unintended_diff_only": 0.03724999725818634, |
| "tpp_threshold_500_total_metric": 0.32975006103515625, |
| "tpp_threshold_500_intended_diff_only": 0.4110000729560852, |
| "tpp_threshold_500_unintended_diff_only": 0.08125001192092896 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.01474994421005249, |
| "tpp_threshold_2_intended_diff_only": 0.029999971389770508, |
| "tpp_threshold_2_unintended_diff_only": 0.015250027179718018, |
| "tpp_threshold_5_total_metric": 0.06599998474121094, |
| "tpp_threshold_5_intended_diff_only": 0.07899999618530273, |
| "tpp_threshold_5_unintended_diff_only": 0.013000011444091797, |
| "tpp_threshold_10_total_metric": 0.11049994826316833, |
| "tpp_threshold_10_intended_diff_only": 0.13099998235702515, |
| "tpp_threshold_10_unintended_diff_only": 0.02050003409385681, |
| "tpp_threshold_20_total_metric": 0.2059999406337738, |
| "tpp_threshold_20_intended_diff_only": 0.22499996423721313, |
| "tpp_threshold_20_unintended_diff_only": 0.01900002360343933, |
| "tpp_threshold_50_total_metric": 0.2979999929666519, |
| "tpp_threshold_50_intended_diff_only": 0.3320000171661377, |
| "tpp_threshold_50_unintended_diff_only": 0.03400002419948578, |
| "tpp_threshold_100_total_metric": 0.3139999508857727, |
| "tpp_threshold_100_intended_diff_only": 0.3669999837875366, |
| "tpp_threshold_100_unintended_diff_only": 0.053000032901763916, |
| "tpp_threshold_500_total_metric": 0.2709999978542328, |
| "tpp_threshold_500_intended_diff_only": 0.36900001764297485, |
| "tpp_threshold_500_unintended_diff_only": 0.09800001978874207 |
| } |
| } |
| } |
| } |