| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745617236522, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.016100001335144044, |
| "tpp_threshold_2_intended_diff_only": 0.016700005531311034, |
| "tpp_threshold_2_unintended_diff_only": 0.0006000041961669922, |
| "tpp_threshold_5_total_metric": 0.09494999945163726, |
| "tpp_threshold_5_intended_diff_only": 0.10510000586509705, |
| "tpp_threshold_5_unintended_diff_only": 0.010150006413459778, |
| "tpp_threshold_10_total_metric": 0.18580001145601271, |
| "tpp_threshold_10_intended_diff_only": 0.20970001816749573, |
| "tpp_threshold_10_unintended_diff_only": 0.023900006711483002, |
| "tpp_threshold_20_total_metric": 0.2513000205159187, |
| "tpp_threshold_20_intended_diff_only": 0.29320002198219297, |
| "tpp_threshold_20_unintended_diff_only": 0.04190000146627426, |
| "tpp_threshold_50_total_metric": 0.30067501962184906, |
| "tpp_threshold_50_intended_diff_only": 0.4029000282287598, |
| "tpp_threshold_50_unintended_diff_only": 0.1022250086069107, |
| "tpp_threshold_100_total_metric": 0.2945500180125237, |
| "tpp_threshold_100_intended_diff_only": 0.43100003004074094, |
| "tpp_threshold_100_unintended_diff_only": 0.13645001202821733, |
| "tpp_threshold_500_total_metric": 0.20857502967119218, |
| "tpp_threshold_500_intended_diff_only": 0.43320004940032963, |
| "tpp_threshold_500_unintended_diff_only": 0.2246250197291374 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.01604999303817749, |
| "tpp_threshold_2_intended_diff_only": 0.018800008296966552, |
| "tpp_threshold_2_unintended_diff_only": 0.0027500152587890624, |
| "tpp_threshold_5_total_metric": 0.15740000307559968, |
| "tpp_threshold_5_intended_diff_only": 0.17660001516342164, |
| "tpp_threshold_5_unintended_diff_only": 0.01920001208782196, |
| "tpp_threshold_10_total_metric": 0.2968500107526779, |
| "tpp_threshold_10_intended_diff_only": 0.33420002460479736, |
| "tpp_threshold_10_unintended_diff_only": 0.037350013852119446, |
| "tpp_threshold_20_total_metric": 0.36515002250671386, |
| "tpp_threshold_20_intended_diff_only": 0.42980003356933594, |
| "tpp_threshold_20_unintended_diff_only": 0.06465001106262207, |
| "tpp_threshold_50_total_metric": 0.2887000232934952, |
| "tpp_threshold_50_intended_diff_only": 0.459600043296814, |
| "tpp_threshold_50_unintended_diff_only": 0.17090002000331878, |
| "tpp_threshold_100_total_metric": 0.22960002720355988, |
| "tpp_threshold_100_intended_diff_only": 0.46360005140304567, |
| "tpp_threshold_100_unintended_diff_only": 0.2340000241994858, |
| "tpp_threshold_500_total_metric": 0.09970002770423889, |
| "tpp_threshold_500_intended_diff_only": 0.4640000581741333, |
| "tpp_threshold_500_unintended_diff_only": 0.3643000304698944 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.016150009632110596, |
| "tpp_threshold_2_intended_diff_only": 0.014600002765655517, |
| "tpp_threshold_2_unintended_diff_only": -0.001550006866455078, |
| "tpp_threshold_5_total_metric": 0.03249999582767486, |
| "tpp_threshold_5_intended_diff_only": 0.03359999656677246, |
| "tpp_threshold_5_unintended_diff_only": 0.0011000007390975953, |
| "tpp_threshold_10_total_metric": 0.07475001215934754, |
| "tpp_threshold_10_intended_diff_only": 0.08520001173019409, |
| "tpp_threshold_10_unintended_diff_only": 0.010449999570846557, |
| "tpp_threshold_20_total_metric": 0.1374500185251236, |
| "tpp_threshold_20_intended_diff_only": 0.15660001039505006, |
| "tpp_threshold_20_unintended_diff_only": 0.019149991869926452, |
| "tpp_threshold_50_total_metric": 0.3126500159502029, |
| "tpp_threshold_50_intended_diff_only": 0.3462000131607056, |
| "tpp_threshold_50_unintended_diff_only": 0.03354999721050263, |
| "tpp_threshold_100_total_metric": 0.3595000088214874, |
| "tpp_threshold_100_intended_diff_only": 0.3984000086784363, |
| "tpp_threshold_100_unintended_diff_only": 0.03889999985694885, |
| "tpp_threshold_500_total_metric": 0.31745003163814545, |
| "tpp_threshold_500_intended_diff_only": 0.4024000406265259, |
| "tpp_threshold_500_unintended_diff_only": 0.08495000898838043 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.4.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.4.hook_resid_post", |
| "hook_layer": 4, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.008250012993812561, |
| "tpp_threshold_2_intended_diff_only": 0.012000024318695068, |
| "tpp_threshold_2_unintended_diff_only": 0.0037500113248825073, |
| "tpp_threshold_5_total_metric": 0.16550005972385406, |
| "tpp_threshold_5_intended_diff_only": 0.17600005865097046, |
| "tpp_threshold_5_unintended_diff_only": 0.010499998927116394, |
| "tpp_threshold_10_total_metric": 0.2580000162124634, |
| "tpp_threshold_10_intended_diff_only": 0.3270000219345093, |
| "tpp_threshold_10_unintended_diff_only": 0.0690000057220459, |
| "tpp_threshold_20_total_metric": 0.25425003468990326, |
| "tpp_threshold_20_intended_diff_only": 0.39900004863739014, |
| "tpp_threshold_20_unintended_diff_only": 0.14475001394748688, |
| "tpp_threshold_50_total_metric": 0.2407500445842743, |
| "tpp_threshold_50_intended_diff_only": 0.4310000538825989, |
| "tpp_threshold_50_unintended_diff_only": 0.19025000929832458, |
| "tpp_threshold_100_total_metric": 0.21650001406669617, |
| "tpp_threshold_100_intended_diff_only": 0.44200003147125244, |
| "tpp_threshold_100_unintended_diff_only": 0.22550001740455627, |
| "tpp_threshold_500_total_metric": 0.08200004696846008, |
| "tpp_threshold_500_intended_diff_only": 0.4440000653266907, |
| "tpp_threshold_500_unintended_diff_only": 0.3620000183582306 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.01574993133544922, |
| "tpp_threshold_2_intended_diff_only": 0.01699995994567871, |
| "tpp_threshold_2_unintended_diff_only": 0.0012500286102294922, |
| "tpp_threshold_5_total_metric": 0.18474994599819183, |
| "tpp_threshold_5_intended_diff_only": 0.2059999704360962, |
| "tpp_threshold_5_unintended_diff_only": 0.021250024437904358, |
| "tpp_threshold_10_total_metric": 0.2709999829530716, |
| "tpp_threshold_10_intended_diff_only": 0.29600000381469727, |
| "tpp_threshold_10_unintended_diff_only": 0.02500002086162567, |
| "tpp_threshold_20_total_metric": 0.3767499625682831, |
| "tpp_threshold_20_intended_diff_only": 0.4229999780654907, |
| "tpp_threshold_20_unintended_diff_only": 0.04625001549720764, |
| "tpp_threshold_50_total_metric": 0.21399995684623718, |
| "tpp_threshold_50_intended_diff_only": 0.46399998664855957, |
| "tpp_threshold_50_unintended_diff_only": 0.2500000298023224, |
| "tpp_threshold_100_total_metric": 0.16050000488758087, |
| "tpp_threshold_100_intended_diff_only": 0.4650000333786011, |
| "tpp_threshold_100_unintended_diff_only": 0.3045000284910202, |
| "tpp_threshold_500_total_metric": 0.08124999701976776, |
| "tpp_threshold_500_intended_diff_only": 0.4650000333786011, |
| "tpp_threshold_500_unintended_diff_only": 0.3837500363588333 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.040000006556510925, |
| "tpp_threshold_2_intended_diff_only": 0.04500001668930054, |
| "tpp_threshold_2_unintended_diff_only": 0.005000010132789612, |
| "tpp_threshold_5_total_metric": 0.19975003600120544, |
| "tpp_threshold_5_intended_diff_only": 0.21400004625320435, |
| "tpp_threshold_5_unintended_diff_only": 0.014250010251998901, |
| "tpp_threshold_10_total_metric": 0.28325000405311584, |
| "tpp_threshold_10_intended_diff_only": 0.30400002002716064, |
| "tpp_threshold_10_unintended_diff_only": 0.0207500159740448, |
| "tpp_threshold_20_total_metric": 0.35625000298023224, |
| "tpp_threshold_20_intended_diff_only": 0.3970000147819519, |
| "tpp_threshold_20_unintended_diff_only": 0.040750011801719666, |
| "tpp_threshold_50_total_metric": 0.3215000629425049, |
| "tpp_threshold_50_intended_diff_only": 0.43900007009506226, |
| "tpp_threshold_50_unintended_diff_only": 0.11750000715255737, |
| "tpp_threshold_100_total_metric": 0.23175005614757538, |
| "tpp_threshold_100_intended_diff_only": 0.4440000653266907, |
| "tpp_threshold_100_unintended_diff_only": 0.2122500091791153, |
| "tpp_threshold_500_total_metric": 0.052000030875205994, |
| "tpp_threshold_500_intended_diff_only": 0.4440000653266907, |
| "tpp_threshold_500_unintended_diff_only": 0.3920000344514847 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.004749983549118042, |
| "tpp_threshold_2_intended_diff_only": 0.004999995231628418, |
| "tpp_threshold_2_unintended_diff_only": 0.000250011682510376, |
| "tpp_threshold_5_total_metric": 0.05199997127056122, |
| "tpp_threshold_5_intended_diff_only": 0.08399999141693115, |
| "tpp_threshold_5_unintended_diff_only": 0.032000020146369934, |
| "tpp_threshold_10_total_metric": 0.2707499861717224, |
| "tpp_threshold_10_intended_diff_only": 0.3190000057220459, |
| "tpp_threshold_10_unintended_diff_only": 0.048250019550323486, |
| "tpp_threshold_20_total_metric": 0.4047500491142273, |
| "tpp_threshold_20_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_20_unintended_diff_only": 0.049250006675720215, |
| "tpp_threshold_50_total_metric": 0.34950000047683716, |
| "tpp_threshold_50_intended_diff_only": 0.4880000352859497, |
| "tpp_threshold_50_unintended_diff_only": 0.13850003480911255, |
| "tpp_threshold_100_total_metric": 0.28575001657009125, |
| "tpp_threshold_100_intended_diff_only": 0.49100005626678467, |
| "tpp_threshold_100_unintended_diff_only": 0.20525003969669342, |
| "tpp_threshold_500_total_metric": 0.16625002026557922, |
| "tpp_threshold_500_intended_diff_only": 0.49100005626678467, |
| "tpp_threshold_500_unintended_diff_only": 0.32475003600120544 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.011500030755996704, |
| "tpp_threshold_2_intended_diff_only": 0.01500004529953003, |
| "tpp_threshold_2_unintended_diff_only": 0.003500014543533325, |
| "tpp_threshold_5_total_metric": 0.1850000023841858, |
| "tpp_threshold_5_intended_diff_only": 0.203000009059906, |
| "tpp_threshold_5_unintended_diff_only": 0.018000006675720215, |
| "tpp_threshold_10_total_metric": 0.40125006437301636, |
| "tpp_threshold_10_intended_diff_only": 0.42500007152557373, |
| "tpp_threshold_10_unintended_diff_only": 0.023750007152557373, |
| "tpp_threshold_20_total_metric": 0.43375006318092346, |
| "tpp_threshold_20_intended_diff_only": 0.4760000705718994, |
| "tpp_threshold_20_unintended_diff_only": 0.04225000739097595, |
| "tpp_threshold_50_total_metric": 0.3177500516176224, |
| "tpp_threshold_50_intended_diff_only": 0.4760000705718994, |
| "tpp_threshold_50_unintended_diff_only": 0.15825001895427704, |
| "tpp_threshold_100_total_metric": 0.2535000443458557, |
| "tpp_threshold_100_intended_diff_only": 0.4760000705718994, |
| "tpp_threshold_100_unintended_diff_only": 0.2225000262260437, |
| "tpp_threshold_500_total_metric": 0.1170000433921814, |
| "tpp_threshold_500_intended_diff_only": 0.4760000705718994, |
| "tpp_threshold_500_unintended_diff_only": 0.359000027179718 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.013500064611434937, |
| "tpp_threshold_2_intended_diff_only": 0.010000050067901611, |
| "tpp_threshold_2_unintended_diff_only": -0.003500014543533325, |
| "tpp_threshold_5_total_metric": 0.028250038623809814, |
| "tpp_threshold_5_intended_diff_only": 0.025000035762786865, |
| "tpp_threshold_5_unintended_diff_only": -0.0032500028610229492, |
| "tpp_threshold_10_total_metric": 0.1480000615119934, |
| "tpp_threshold_10_intended_diff_only": 0.18100005388259888, |
| "tpp_threshold_10_unintended_diff_only": 0.03299999237060547, |
| "tpp_threshold_20_total_metric": 0.24275004863739014, |
| "tpp_threshold_20_intended_diff_only": 0.2890000343322754, |
| "tpp_threshold_20_unintended_diff_only": 0.046249985694885254, |
| "tpp_threshold_50_total_metric": 0.35725004971027374, |
| "tpp_threshold_50_intended_diff_only": 0.40800005197525024, |
| "tpp_threshold_50_unintended_diff_only": 0.0507500022649765, |
| "tpp_threshold_100_total_metric": 0.37950003147125244, |
| "tpp_threshold_100_intended_diff_only": 0.43300002813339233, |
| "tpp_threshold_100_unintended_diff_only": 0.05349999666213989, |
| "tpp_threshold_500_total_metric": 0.3500000685453415, |
| "tpp_threshold_500_intended_diff_only": 0.43900007009506226, |
| "tpp_threshold_500_unintended_diff_only": 0.08900000154972076 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.020999982953071594, |
| "tpp_threshold_2_intended_diff_only": 0.0209999680519104, |
| "tpp_threshold_2_unintended_diff_only": -1.4901161193847656e-08, |
| "tpp_threshold_5_total_metric": 0.02899998426437378, |
| "tpp_threshold_5_intended_diff_only": 0.02899998426437378, |
| "tpp_threshold_5_unintended_diff_only": 0.0, |
| "tpp_threshold_10_total_metric": 0.04074998199939728, |
| "tpp_threshold_10_intended_diff_only": 0.041999995708465576, |
| "tpp_threshold_10_unintended_diff_only": 0.0012500137090682983, |
| "tpp_threshold_20_total_metric": 0.06325000524520874, |
| "tpp_threshold_20_intended_diff_only": 0.0690000057220459, |
| "tpp_threshold_20_unintended_diff_only": 0.005750000476837158, |
| "tpp_threshold_50_total_metric": 0.29224999248981476, |
| "tpp_threshold_50_intended_diff_only": 0.3330000042915344, |
| "tpp_threshold_50_unintended_diff_only": 0.040750011801719666, |
| "tpp_threshold_100_total_metric": 0.3659999817609787, |
| "tpp_threshold_100_intended_diff_only": 0.4169999957084656, |
| "tpp_threshold_100_unintended_diff_only": 0.05100001394748688, |
| "tpp_threshold_500_total_metric": 0.3172500282526016, |
| "tpp_threshold_500_intended_diff_only": 0.4230000376701355, |
| "tpp_threshold_500_unintended_diff_only": 0.10575000941753387 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": 0.01649998128414154, |
| "tpp_threshold_2_intended_diff_only": 0.009999990463256836, |
| "tpp_threshold_2_unintended_diff_only": -0.006499990820884705, |
| "tpp_threshold_5_total_metric": 0.013249978423118591, |
| "tpp_threshold_5_intended_diff_only": 0.018999993801116943, |
| "tpp_threshold_5_unintended_diff_only": 0.005750015377998352, |
| "tpp_threshold_10_total_metric": 0.03674998879432678, |
| "tpp_threshold_10_intended_diff_only": 0.041999995708465576, |
| "tpp_threshold_10_unintended_diff_only": 0.005250006914138794, |
| "tpp_threshold_20_total_metric": 0.1002499908208847, |
| "tpp_threshold_20_intended_diff_only": 0.12099999189376831, |
| "tpp_threshold_20_unintended_diff_only": 0.020750001072883606, |
| "tpp_threshold_50_total_metric": 0.3060000091791153, |
| "tpp_threshold_50_intended_diff_only": 0.34700000286102295, |
| "tpp_threshold_50_unintended_diff_only": 0.040999993681907654, |
| "tpp_threshold_100_total_metric": 0.34999997913837433, |
| "tpp_threshold_100_intended_diff_only": 0.39399999380111694, |
| "tpp_threshold_100_unintended_diff_only": 0.044000014662742615, |
| "tpp_threshold_500_total_metric": 0.2952499985694885, |
| "tpp_threshold_500_intended_diff_only": 0.3970000147819519, |
| "tpp_threshold_500_unintended_diff_only": 0.10175001621246338 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.014500007033348083, |
| "tpp_threshold_2_intended_diff_only": 0.018000006675720215, |
| "tpp_threshold_2_unintended_diff_only": 0.0034999996423721313, |
| "tpp_threshold_5_total_metric": 0.06624995172023773, |
| "tpp_threshold_5_intended_diff_only": 0.07199996709823608, |
| "tpp_threshold_5_unintended_diff_only": 0.005750015377998352, |
| "tpp_threshold_10_total_metric": 0.11000001430511475, |
| "tpp_threshold_10_intended_diff_only": 0.11500000953674316, |
| "tpp_threshold_10_unintended_diff_only": 0.004999995231628418, |
| "tpp_threshold_20_total_metric": 0.1705000251531601, |
| "tpp_threshold_20_intended_diff_only": 0.17900002002716064, |
| "tpp_threshold_20_unintended_diff_only": 0.00849999487400055, |
| "tpp_threshold_50_total_metric": 0.33924998342990875, |
| "tpp_threshold_50_intended_diff_only": 0.3489999771118164, |
| "tpp_threshold_50_unintended_diff_only": 0.009749993681907654, |
| "tpp_threshold_100_total_metric": 0.3940000385046005, |
| "tpp_threshold_100_intended_diff_only": 0.4010000228881836, |
| "tpp_threshold_100_unintended_diff_only": 0.006999984383583069, |
| "tpp_threshold_500_total_metric": 0.35625001788139343, |
| "tpp_threshold_500_intended_diff_only": 0.4050000309944153, |
| "tpp_threshold_500_unintended_diff_only": 0.04875001311302185 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.015250012278556824, |
| "tpp_threshold_2_intended_diff_only": 0.013999998569488525, |
| "tpp_threshold_2_unintended_diff_only": -0.0012500137090682983, |
| "tpp_threshold_5_total_metric": 0.02575002610683441, |
| "tpp_threshold_5_intended_diff_only": 0.023000001907348633, |
| "tpp_threshold_5_unintended_diff_only": -0.002750024199485779, |
| "tpp_threshold_10_total_metric": 0.03825001418590546, |
| "tpp_threshold_10_intended_diff_only": 0.046000003814697266, |
| "tpp_threshold_10_unintended_diff_only": 0.007749989628791809, |
| "tpp_threshold_20_total_metric": 0.1105000227689743, |
| "tpp_threshold_20_intended_diff_only": 0.125, |
| "tpp_threshold_20_unintended_diff_only": 0.014499977231025696, |
| "tpp_threshold_50_total_metric": 0.26850004494190216, |
| "tpp_threshold_50_intended_diff_only": 0.2940000295639038, |
| "tpp_threshold_50_unintended_diff_only": 0.025499984622001648, |
| "tpp_threshold_100_total_metric": 0.30800001323223114, |
| "tpp_threshold_100_intended_diff_only": 0.34700000286102295, |
| "tpp_threshold_100_unintended_diff_only": 0.03899998962879181, |
| "tpp_threshold_500_total_metric": 0.26850004494190216, |
| "tpp_threshold_500_intended_diff_only": 0.34800004959106445, |
| "tpp_threshold_500_unintended_diff_only": 0.07950000464916229 |
| } |
| } |
| } |
| } |