| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745616608644, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.09617500007152557, |
| "tpp_threshold_2_intended_diff_only": 0.16330000162124633, |
| "tpp_threshold_2_unintended_diff_only": 0.06712500154972076, |
| "tpp_threshold_5_total_metric": 0.10335000604391098, |
| "tpp_threshold_5_intended_diff_only": 0.21920000910758972, |
| "tpp_threshold_5_unintended_diff_only": 0.11585000306367874, |
| "tpp_threshold_10_total_metric": 0.11707500219345093, |
| "tpp_threshold_10_intended_diff_only": 0.25910000801086425, |
| "tpp_threshold_10_unintended_diff_only": 0.1420250058174133, |
| "tpp_threshold_20_total_metric": 0.16109999716281892, |
| "tpp_threshold_20_intended_diff_only": 0.3318000078201294, |
| "tpp_threshold_20_unintended_diff_only": 0.17070001065731047, |
| "tpp_threshold_50_total_metric": 0.20375001728534697, |
| "tpp_threshold_50_intended_diff_only": 0.4124000251293183, |
| "tpp_threshold_50_unintended_diff_only": 0.20865000784397125, |
| "tpp_threshold_100_total_metric": 0.19620002210140228, |
| "tpp_threshold_100_intended_diff_only": 0.4240000367164612, |
| "tpp_threshold_100_unintended_diff_only": 0.22780001461505892, |
| "tpp_threshold_500_total_metric": 0.1313250318169594, |
| "tpp_threshold_500_intended_diff_only": 0.42440004348754884, |
| "tpp_threshold_500_unintended_diff_only": 0.29307501167058947 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.17115000188350676, |
| "tpp_threshold_2_intended_diff_only": 0.30280001163482667, |
| "tpp_threshold_2_unintended_diff_only": 0.13165000975131988, |
| "tpp_threshold_5_total_metric": 0.16205000281333923, |
| "tpp_threshold_5_intended_diff_only": 0.3870000123977661, |
| "tpp_threshold_5_unintended_diff_only": 0.22495000958442687, |
| "tpp_threshold_10_total_metric": 0.14950000941753389, |
| "tpp_threshold_10_intended_diff_only": 0.4226000189781189, |
| "tpp_threshold_10_unintended_diff_only": 0.273100009560585, |
| "tpp_threshold_20_total_metric": 0.12974999845027924, |
| "tpp_threshold_20_intended_diff_only": 0.4474000096321106, |
| "tpp_threshold_20_unintended_diff_only": 0.31765001118183134, |
| "tpp_threshold_50_total_metric": 0.09285003244876862, |
| "tpp_threshold_50_intended_diff_only": 0.459600043296814, |
| "tpp_threshold_50_unintended_diff_only": 0.36675001084804537, |
| "tpp_threshold_100_total_metric": 0.07710002362728119, |
| "tpp_threshold_100_intended_diff_only": 0.46060004234313967, |
| "tpp_threshold_100_unintended_diff_only": 0.3835000187158585, |
| "tpp_threshold_500_total_metric": 0.05380002558231354, |
| "tpp_threshold_500_intended_diff_only": 0.46060004234313967, |
| "tpp_threshold_500_unintended_diff_only": 0.4068000167608261 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.021199998259544373, |
| "tpp_threshold_2_intended_diff_only": 0.023799991607666014, |
| "tpp_threshold_2_unintended_diff_only": 0.002599993348121643, |
| "tpp_threshold_5_total_metric": 0.044650009274482726, |
| "tpp_threshold_5_intended_diff_only": 0.05140000581741333, |
| "tpp_threshold_5_unintended_diff_only": 0.006749996542930603, |
| "tpp_threshold_10_total_metric": 0.08464999496936798, |
| "tpp_threshold_10_intended_diff_only": 0.09559999704360962, |
| "tpp_threshold_10_unintended_diff_only": 0.010950002074241637, |
| "tpp_threshold_20_total_metric": 0.19244999587535858, |
| "tpp_threshold_20_intended_diff_only": 0.2162000060081482, |
| "tpp_threshold_20_unintended_diff_only": 0.02375001013278961, |
| "tpp_threshold_50_total_metric": 0.31465000212192534, |
| "tpp_threshold_50_intended_diff_only": 0.36520000696182253, |
| "tpp_threshold_50_unintended_diff_only": 0.050550004839897154, |
| "tpp_threshold_100_total_metric": 0.31530002057552337, |
| "tpp_threshold_100_intended_diff_only": 0.3874000310897827, |
| "tpp_threshold_100_unintended_diff_only": 0.07210001051425934, |
| "tpp_threshold_500_total_metric": 0.20885003805160524, |
| "tpp_threshold_500_intended_diff_only": 0.388200044631958, |
| "tpp_threshold_500_unintended_diff_only": 0.17935000658035277 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.0.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.0.hook_resid_post", |
| "hook_layer": 0, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.1887500137090683, |
| "tpp_threshold_2_intended_diff_only": 0.2630000114440918, |
| "tpp_threshold_2_unintended_diff_only": 0.0742499977350235, |
| "tpp_threshold_5_total_metric": 0.12175001204013824, |
| "tpp_threshold_5_intended_diff_only": 0.37800002098083496, |
| "tpp_threshold_5_unintended_diff_only": 0.2562500089406967, |
| "tpp_threshold_10_total_metric": 0.11400002241134644, |
| "tpp_threshold_10_intended_diff_only": 0.40000003576278687, |
| "tpp_threshold_10_unintended_diff_only": 0.28600001335144043, |
| "tpp_threshold_20_total_metric": 0.08075003325939178, |
| "tpp_threshold_20_intended_diff_only": 0.4150000214576721, |
| "tpp_threshold_20_unintended_diff_only": 0.33424998819828033, |
| "tpp_threshold_50_total_metric": 0.06850007176399231, |
| "tpp_threshold_50_intended_diff_only": 0.43400007486343384, |
| "tpp_threshold_50_unintended_diff_only": 0.36550000309944153, |
| "tpp_threshold_100_total_metric": 0.059000059962272644, |
| "tpp_threshold_100_intended_diff_only": 0.43900007009506226, |
| "tpp_threshold_100_unintended_diff_only": 0.3800000101327896, |
| "tpp_threshold_500_total_metric": 0.029500067234039307, |
| "tpp_threshold_500_intended_diff_only": 0.43900007009506226, |
| "tpp_threshold_500_unintended_diff_only": 0.40950000286102295 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.125, |
| "tpp_threshold_2_intended_diff_only": 0.3410000205039978, |
| "tpp_threshold_2_unintended_diff_only": 0.2160000205039978, |
| "tpp_threshold_5_total_metric": 0.11300003528594971, |
| "tpp_threshold_5_intended_diff_only": 0.3670000433921814, |
| "tpp_threshold_5_unintended_diff_only": 0.2540000081062317, |
| "tpp_threshold_10_total_metric": 0.12025003135204315, |
| "tpp_threshold_10_intended_diff_only": 0.4230000376701355, |
| "tpp_threshold_10_unintended_diff_only": 0.30275000631809235, |
| "tpp_threshold_20_total_metric": 0.11650000512599945, |
| "tpp_threshold_20_intended_diff_only": 0.4440000057220459, |
| "tpp_threshold_20_unintended_diff_only": 0.32750000059604645, |
| "tpp_threshold_50_total_metric": 0.08225002884864807, |
| "tpp_threshold_50_intended_diff_only": 0.45500004291534424, |
| "tpp_threshold_50_unintended_diff_only": 0.37275001406669617, |
| "tpp_threshold_100_total_metric": 0.059000030159950256, |
| "tpp_threshold_100_intended_diff_only": 0.45500004291534424, |
| "tpp_threshold_100_unintended_diff_only": 0.396000012755394, |
| "tpp_threshold_500_total_metric": 0.041250020265579224, |
| "tpp_threshold_500_intended_diff_only": 0.45500004291534424, |
| "tpp_threshold_500_unintended_diff_only": 0.413750022649765 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.18049998581409454, |
| "tpp_threshold_2_intended_diff_only": 0.27399998903274536, |
| "tpp_threshold_2_unintended_diff_only": 0.09350000321865082, |
| "tpp_threshold_5_total_metric": 0.18949997425079346, |
| "tpp_threshold_5_intended_diff_only": 0.40799999237060547, |
| "tpp_threshold_5_unintended_diff_only": 0.218500018119812, |
| "tpp_threshold_10_total_metric": 0.14399999380111694, |
| "tpp_threshold_10_intended_diff_only": 0.4300000071525574, |
| "tpp_threshold_10_unintended_diff_only": 0.28600001335144043, |
| "tpp_threshold_20_total_metric": 0.12575000524520874, |
| "tpp_threshold_20_intended_diff_only": 0.44700002670288086, |
| "tpp_threshold_20_unintended_diff_only": 0.3212500214576721, |
| "tpp_threshold_50_total_metric": 0.07450000941753387, |
| "tpp_threshold_50_intended_diff_only": 0.45100003480911255, |
| "tpp_threshold_50_unintended_diff_only": 0.3765000253915787, |
| "tpp_threshold_100_total_metric": 0.054750025272369385, |
| "tpp_threshold_100_intended_diff_only": 0.45100003480911255, |
| "tpp_threshold_100_unintended_diff_only": 0.39625000953674316, |
| "tpp_threshold_500_total_metric": 0.03275001049041748, |
| "tpp_threshold_500_intended_diff_only": 0.45100003480911255, |
| "tpp_threshold_500_unintended_diff_only": 0.41825002431869507 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.2005000114440918, |
| "tpp_threshold_2_intended_diff_only": 0.3400000333786011, |
| "tpp_threshold_2_unintended_diff_only": 0.13950002193450928, |
| "tpp_threshold_5_total_metric": 0.20099999010562897, |
| "tpp_threshold_5_intended_diff_only": 0.4350000023841858, |
| "tpp_threshold_5_unintended_diff_only": 0.23400001227855682, |
| "tpp_threshold_10_total_metric": 0.2290000170469284, |
| "tpp_threshold_10_intended_diff_only": 0.4620000123977661, |
| "tpp_threshold_10_unintended_diff_only": 0.2329999953508377, |
| "tpp_threshold_20_total_metric": 0.18174998462200165, |
| "tpp_threshold_20_intended_diff_only": 0.48100000619888306, |
| "tpp_threshold_20_unintended_diff_only": 0.2992500215768814, |
| "tpp_threshold_50_total_metric": 0.13200002908706665, |
| "tpp_threshold_50_intended_diff_only": 0.4880000352859497, |
| "tpp_threshold_50_unintended_diff_only": 0.35600000619888306, |
| "tpp_threshold_100_total_metric": 0.12200000882148743, |
| "tpp_threshold_100_intended_diff_only": 0.4880000352859497, |
| "tpp_threshold_100_unintended_diff_only": 0.3660000264644623, |
| "tpp_threshold_500_total_metric": 0.09725001454353333, |
| "tpp_threshold_500_intended_diff_only": 0.4880000352859497, |
| "tpp_threshold_500_unintended_diff_only": 0.3907500207424164 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.16099999845027924, |
| "tpp_threshold_2_intended_diff_only": 0.29600000381469727, |
| "tpp_threshold_2_unintended_diff_only": 0.13500000536441803, |
| "tpp_threshold_5_total_metric": 0.1850000023841858, |
| "tpp_threshold_5_intended_diff_only": 0.34700000286102295, |
| "tpp_threshold_5_unintended_diff_only": 0.16200000047683716, |
| "tpp_threshold_10_total_metric": 0.14024998247623444, |
| "tpp_threshold_10_intended_diff_only": 0.39800000190734863, |
| "tpp_threshold_10_unintended_diff_only": 0.2577500194311142, |
| "tpp_threshold_20_total_metric": 0.14399996399879456, |
| "tpp_threshold_20_intended_diff_only": 0.44999998807907104, |
| "tpp_threshold_20_unintended_diff_only": 0.3060000240802765, |
| "tpp_threshold_50_total_metric": 0.10700002312660217, |
| "tpp_threshold_50_intended_diff_only": 0.4700000286102295, |
| "tpp_threshold_50_unintended_diff_only": 0.3630000054836273, |
| "tpp_threshold_100_total_metric": 0.09074999392032623, |
| "tpp_threshold_100_intended_diff_only": 0.4700000286102295, |
| "tpp_threshold_100_unintended_diff_only": 0.37925003468990326, |
| "tpp_threshold_500_total_metric": 0.06825001537799835, |
| "tpp_threshold_500_intended_diff_only": 0.4700000286102295, |
| "tpp_threshold_500_unintended_diff_only": 0.40175001323223114 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.020249977707862854, |
| "tpp_threshold_2_intended_diff_only": 0.019999980926513672, |
| "tpp_threshold_2_unintended_diff_only": -0.00024999678134918213, |
| "tpp_threshold_5_total_metric": 0.02875000238418579, |
| "tpp_threshold_5_intended_diff_only": 0.02799999713897705, |
| "tpp_threshold_5_unintended_diff_only": -0.0007500052452087402, |
| "tpp_threshold_10_total_metric": 0.0429999977350235, |
| "tpp_threshold_10_intended_diff_only": 0.050999999046325684, |
| "tpp_threshold_10_unintended_diff_only": 0.008000001311302185, |
| "tpp_threshold_20_total_metric": 0.15474997460842133, |
| "tpp_threshold_20_intended_diff_only": 0.18199998140335083, |
| "tpp_threshold_20_unintended_diff_only": 0.027250006794929504, |
| "tpp_threshold_50_total_metric": 0.33799998462200165, |
| "tpp_threshold_50_intended_diff_only": 0.40799999237060547, |
| "tpp_threshold_50_unintended_diff_only": 0.07000000774860382, |
| "tpp_threshold_100_total_metric": 0.31150002777576447, |
| "tpp_threshold_100_intended_diff_only": 0.4270000457763672, |
| "tpp_threshold_100_unintended_diff_only": 0.11550001800060272, |
| "tpp_threshold_500_total_metric": 0.20225003361701965, |
| "tpp_threshold_500_intended_diff_only": 0.4270000457763672, |
| "tpp_threshold_500_unintended_diff_only": 0.22475001215934753 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.054750025272369385, |
| "tpp_threshold_2_intended_diff_only": 0.05900001525878906, |
| "tpp_threshold_2_unintended_diff_only": 0.004249989986419678, |
| "tpp_threshold_5_total_metric": 0.09150001406669617, |
| "tpp_threshold_5_intended_diff_only": 0.10100001096725464, |
| "tpp_threshold_5_unintended_diff_only": 0.009499996900558472, |
| "tpp_threshold_10_total_metric": 0.18975000083446503, |
| "tpp_threshold_10_intended_diff_only": 0.20800000429153442, |
| "tpp_threshold_10_unintended_diff_only": 0.018250003457069397, |
| "tpp_threshold_20_total_metric": 0.28825001418590546, |
| "tpp_threshold_20_intended_diff_only": 0.3320000171661377, |
| "tpp_threshold_20_unintended_diff_only": 0.04375000298023224, |
| "tpp_threshold_50_total_metric": 0.3400000333786011, |
| "tpp_threshold_50_intended_diff_only": 0.40800005197525024, |
| "tpp_threshold_50_unintended_diff_only": 0.06800001859664917, |
| "tpp_threshold_100_total_metric": 0.3175000548362732, |
| "tpp_threshold_100_intended_diff_only": 0.41200006008148193, |
| "tpp_threshold_100_unintended_diff_only": 0.09450000524520874, |
| "tpp_threshold_500_total_metric": 0.19450005888938904, |
| "tpp_threshold_500_intended_diff_only": 0.41200006008148193, |
| "tpp_threshold_500_unintended_diff_only": 0.2175000011920929 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": 0.017749980092048645, |
| "tpp_threshold_2_intended_diff_only": 0.015999972820281982, |
| "tpp_threshold_2_unintended_diff_only": -0.0017500072717666626, |
| "tpp_threshold_5_total_metric": 0.04949997365474701, |
| "tpp_threshold_5_intended_diff_only": 0.06199997663497925, |
| "tpp_threshold_5_unintended_diff_only": 0.012500002980232239, |
| "tpp_threshold_10_total_metric": 0.0627499669790268, |
| "tpp_threshold_10_intended_diff_only": 0.0769999623298645, |
| "tpp_threshold_10_unintended_diff_only": 0.014249995350837708, |
| "tpp_threshold_20_total_metric": 0.13799996674060822, |
| "tpp_threshold_20_intended_diff_only": 0.15399998426437378, |
| "tpp_threshold_20_unintended_diff_only": 0.016000017523765564, |
| "tpp_threshold_50_total_metric": 0.3114999830722809, |
| "tpp_threshold_50_intended_diff_only": 0.3349999785423279, |
| "tpp_threshold_50_unintended_diff_only": 0.023499995470046997, |
| "tpp_threshold_100_total_metric": 0.3369999825954437, |
| "tpp_threshold_100_intended_diff_only": 0.3840000033378601, |
| "tpp_threshold_100_unintended_diff_only": 0.04700002074241638, |
| "tpp_threshold_500_total_metric": 0.22350001335144043, |
| "tpp_threshold_500_intended_diff_only": 0.38700002431869507, |
| "tpp_threshold_500_unintended_diff_only": 0.16350001096725464 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.01799999177455902, |
| "tpp_threshold_2_intended_diff_only": 0.02399998903274536, |
| "tpp_threshold_2_unintended_diff_only": 0.00599999725818634, |
| "tpp_threshold_5_total_metric": 0.01150001585483551, |
| "tpp_threshold_5_intended_diff_only": 0.017000019550323486, |
| "tpp_threshold_5_unintended_diff_only": 0.005500003695487976, |
| "tpp_threshold_10_total_metric": 0.04475000500679016, |
| "tpp_threshold_10_intended_diff_only": 0.046000003814697266, |
| "tpp_threshold_10_unintended_diff_only": 0.0012499988079071045, |
| "tpp_threshold_20_total_metric": 0.2502500116825104, |
| "tpp_threshold_20_intended_diff_only": 0.2710000276565552, |
| "tpp_threshold_20_unintended_diff_only": 0.0207500159740448, |
| "tpp_threshold_50_total_metric": 0.3310000151395798, |
| "tpp_threshold_50_intended_diff_only": 0.36900001764297485, |
| "tpp_threshold_50_unintended_diff_only": 0.03800000250339508, |
| "tpp_threshold_100_total_metric": 0.34525004029273987, |
| "tpp_threshold_100_intended_diff_only": 0.39000004529953003, |
| "tpp_threshold_100_unintended_diff_only": 0.04475000500679016, |
| "tpp_threshold_500_total_metric": 0.24400003254413605, |
| "tpp_threshold_500_intended_diff_only": 0.39000004529953003, |
| "tpp_threshold_500_unintended_diff_only": 0.14600001275539398 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": -0.004749983549118042, |
| "tpp_threshold_2_intended_diff_only": 0.0, |
| "tpp_threshold_2_unintended_diff_only": 0.004749983549118042, |
| "tpp_threshold_5_total_metric": 0.04200004041194916, |
| "tpp_threshold_5_intended_diff_only": 0.04900002479553223, |
| "tpp_threshold_5_unintended_diff_only": 0.006999984383583069, |
| "tpp_threshold_10_total_metric": 0.08300000429153442, |
| "tpp_threshold_10_intended_diff_only": 0.09600001573562622, |
| "tpp_threshold_10_unintended_diff_only": 0.013000011444091797, |
| "tpp_threshold_20_total_metric": 0.13100001215934753, |
| "tpp_threshold_20_intended_diff_only": 0.1420000195503235, |
| "tpp_threshold_20_unintended_diff_only": 0.011000007390975952, |
| "tpp_threshold_50_total_metric": 0.2527499943971634, |
| "tpp_threshold_50_intended_diff_only": 0.3059999942779541, |
| "tpp_threshold_50_unintended_diff_only": 0.05324999988079071, |
| "tpp_threshold_100_total_metric": 0.26524999737739563, |
| "tpp_threshold_100_intended_diff_only": 0.3240000009536743, |
| "tpp_threshold_100_unintended_diff_only": 0.058750003576278687, |
| "tpp_threshold_500_total_metric": 0.18000005185604095, |
| "tpp_threshold_500_intended_diff_only": 0.3250000476837158, |
| "tpp_threshold_500_unintended_diff_only": 0.14499999582767487 |
| } |
| } |
| } |
| } |