| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752992442, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.059574995934963224, |
| "tpp_threshold_2_intended_diff_only": 0.0788999915122986, |
| "tpp_threshold_2_unintended_diff_only": 0.019324995577335358, |
| "tpp_threshold_5_total_metric": 0.15705001056194307, |
| "tpp_threshold_5_intended_diff_only": 0.20260000228881836, |
| "tpp_threshold_5_unintended_diff_only": 0.04554999172687531, |
| "tpp_threshold_10_total_metric": 0.2019250050187111, |
| "tpp_threshold_10_intended_diff_only": 0.27759999632835386, |
| "tpp_threshold_10_unintended_diff_only": 0.0756749913096428, |
| "tpp_threshold_20_total_metric": 0.21412501335144044, |
| "tpp_threshold_20_intended_diff_only": 0.33360000848770144, |
| "tpp_threshold_20_unintended_diff_only": 0.11947499513626098, |
| "tpp_threshold_50_total_metric": 0.23667500466108324, |
| "tpp_threshold_50_intended_diff_only": 0.4005000054836273, |
| "tpp_threshold_50_unintended_diff_only": 0.16382500082254411, |
| "tpp_threshold_100_total_metric": 0.2431000158190727, |
| "tpp_threshold_100_intended_diff_only": 0.4371000111103058, |
| "tpp_threshold_100_unintended_diff_only": 0.19399999529123307, |
| "tpp_threshold_500_total_metric": 0.2143500313162804, |
| "tpp_threshold_500_intended_diff_only": 0.45040003061294553, |
| "tpp_threshold_500_unintended_diff_only": 0.2360499992966652 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.09199999272823334, |
| "tpp_threshold_2_intended_diff_only": 0.11719999313354493, |
| "tpp_threshold_2_unintended_diff_only": 0.025200000405311583, |
| "tpp_threshold_5_total_metric": 0.22155002653598785, |
| "tpp_threshold_5_intended_diff_only": 0.28800002336502073, |
| "tpp_threshold_5_unintended_diff_only": 0.0664499968290329, |
| "tpp_threshold_10_total_metric": 0.264300012588501, |
| "tpp_threshold_10_intended_diff_only": 0.3862000107765198, |
| "tpp_threshold_10_unintended_diff_only": 0.1218999981880188, |
| "tpp_threshold_20_total_metric": 0.23185001015663148, |
| "tpp_threshold_20_intended_diff_only": 0.4358000159263611, |
| "tpp_threshold_20_unintended_diff_only": 0.2039500057697296, |
| "tpp_threshold_50_total_metric": 0.18375001549720765, |
| "tpp_threshold_50_intended_diff_only": 0.4598000288009644, |
| "tpp_threshold_50_unintended_diff_only": 0.2760500133037567, |
| "tpp_threshold_100_total_metric": 0.15035001337528228, |
| "tpp_threshold_100_intended_diff_only": 0.4650000214576721, |
| "tpp_threshold_100_unintended_diff_only": 0.31465000808238985, |
| "tpp_threshold_500_total_metric": 0.09015003442764283, |
| "tpp_threshold_500_intended_diff_only": 0.46620004177093505, |
| "tpp_threshold_500_unintended_diff_only": 0.37605000734329225 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.027149999141693117, |
| "tpp_threshold_2_intended_diff_only": 0.04059998989105225, |
| "tpp_threshold_2_unintended_diff_only": 0.01344999074935913, |
| "tpp_threshold_5_total_metric": 0.09254999458789825, |
| "tpp_threshold_5_intended_diff_only": 0.11719998121261596, |
| "tpp_threshold_5_unintended_diff_only": 0.024649986624717714, |
| "tpp_threshold_10_total_metric": 0.13954999744892121, |
| "tpp_threshold_10_intended_diff_only": 0.168999981880188, |
| "tpp_threshold_10_unintended_diff_only": 0.029449984431266785, |
| "tpp_threshold_20_total_metric": 0.1964000165462494, |
| "tpp_threshold_20_intended_diff_only": 0.23140000104904174, |
| "tpp_threshold_20_unintended_diff_only": 0.03499998450279236, |
| "tpp_threshold_50_total_metric": 0.2895999938249588, |
| "tpp_threshold_50_intended_diff_only": 0.34119998216629027, |
| "tpp_threshold_50_unintended_diff_only": 0.05159998834133148, |
| "tpp_threshold_100_total_metric": 0.33585001826286315, |
| "tpp_threshold_100_intended_diff_only": 0.40920000076293944, |
| "tpp_threshold_100_unintended_diff_only": 0.0733499825000763, |
| "tpp_threshold_500_total_metric": 0.3385500282049179, |
| "tpp_threshold_500_intended_diff_only": 0.4346000194549561, |
| "tpp_threshold_500_unintended_diff_only": 0.09604999125003814 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.21.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.21.hook_resid_post", |
| "hook_layer": 21, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.2122499942779541, |
| "tpp_threshold_2_intended_diff_only": 0.25, |
| "tpp_threshold_2_unintended_diff_only": 0.0377500057220459, |
| "tpp_threshold_5_total_metric": 0.2305000126361847, |
| "tpp_threshold_5_intended_diff_only": 0.29600000381469727, |
| "tpp_threshold_5_unintended_diff_only": 0.06549999117851257, |
| "tpp_threshold_10_total_metric": 0.23225004971027374, |
| "tpp_threshold_10_intended_diff_only": 0.35300004482269287, |
| "tpp_threshold_10_unintended_diff_only": 0.12074999511241913, |
| "tpp_threshold_20_total_metric": 0.22049999237060547, |
| "tpp_threshold_20_intended_diff_only": 0.41600000858306885, |
| "tpp_threshold_20_unintended_diff_only": 0.19550001621246338, |
| "tpp_threshold_50_total_metric": 0.16450002789497375, |
| "tpp_threshold_50_intended_diff_only": 0.437000036239624, |
| "tpp_threshold_50_unintended_diff_only": 0.27250000834465027, |
| "tpp_threshold_100_total_metric": 0.13600000739097595, |
| "tpp_threshold_100_intended_diff_only": 0.4480000138282776, |
| "tpp_threshold_100_unintended_diff_only": 0.31200000643730164, |
| "tpp_threshold_500_total_metric": 0.06825004518032074, |
| "tpp_threshold_500_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_500_unintended_diff_only": 0.3817500025033951 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.0820000171661377, |
| "tpp_threshold_2_intended_diff_only": 0.11000001430511475, |
| "tpp_threshold_2_unintended_diff_only": 0.02799999713897705, |
| "tpp_threshold_5_total_metric": 0.20625002682209015, |
| "tpp_threshold_5_intended_diff_only": 0.2800000309944153, |
| "tpp_threshold_5_unintended_diff_only": 0.07375000417232513, |
| "tpp_threshold_10_total_metric": 0.2462500035762787, |
| "tpp_threshold_10_intended_diff_only": 0.36000001430511475, |
| "tpp_threshold_10_unintended_diff_only": 0.11375001072883606, |
| "tpp_threshold_20_total_metric": 0.20375002920627594, |
| "tpp_threshold_20_intended_diff_only": 0.4190000295639038, |
| "tpp_threshold_20_unintended_diff_only": 0.21525000035762787, |
| "tpp_threshold_50_total_metric": 0.16825003921985626, |
| "tpp_threshold_50_intended_diff_only": 0.45500004291534424, |
| "tpp_threshold_50_unintended_diff_only": 0.286750003695488, |
| "tpp_threshold_100_total_metric": 0.1392500102519989, |
| "tpp_threshold_100_intended_diff_only": 0.4620000123977661, |
| "tpp_threshold_100_unintended_diff_only": 0.3227500021457672, |
| "tpp_threshold_500_total_metric": 0.08075003325939178, |
| "tpp_threshold_500_intended_diff_only": 0.46400004625320435, |
| "tpp_threshold_500_unintended_diff_only": 0.38325001299381256 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.11449995636940002, |
| "tpp_threshold_2_intended_diff_only": 0.15599995851516724, |
| "tpp_threshold_2_unintended_diff_only": 0.04150000214576721, |
| "tpp_threshold_5_total_metric": 0.23225000500679016, |
| "tpp_threshold_5_intended_diff_only": 0.31800001859664917, |
| "tpp_threshold_5_unintended_diff_only": 0.08575001358985901, |
| "tpp_threshold_10_total_metric": 0.26524998247623444, |
| "tpp_threshold_10_intended_diff_only": 0.39499998092651367, |
| "tpp_threshold_10_unintended_diff_only": 0.12974999845027924, |
| "tpp_threshold_20_total_metric": 0.25599996745586395, |
| "tpp_threshold_20_intended_diff_only": 0.4269999861717224, |
| "tpp_threshold_20_unintended_diff_only": 0.17100001871585846, |
| "tpp_threshold_50_total_metric": 0.17874997854232788, |
| "tpp_threshold_50_intended_diff_only": 0.4440000057220459, |
| "tpp_threshold_50_unintended_diff_only": 0.265250027179718, |
| "tpp_threshold_100_total_metric": 0.14099998772144318, |
| "tpp_threshold_100_intended_diff_only": 0.44999998807907104, |
| "tpp_threshold_100_unintended_diff_only": 0.30900000035762787, |
| "tpp_threshold_500_total_metric": 0.057500019669532776, |
| "tpp_threshold_500_intended_diff_only": 0.4520000219345093, |
| "tpp_threshold_500_unintended_diff_only": 0.3945000022649765 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.044249966740608215, |
| "tpp_threshold_2_intended_diff_only": 0.05299997329711914, |
| "tpp_threshold_2_unintended_diff_only": 0.008750006556510925, |
| "tpp_threshold_5_total_metric": 0.30100002884864807, |
| "tpp_threshold_5_intended_diff_only": 0.359000027179718, |
| "tpp_threshold_5_unintended_diff_only": 0.057999998331069946, |
| "tpp_threshold_10_total_metric": 0.33550000190734863, |
| "tpp_threshold_10_intended_diff_only": 0.4860000014305115, |
| "tpp_threshold_10_unintended_diff_only": 0.15049999952316284, |
| "tpp_threshold_20_total_metric": 0.25950004160404205, |
| "tpp_threshold_20_intended_diff_only": 0.4920000433921814, |
| "tpp_threshold_20_unintended_diff_only": 0.23250000178813934, |
| "tpp_threshold_50_total_metric": 0.19475002586841583, |
| "tpp_threshold_50_intended_diff_only": 0.4920000433921814, |
| "tpp_threshold_50_unintended_diff_only": 0.29725001752376556, |
| "tpp_threshold_100_total_metric": 0.17925003170967102, |
| "tpp_threshold_100_intended_diff_only": 0.4920000433921814, |
| "tpp_threshold_100_unintended_diff_only": 0.3127500116825104, |
| "tpp_threshold_500_total_metric": 0.13875003159046173, |
| "tpp_threshold_500_intended_diff_only": 0.4920000433921814, |
| "tpp_threshold_500_unintended_diff_only": 0.35325001180171967 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.00700002908706665, |
| "tpp_threshold_2_intended_diff_only": 0.017000019550323486, |
| "tpp_threshold_2_unintended_diff_only": 0.009999990463256836, |
| "tpp_threshold_5_total_metric": 0.1377500593662262, |
| "tpp_threshold_5_intended_diff_only": 0.18700003623962402, |
| "tpp_threshold_5_unintended_diff_only": 0.04924997687339783, |
| "tpp_threshold_10_total_metric": 0.24225002527236938, |
| "tpp_threshold_10_intended_diff_only": 0.3370000123977661, |
| "tpp_threshold_10_unintended_diff_only": 0.09474998712539673, |
| "tpp_threshold_20_total_metric": 0.21950002014636993, |
| "tpp_threshold_20_intended_diff_only": 0.42500001192092896, |
| "tpp_threshold_20_unintended_diff_only": 0.20549999177455902, |
| "tpp_threshold_50_total_metric": 0.21250000596046448, |
| "tpp_threshold_50_intended_diff_only": 0.4710000157356262, |
| "tpp_threshold_50_unintended_diff_only": 0.25850000977516174, |
| "tpp_threshold_100_total_metric": 0.1562500298023224, |
| "tpp_threshold_100_intended_diff_only": 0.47300004959106445, |
| "tpp_threshold_100_unintended_diff_only": 0.31675001978874207, |
| "tpp_threshold_500_total_metric": 0.10550004243850708, |
| "tpp_threshold_500_intended_diff_only": 0.47300004959106445, |
| "tpp_threshold_500_unintended_diff_only": 0.3675000071525574 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": -0.0032500028610229492, |
| "tpp_threshold_2_intended_diff_only": 0.004999995231628418, |
| "tpp_threshold_2_unintended_diff_only": 0.008249998092651367, |
| "tpp_threshold_5_total_metric": 0.005000010132789612, |
| "tpp_threshold_5_intended_diff_only": 0.013999998569488525, |
| "tpp_threshold_5_unintended_diff_only": 0.008999988436698914, |
| "tpp_threshold_10_total_metric": 0.018749982118606567, |
| "tpp_threshold_10_intended_diff_only": 0.030999958515167236, |
| "tpp_threshold_10_unintended_diff_only": 0.012249976396560669, |
| "tpp_threshold_20_total_metric": 0.042750030755996704, |
| "tpp_threshold_20_intended_diff_only": 0.06000000238418579, |
| "tpp_threshold_20_unintended_diff_only": 0.017249971628189087, |
| "tpp_threshold_50_total_metric": 0.17274995148181915, |
| "tpp_threshold_50_intended_diff_only": 0.21199995279312134, |
| "tpp_threshold_50_unintended_diff_only": 0.039250001311302185, |
| "tpp_threshold_100_total_metric": 0.30774998664855957, |
| "tpp_threshold_100_intended_diff_only": 0.3539999723434448, |
| "tpp_threshold_100_unintended_diff_only": 0.046249985694885254, |
| "tpp_threshold_500_total_metric": 0.39149996638298035, |
| "tpp_threshold_500_intended_diff_only": 0.4559999704360962, |
| "tpp_threshold_500_unintended_diff_only": 0.06450000405311584 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.05175003409385681, |
| "tpp_threshold_2_intended_diff_only": 0.06400001049041748, |
| "tpp_threshold_2_unintended_diff_only": 0.012249976396560669, |
| "tpp_threshold_5_total_metric": 0.16349998116493225, |
| "tpp_threshold_5_intended_diff_only": 0.21999996900558472, |
| "tpp_threshold_5_unintended_diff_only": 0.056499987840652466, |
| "tpp_threshold_10_total_metric": 0.2589999884366989, |
| "tpp_threshold_10_intended_diff_only": 0.31599998474121094, |
| "tpp_threshold_10_unintended_diff_only": 0.056999996304512024, |
| "tpp_threshold_20_total_metric": 0.34175004065036774, |
| "tpp_threshold_20_intended_diff_only": 0.4050000309944153, |
| "tpp_threshold_20_unintended_diff_only": 0.06324999034404755, |
| "tpp_threshold_50_total_metric": 0.3582500219345093, |
| "tpp_threshold_50_intended_diff_only": 0.4390000104904175, |
| "tpp_threshold_50_unintended_diff_only": 0.0807499885559082, |
| "tpp_threshold_100_total_metric": 0.3477500379085541, |
| "tpp_threshold_100_intended_diff_only": 0.44300001859664917, |
| "tpp_threshold_100_unintended_diff_only": 0.09524998068809509, |
| "tpp_threshold_500_total_metric": 0.32600004971027374, |
| "tpp_threshold_500_intended_diff_only": 0.44200003147125244, |
| "tpp_threshold_500_unintended_diff_only": 0.1159999817609787 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.01825004816055298, |
| "tpp_threshold_2_intended_diff_only": -0.010000050067901611, |
| "tpp_threshold_2_unintended_diff_only": 0.008249998092651367, |
| "tpp_threshold_5_total_metric": -0.0102500319480896, |
| "tpp_threshold_5_intended_diff_only": -0.001000046730041504, |
| "tpp_threshold_5_unintended_diff_only": 0.009249985218048096, |
| "tpp_threshold_10_total_metric": 0.013499975204467773, |
| "tpp_threshold_10_intended_diff_only": 0.02199995517730713, |
| "tpp_threshold_10_unintended_diff_only": 0.008499979972839355, |
| "tpp_threshold_20_total_metric": 0.08674998581409454, |
| "tpp_threshold_20_intended_diff_only": 0.10799998044967651, |
| "tpp_threshold_20_unintended_diff_only": 0.02124999463558197, |
| "tpp_threshold_50_total_metric": 0.23349998891353607, |
| "tpp_threshold_50_intended_diff_only": 0.2889999747276306, |
| "tpp_threshold_50_unintended_diff_only": 0.05549998581409454, |
| "tpp_threshold_100_total_metric": 0.30025000870227814, |
| "tpp_threshold_100_intended_diff_only": 0.4179999828338623, |
| "tpp_threshold_100_unintended_diff_only": 0.11774997413158417, |
| "tpp_threshold_500_total_metric": 0.2742500305175781, |
| "tpp_threshold_500_intended_diff_only": 0.42900002002716064, |
| "tpp_threshold_500_unintended_diff_only": 0.15474998950958252 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.046500012278556824, |
| "tpp_threshold_2_intended_diff_only": 0.050999999046325684, |
| "tpp_threshold_2_unintended_diff_only": 0.00449998676776886, |
| "tpp_threshold_5_total_metric": 0.21825003623962402, |
| "tpp_threshold_5_intended_diff_only": 0.23500001430511475, |
| "tpp_threshold_5_unintended_diff_only": 0.016749978065490723, |
| "tpp_threshold_10_total_metric": 0.24275004863739014, |
| "tpp_threshold_10_intended_diff_only": 0.27500003576278687, |
| "tpp_threshold_10_unintended_diff_only": 0.03224998712539673, |
| "tpp_threshold_20_total_metric": 0.2860000431537628, |
| "tpp_threshold_20_intended_diff_only": 0.31800001859664917, |
| "tpp_threshold_20_unintended_diff_only": 0.03199997544288635, |
| "tpp_threshold_50_total_metric": 0.3607500195503235, |
| "tpp_threshold_50_intended_diff_only": 0.39800000190734863, |
| "tpp_threshold_50_unintended_diff_only": 0.037249982357025146, |
| "tpp_threshold_100_total_metric": 0.3812500238418579, |
| "tpp_threshold_100_intended_diff_only": 0.4300000071525574, |
| "tpp_threshold_100_unintended_diff_only": 0.04874998331069946, |
| "tpp_threshold_500_total_metric": 0.37300005555152893, |
| "tpp_threshold_500_intended_diff_only": 0.4360000491142273, |
| "tpp_threshold_500_unintended_diff_only": 0.06299999356269836 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.05900000035762787, |
| "tpp_threshold_2_intended_diff_only": 0.09299999475479126, |
| "tpp_threshold_2_unintended_diff_only": 0.03399999439716339, |
| "tpp_threshold_5_total_metric": 0.08624997735023499, |
| "tpp_threshold_5_intended_diff_only": 0.11799997091293335, |
| "tpp_threshold_5_unintended_diff_only": 0.031749993562698364, |
| "tpp_threshold_10_total_metric": 0.16374999284744263, |
| "tpp_threshold_10_intended_diff_only": 0.20099997520446777, |
| "tpp_threshold_10_unintended_diff_only": 0.037249982357025146, |
| "tpp_threshold_20_total_metric": 0.22474998235702515, |
| "tpp_threshold_20_intended_diff_only": 0.265999972820282, |
| "tpp_threshold_20_unintended_diff_only": 0.041249990463256836, |
| "tpp_threshold_50_total_metric": 0.322749987244606, |
| "tpp_threshold_50_intended_diff_only": 0.36799997091293335, |
| "tpp_threshold_50_unintended_diff_only": 0.04524998366832733, |
| "tpp_threshold_100_total_metric": 0.3422500342130661, |
| "tpp_threshold_100_intended_diff_only": 0.4010000228881836, |
| "tpp_threshold_100_unintended_diff_only": 0.05874998867511749, |
| "tpp_threshold_500_total_metric": 0.3280000388622284, |
| "tpp_threshold_500_intended_diff_only": 0.4100000262260437, |
| "tpp_threshold_500_unintended_diff_only": 0.08199998736381531 |
| } |
| } |
| } |
| } |