| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752462281, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.0680250197649002, |
| "tpp_threshold_2_intended_diff_only": 0.09170001745223999, |
| "tpp_threshold_2_unintended_diff_only": 0.023674997687339782, |
| "tpp_threshold_5_total_metric": 0.11612500995397568, |
| "tpp_threshold_5_intended_diff_only": 0.15900000929832458, |
| "tpp_threshold_5_unintended_diff_only": 0.04287499934434891, |
| "tpp_threshold_10_total_metric": 0.13417500257492065, |
| "tpp_threshold_10_intended_diff_only": 0.21810000538825988, |
| "tpp_threshold_10_unintended_diff_only": 0.08392500281333923, |
| "tpp_threshold_20_total_metric": 0.13597500026226045, |
| "tpp_threshold_20_intended_diff_only": 0.2552000105381012, |
| "tpp_threshold_20_unintended_diff_only": 0.11922501027584076, |
| "tpp_threshold_50_total_metric": 0.17435001432895658, |
| "tpp_threshold_50_intended_diff_only": 0.328900021314621, |
| "tpp_threshold_50_unintended_diff_only": 0.15455000698566437, |
| "tpp_threshold_100_total_metric": 0.21375000923871995, |
| "tpp_threshold_100_intended_diff_only": 0.3948000192642212, |
| "tpp_threshold_100_unintended_diff_only": 0.18105001002550125, |
| "tpp_threshold_500_total_metric": 0.21017503440380098, |
| "tpp_threshold_500_intended_diff_only": 0.44580004215240476, |
| "tpp_threshold_500_unintended_diff_only": 0.23562500774860384 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.12105002105236054, |
| "tpp_threshold_2_intended_diff_only": 0.16440001726150513, |
| "tpp_threshold_2_unintended_diff_only": 0.04334999620914459, |
| "tpp_threshold_5_total_metric": 0.1975000113248825, |
| "tpp_threshold_5_intended_diff_only": 0.27960001230239867, |
| "tpp_threshold_5_unintended_diff_only": 0.08210000097751617, |
| "tpp_threshold_10_total_metric": 0.20915000140666962, |
| "tpp_threshold_10_intended_diff_only": 0.3690000057220459, |
| "tpp_threshold_10_unintended_diff_only": 0.15985000431537627, |
| "tpp_threshold_20_total_metric": 0.18919999301433563, |
| "tpp_threshold_20_intended_diff_only": 0.41540000438690183, |
| "tpp_threshold_20_unintended_diff_only": 0.22620001137256623, |
| "tpp_threshold_50_total_metric": 0.16360001564025878, |
| "tpp_threshold_50_intended_diff_only": 0.4516000270843506, |
| "tpp_threshold_50_unintended_diff_only": 0.2880000114440918, |
| "tpp_threshold_100_total_metric": 0.13335000872612, |
| "tpp_threshold_100_intended_diff_only": 0.4648000240325928, |
| "tpp_threshold_100_unintended_diff_only": 0.3314500153064728, |
| "tpp_threshold_500_total_metric": 0.07870003283023834, |
| "tpp_threshold_500_intended_diff_only": 0.46920003890991213, |
| "tpp_threshold_500_unintended_diff_only": 0.3905000060796738 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.01500001847743988, |
| "tpp_threshold_2_intended_diff_only": 0.019000017642974855, |
| "tpp_threshold_2_unintended_diff_only": 0.0039999991655349735, |
| "tpp_threshold_5_total_metric": 0.03475000858306885, |
| "tpp_threshold_5_intended_diff_only": 0.038400006294250486, |
| "tpp_threshold_5_unintended_diff_only": 0.003649997711181641, |
| "tpp_threshold_10_total_metric": 0.05920000374317169, |
| "tpp_threshold_10_intended_diff_only": 0.06720000505447388, |
| "tpp_threshold_10_unintended_diff_only": 0.008000001311302185, |
| "tpp_threshold_20_total_metric": 0.08275000751018524, |
| "tpp_threshold_20_intended_diff_only": 0.09500001668930054, |
| "tpp_threshold_20_unintended_diff_only": 0.012250009179115295, |
| "tpp_threshold_50_total_metric": 0.1851000130176544, |
| "tpp_threshold_50_intended_diff_only": 0.20620001554489137, |
| "tpp_threshold_50_unintended_diff_only": 0.021100002527236938, |
| "tpp_threshold_100_total_metric": 0.2941500097513199, |
| "tpp_threshold_100_intended_diff_only": 0.3248000144958496, |
| "tpp_threshold_100_unintended_diff_only": 0.030650004744529724, |
| "tpp_threshold_500_total_metric": 0.3416500359773636, |
| "tpp_threshold_500_intended_diff_only": 0.42240004539489745, |
| "tpp_threshold_500_unintended_diff_only": 0.08075000941753388 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.9.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.9.hook_resid_post", |
| "hook_layer": 9, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.20750004053115845, |
| "tpp_threshold_2_intended_diff_only": 0.2980000376701355, |
| "tpp_threshold_2_unintended_diff_only": 0.09049999713897705, |
| "tpp_threshold_5_total_metric": 0.20100004971027374, |
| "tpp_threshold_5_intended_diff_only": 0.35300004482269287, |
| "tpp_threshold_5_unintended_diff_only": 0.15199999511241913, |
| "tpp_threshold_10_total_metric": 0.1885000318288803, |
| "tpp_threshold_10_intended_diff_only": 0.38600003719329834, |
| "tpp_threshold_10_unintended_diff_only": 0.19750000536441803, |
| "tpp_threshold_20_total_metric": 0.1810000240802765, |
| "tpp_threshold_20_intended_diff_only": 0.41100001335144043, |
| "tpp_threshold_20_unintended_diff_only": 0.22999998927116394, |
| "tpp_threshold_50_total_metric": 0.13800004124641418, |
| "tpp_threshold_50_intended_diff_only": 0.4310000538825989, |
| "tpp_threshold_50_unintended_diff_only": 0.2930000126361847, |
| "tpp_threshold_100_total_metric": 0.11750003695487976, |
| "tpp_threshold_100_intended_diff_only": 0.4450000524520874, |
| "tpp_threshold_100_unintended_diff_only": 0.32750001549720764, |
| "tpp_threshold_500_total_metric": 0.07175005972385406, |
| "tpp_threshold_500_intended_diff_only": 0.4490000605583191, |
| "tpp_threshold_500_unintended_diff_only": 0.377250000834465 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.12650005519390106, |
| "tpp_threshold_2_intended_diff_only": 0.15000003576278687, |
| "tpp_threshold_2_unintended_diff_only": 0.023499980568885803, |
| "tpp_threshold_5_total_metric": 0.18500003218650818, |
| "tpp_threshold_5_intended_diff_only": 0.23400002717971802, |
| "tpp_threshold_5_unintended_diff_only": 0.04899999499320984, |
| "tpp_threshold_10_total_metric": 0.2172500342130661, |
| "tpp_threshold_10_intended_diff_only": 0.2930000424385071, |
| "tpp_threshold_10_unintended_diff_only": 0.07575000822544098, |
| "tpp_threshold_20_total_metric": 0.18725000321865082, |
| "tpp_threshold_20_intended_diff_only": 0.3790000081062317, |
| "tpp_threshold_20_unintended_diff_only": 0.19175000488758087, |
| "tpp_threshold_50_total_metric": 0.1495000571012497, |
| "tpp_threshold_50_intended_diff_only": 0.4310000538825989, |
| "tpp_threshold_50_unintended_diff_only": 0.2814999967813492, |
| "tpp_threshold_100_total_metric": 0.11225004494190216, |
| "tpp_threshold_100_intended_diff_only": 0.4580000638961792, |
| "tpp_threshold_100_unintended_diff_only": 0.34575001895427704, |
| "tpp_threshold_500_total_metric": 0.0675000548362732, |
| "tpp_threshold_500_intended_diff_only": 0.4630000591278076, |
| "tpp_threshold_500_unintended_diff_only": 0.3955000042915344 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.07575002312660217, |
| "tpp_threshold_2_intended_diff_only": 0.12000000476837158, |
| "tpp_threshold_2_unintended_diff_only": 0.04424998164176941, |
| "tpp_threshold_5_total_metric": 0.1652500182390213, |
| "tpp_threshold_5_intended_diff_only": 0.2160000205039978, |
| "tpp_threshold_5_unintended_diff_only": 0.0507500022649765, |
| "tpp_threshold_10_total_metric": 0.1445000022649765, |
| "tpp_threshold_10_intended_diff_only": 0.33399999141693115, |
| "tpp_threshold_10_unintended_diff_only": 0.18949998915195465, |
| "tpp_threshold_20_total_metric": 0.12049995362758636, |
| "tpp_threshold_20_intended_diff_only": 0.37699997425079346, |
| "tpp_threshold_20_unintended_diff_only": 0.2565000206232071, |
| "tpp_threshold_50_total_metric": 0.12299999594688416, |
| "tpp_threshold_50_intended_diff_only": 0.4300000071525574, |
| "tpp_threshold_50_unintended_diff_only": 0.3070000112056732, |
| "tpp_threshold_100_total_metric": 0.11699996888637543, |
| "tpp_threshold_100_intended_diff_only": 0.44999998807907104, |
| "tpp_threshold_100_unintended_diff_only": 0.3330000191926956, |
| "tpp_threshold_500_total_metric": 0.05350002646446228, |
| "tpp_threshold_500_intended_diff_only": 0.45600003004074097, |
| "tpp_threshold_500_unintended_diff_only": 0.4025000035762787 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.08649998903274536, |
| "tpp_threshold_2_intended_diff_only": 0.10100001096725464, |
| "tpp_threshold_2_unintended_diff_only": 0.014500021934509277, |
| "tpp_threshold_5_total_metric": 0.2434999942779541, |
| "tpp_threshold_5_intended_diff_only": 0.28299999237060547, |
| "tpp_threshold_5_unintended_diff_only": 0.03949999809265137, |
| "tpp_threshold_10_total_metric": 0.3344999849796295, |
| "tpp_threshold_10_intended_diff_only": 0.44999998807907104, |
| "tpp_threshold_10_unintended_diff_only": 0.11550000309944153, |
| "tpp_threshold_20_total_metric": 0.26375000178813934, |
| "tpp_threshold_20_intended_diff_only": 0.47600001096725464, |
| "tpp_threshold_20_unintended_diff_only": 0.2122500091791153, |
| "tpp_threshold_50_total_metric": 0.2162499874830246, |
| "tpp_threshold_50_intended_diff_only": 0.4860000014305115, |
| "tpp_threshold_50_unintended_diff_only": 0.2697500139474869, |
| "tpp_threshold_100_total_metric": 0.18349997699260712, |
| "tpp_threshold_100_intended_diff_only": 0.4909999966621399, |
| "tpp_threshold_100_unintended_diff_only": 0.3075000196695328, |
| "tpp_threshold_500_total_metric": 0.1120000034570694, |
| "tpp_threshold_500_intended_diff_only": 0.49800002574920654, |
| "tpp_threshold_500_unintended_diff_only": 0.38600002229213715 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.10899999737739563, |
| "tpp_threshold_2_intended_diff_only": 0.15299999713897705, |
| "tpp_threshold_2_unintended_diff_only": 0.04399999976158142, |
| "tpp_threshold_5_total_metric": 0.1927499622106552, |
| "tpp_threshold_5_intended_diff_only": 0.31199997663497925, |
| "tpp_threshold_5_unintended_diff_only": 0.11925001442432404, |
| "tpp_threshold_10_total_metric": 0.16099995374679565, |
| "tpp_threshold_10_intended_diff_only": 0.3819999694824219, |
| "tpp_threshold_10_unintended_diff_only": 0.22100001573562622, |
| "tpp_threshold_20_total_metric": 0.19349998235702515, |
| "tpp_threshold_20_intended_diff_only": 0.43400001525878906, |
| "tpp_threshold_20_unintended_diff_only": 0.24050003290176392, |
| "tpp_threshold_50_total_metric": 0.1912499964237213, |
| "tpp_threshold_50_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_50_unintended_diff_only": 0.288750022649765, |
| "tpp_threshold_100_total_metric": 0.1365000158548355, |
| "tpp_threshold_100_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_100_unintended_diff_only": 0.3435000032186508, |
| "tpp_threshold_500_total_metric": 0.08875001966953278, |
| "tpp_threshold_500_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_500_unintended_diff_only": 0.39124999940395355 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.00449998676776886, |
| "tpp_threshold_2_intended_diff_only": 0.009000003337860107, |
| "tpp_threshold_2_unintended_diff_only": 0.0045000165700912476, |
| "tpp_threshold_5_total_metric": 0.00024999678134918213, |
| "tpp_threshold_5_intended_diff_only": 0.004999995231628418, |
| "tpp_threshold_5_unintended_diff_only": 0.004749998450279236, |
| "tpp_threshold_10_total_metric": 0.002499982714653015, |
| "tpp_threshold_10_intended_diff_only": 0.009000003337860107, |
| "tpp_threshold_10_unintended_diff_only": 0.006500020623207092, |
| "tpp_threshold_20_total_metric": 0.009749993681907654, |
| "tpp_threshold_20_intended_diff_only": 0.018000006675720215, |
| "tpp_threshold_20_unintended_diff_only": 0.008250012993812561, |
| "tpp_threshold_50_total_metric": 0.07100002467632294, |
| "tpp_threshold_50_intended_diff_only": 0.0910000205039978, |
| "tpp_threshold_50_unintended_diff_only": 0.019999995827674866, |
| "tpp_threshold_100_total_metric": 0.17949999868869781, |
| "tpp_threshold_100_intended_diff_only": 0.20800000429153442, |
| "tpp_threshold_100_unintended_diff_only": 0.02850000560283661, |
| "tpp_threshold_500_total_metric": 0.38575001060962677, |
| "tpp_threshold_500_intended_diff_only": 0.44700002670288086, |
| "tpp_threshold_500_unintended_diff_only": 0.06125001609325409 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.0102500319480896, |
| "tpp_threshold_2_intended_diff_only": 0.012000024318695068, |
| "tpp_threshold_2_unintended_diff_only": 0.0017499923706054688, |
| "tpp_threshold_5_total_metric": 0.009000048041343689, |
| "tpp_threshold_5_intended_diff_only": 0.020000040531158447, |
| "tpp_threshold_5_unintended_diff_only": 0.010999992489814758, |
| "tpp_threshold_10_total_metric": 0.04050000011920929, |
| "tpp_threshold_10_intended_diff_only": 0.0559999942779541, |
| "tpp_threshold_10_unintended_diff_only": 0.015499994158744812, |
| "tpp_threshold_20_total_metric": 0.0792500376701355, |
| "tpp_threshold_20_intended_diff_only": 0.09400004148483276, |
| "tpp_threshold_20_unintended_diff_only": 0.014750003814697266, |
| "tpp_threshold_50_total_metric": 0.24175003170967102, |
| "tpp_threshold_50_intended_diff_only": 0.2760000228881836, |
| "tpp_threshold_50_unintended_diff_only": 0.03424999117851257, |
| "tpp_threshold_100_total_metric": 0.3410000205039978, |
| "tpp_threshold_100_intended_diff_only": 0.3920000195503235, |
| "tpp_threshold_100_unintended_diff_only": 0.050999999046325684, |
| "tpp_threshold_500_total_metric": 0.3605000525712967, |
| "tpp_threshold_500_intended_diff_only": 0.4440000653266907, |
| "tpp_threshold_500_unintended_diff_only": 0.08350001275539398 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.011249974370002747, |
| "tpp_threshold_2_intended_diff_only": -0.0059999823570251465, |
| "tpp_threshold_2_unintended_diff_only": 0.0052499920129776, |
| "tpp_threshold_5_total_metric": 0.0, |
| "tpp_threshold_5_intended_diff_only": 0.0, |
| "tpp_threshold_5_unintended_diff_only": 0.0, |
| "tpp_threshold_10_total_metric": 0.012749999761581421, |
| "tpp_threshold_10_intended_diff_only": 0.013999998569488525, |
| "tpp_threshold_10_unintended_diff_only": 0.0012499988079071045, |
| "tpp_threshold_20_total_metric": 0.0037500113248825073, |
| "tpp_threshold_20_intended_diff_only": 0.017000019550323486, |
| "tpp_threshold_20_unintended_diff_only": 0.013250008225440979, |
| "tpp_threshold_50_total_metric": 0.07924999296665192, |
| "tpp_threshold_50_intended_diff_only": 0.09799998998641968, |
| "tpp_threshold_50_unintended_diff_only": 0.01874999701976776, |
| "tpp_threshold_100_total_metric": 0.2187499850988388, |
| "tpp_threshold_100_intended_diff_only": 0.25, |
| "tpp_threshold_100_unintended_diff_only": 0.031250014901161194, |
| "tpp_threshold_500_total_metric": 0.26500003039836884, |
| "tpp_threshold_500_intended_diff_only": 0.4180000424385071, |
| "tpp_threshold_500_unintended_diff_only": 0.15300001204013824 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.020000025629997253, |
| "tpp_threshold_2_intended_diff_only": 0.026000022888183594, |
| "tpp_threshold_2_unintended_diff_only": 0.00599999725818634, |
| "tpp_threshold_5_total_metric": 0.07825000584125519, |
| "tpp_threshold_5_intended_diff_only": 0.08300000429153442, |
| "tpp_threshold_5_unintended_diff_only": 0.004749998450279236, |
| "tpp_threshold_10_total_metric": 0.11449998617172241, |
| "tpp_threshold_10_intended_diff_only": 0.12599998712539673, |
| "tpp_threshold_10_unintended_diff_only": 0.011500000953674316, |
| "tpp_threshold_20_total_metric": 0.14949996769428253, |
| "tpp_threshold_20_intended_diff_only": 0.1629999876022339, |
| "tpp_threshold_20_unintended_diff_only": 0.013500019907951355, |
| "tpp_threshold_50_total_metric": 0.23649998009204865, |
| "tpp_threshold_50_intended_diff_only": 0.2549999952316284, |
| "tpp_threshold_50_unintended_diff_only": 0.018500015139579773, |
| "tpp_threshold_100_total_metric": 0.3842499852180481, |
| "tpp_threshold_100_intended_diff_only": 0.40299999713897705, |
| "tpp_threshold_100_unintended_diff_only": 0.018750011920928955, |
| "tpp_threshold_500_total_metric": 0.3695000410079956, |
| "tpp_threshold_500_intended_diff_only": 0.4180000424385071, |
| "tpp_threshold_500_unintended_diff_only": 0.048500001430511475 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.051500022411346436, |
| "tpp_threshold_2_intended_diff_only": 0.054000020027160645, |
| "tpp_threshold_2_unintended_diff_only": 0.002499997615814209, |
| "tpp_threshold_5_total_metric": 0.08624999225139618, |
| "tpp_threshold_5_intended_diff_only": 0.08399999141693115, |
| "tpp_threshold_5_unintended_diff_only": -0.002250000834465027, |
| "tpp_threshold_10_total_metric": 0.12575004994869232, |
| "tpp_threshold_10_intended_diff_only": 0.13100004196166992, |
| "tpp_threshold_10_unintended_diff_only": 0.0052499920129776, |
| "tpp_threshold_20_total_metric": 0.17150002717971802, |
| "tpp_threshold_20_intended_diff_only": 0.18300002813339233, |
| "tpp_threshold_20_unintended_diff_only": 0.011500000953674316, |
| "tpp_threshold_50_total_metric": 0.2970000356435776, |
| "tpp_threshold_50_intended_diff_only": 0.3110000491142273, |
| "tpp_threshold_50_unintended_diff_only": 0.01400001347064972, |
| "tpp_threshold_100_total_metric": 0.3472500592470169, |
| "tpp_threshold_100_intended_diff_only": 0.3710000514984131, |
| "tpp_threshold_100_unintended_diff_only": 0.02374999225139618, |
| "tpp_threshold_500_total_metric": 0.32750004529953003, |
| "tpp_threshold_500_intended_diff_only": 0.3850000500679016, |
| "tpp_threshold_500_unintended_diff_only": 0.05750000476837158 |
| } |
| } |
| } |
| } |