| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745619088254, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.09545000046491624, |
| "tpp_threshold_2_intended_diff_only": 0.121399986743927, |
| "tpp_threshold_2_unintended_diff_only": 0.025949986279010774, |
| "tpp_threshold_5_total_metric": 0.13292499631643295, |
| "tpp_threshold_5_intended_diff_only": 0.2010999858379364, |
| "tpp_threshold_5_unintended_diff_only": 0.06817498952150344, |
| "tpp_threshold_10_total_metric": 0.1396750181913376, |
| "tpp_threshold_10_intended_diff_only": 0.24480000734329221, |
| "tpp_threshold_10_unintended_diff_only": 0.10512498915195465, |
| "tpp_threshold_20_total_metric": 0.16092501133680343, |
| "tpp_threshold_20_intended_diff_only": 0.29480000138282775, |
| "tpp_threshold_20_unintended_diff_only": 0.13387499004602432, |
| "tpp_threshold_50_total_metric": 0.20012502372264862, |
| "tpp_threshold_50_intended_diff_only": 0.3724000155925751, |
| "tpp_threshold_50_unintended_diff_only": 0.17227499186992645, |
| "tpp_threshold_100_total_metric": 0.22265001237392426, |
| "tpp_threshold_100_intended_diff_only": 0.4143000066280365, |
| "tpp_threshold_100_unintended_diff_only": 0.19164999425411225, |
| "tpp_threshold_500_total_metric": 0.20240003168582915, |
| "tpp_threshold_500_intended_diff_only": 0.44320002794265745, |
| "tpp_threshold_500_unintended_diff_only": 0.2407999962568283 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.18059999346733094, |
| "tpp_threshold_2_intended_diff_only": 0.2245999813079834, |
| "tpp_threshold_2_unintended_diff_only": 0.04399998784065247, |
| "tpp_threshold_5_total_metric": 0.22809999883174897, |
| "tpp_threshold_5_intended_diff_only": 0.3497999906539917, |
| "tpp_threshold_5_unintended_diff_only": 0.12169999182224274, |
| "tpp_threshold_10_total_metric": 0.21785001456737518, |
| "tpp_threshold_10_intended_diff_only": 0.40780000686645507, |
| "tpp_threshold_10_unintended_diff_only": 0.18994999229907988, |
| "tpp_threshold_20_total_metric": 0.19220001101493836, |
| "tpp_threshold_20_intended_diff_only": 0.43820000886917115, |
| "tpp_threshold_20_unintended_diff_only": 0.2459999978542328, |
| "tpp_threshold_50_total_metric": 0.1501500278711319, |
| "tpp_threshold_50_intended_diff_only": 0.4600000262260437, |
| "tpp_threshold_50_unintended_diff_only": 0.3098499983549118, |
| "tpp_threshold_100_total_metric": 0.12410001158714294, |
| "tpp_threshold_100_intended_diff_only": 0.46480001211166383, |
| "tpp_threshold_100_unintended_diff_only": 0.3407000005245209, |
| "tpp_threshold_500_total_metric": 0.0799500286579132, |
| "tpp_threshold_500_intended_diff_only": 0.46700003147125246, |
| "tpp_threshold_500_unintended_diff_only": 0.38705000281333923 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.010300007462501527, |
| "tpp_threshold_2_intended_diff_only": 0.018199992179870606, |
| "tpp_threshold_2_unintended_diff_only": 0.00789998471736908, |
| "tpp_threshold_5_total_metric": 0.037749993801116946, |
| "tpp_threshold_5_intended_diff_only": 0.052399981021881106, |
| "tpp_threshold_5_unintended_diff_only": 0.01464998722076416, |
| "tpp_threshold_10_total_metric": 0.06150002181529999, |
| "tpp_threshold_10_intended_diff_only": 0.08180000782012939, |
| "tpp_threshold_10_unintended_diff_only": 0.020299986004829407, |
| "tpp_threshold_20_total_metric": 0.12965001165866852, |
| "tpp_threshold_20_intended_diff_only": 0.15139999389648437, |
| "tpp_threshold_20_unintended_diff_only": 0.02174998223781586, |
| "tpp_threshold_50_total_metric": 0.25010001957416533, |
| "tpp_threshold_50_intended_diff_only": 0.28480000495910646, |
| "tpp_threshold_50_unintended_diff_only": 0.0346999853849411, |
| "tpp_threshold_100_total_metric": 0.32120001316070557, |
| "tpp_threshold_100_intended_diff_only": 0.3638000011444092, |
| "tpp_threshold_100_unintended_diff_only": 0.04259998798370361, |
| "tpp_threshold_500_total_metric": 0.32485003471374513, |
| "tpp_threshold_500_intended_diff_only": 0.4194000244140625, |
| "tpp_threshold_500_unintended_diff_only": 0.09454998970031739 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.15.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.15.hook_resid_post", |
| "hook_layer": 15, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.21000000834465027, |
| "tpp_threshold_2_intended_diff_only": 0.27300000190734863, |
| "tpp_threshold_2_unintended_diff_only": 0.06299999356269836, |
| "tpp_threshold_5_total_metric": 0.20150001347064972, |
| "tpp_threshold_5_intended_diff_only": 0.33799999952316284, |
| "tpp_threshold_5_unintended_diff_only": 0.13649998605251312, |
| "tpp_threshold_10_total_metric": 0.19275003671646118, |
| "tpp_threshold_10_intended_diff_only": 0.36400002241134644, |
| "tpp_threshold_10_unintended_diff_only": 0.17124998569488525, |
| "tpp_threshold_20_total_metric": 0.1614999920129776, |
| "tpp_threshold_20_intended_diff_only": 0.40799999237060547, |
| "tpp_threshold_20_unintended_diff_only": 0.24650000035762787, |
| "tpp_threshold_50_total_metric": 0.13100002706050873, |
| "tpp_threshold_50_intended_diff_only": 0.437000036239624, |
| "tpp_threshold_50_unintended_diff_only": 0.3060000091791153, |
| "tpp_threshold_100_total_metric": 0.10350003838539124, |
| "tpp_threshold_100_intended_diff_only": 0.44700002670288086, |
| "tpp_threshold_100_unintended_diff_only": 0.3434999883174896, |
| "tpp_threshold_500_total_metric": 0.04975004494190216, |
| "tpp_threshold_500_intended_diff_only": 0.45100003480911255, |
| "tpp_threshold_500_unintended_diff_only": 0.4012499898672104 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.1665000319480896, |
| "tpp_threshold_2_intended_diff_only": 0.24500000476837158, |
| "tpp_threshold_2_unintended_diff_only": 0.07849997282028198, |
| "tpp_threshold_5_total_metric": 0.179500013589859, |
| "tpp_threshold_5_intended_diff_only": 0.3140000104904175, |
| "tpp_threshold_5_unintended_diff_only": 0.13449999690055847, |
| "tpp_threshold_10_total_metric": 0.16175006330013275, |
| "tpp_threshold_10_intended_diff_only": 0.3760000467300415, |
| "tpp_threshold_10_unintended_diff_only": 0.21424998342990875, |
| "tpp_threshold_20_total_metric": 0.12950006127357483, |
| "tpp_threshold_20_intended_diff_only": 0.409000039100647, |
| "tpp_threshold_20_unintended_diff_only": 0.27949997782707214, |
| "tpp_threshold_50_total_metric": 0.11200007796287537, |
| "tpp_threshold_50_intended_diff_only": 0.4440000653266907, |
| "tpp_threshold_50_unintended_diff_only": 0.3319999873638153, |
| "tpp_threshold_100_total_metric": 0.08250002562999725, |
| "tpp_threshold_100_intended_diff_only": 0.45600003004074097, |
| "tpp_threshold_100_unintended_diff_only": 0.3735000044107437, |
| "tpp_threshold_500_total_metric": 0.05850006639957428, |
| "tpp_threshold_500_intended_diff_only": 0.4580000638961792, |
| "tpp_threshold_500_unintended_diff_only": 0.3994999974966049 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.22599996626377106, |
| "tpp_threshold_2_intended_diff_only": 0.2669999599456787, |
| "tpp_threshold_2_unintended_diff_only": 0.040999993681907654, |
| "tpp_threshold_5_total_metric": 0.19599997997283936, |
| "tpp_threshold_5_intended_diff_only": 0.390999972820282, |
| "tpp_threshold_5_unintended_diff_only": 0.19499999284744263, |
| "tpp_threshold_10_total_metric": 0.1707499772310257, |
| "tpp_threshold_10_intended_diff_only": 0.4269999861717224, |
| "tpp_threshold_10_unintended_diff_only": 0.2562500089406967, |
| "tpp_threshold_20_total_metric": 0.15324999392032623, |
| "tpp_threshold_20_intended_diff_only": 0.4350000023841858, |
| "tpp_threshold_20_unintended_diff_only": 0.28175000846385956, |
| "tpp_threshold_50_total_metric": 0.11099998652935028, |
| "tpp_threshold_50_intended_diff_only": 0.45499998331069946, |
| "tpp_threshold_50_unintended_diff_only": 0.3439999967813492, |
| "tpp_threshold_100_total_metric": 0.08649997413158417, |
| "tpp_threshold_100_intended_diff_only": 0.4559999704360962, |
| "tpp_threshold_100_unintended_diff_only": 0.369499996304512, |
| "tpp_threshold_500_total_metric": 0.05125001072883606, |
| "tpp_threshold_500_intended_diff_only": 0.4570000171661377, |
| "tpp_threshold_500_unintended_diff_only": 0.40575000643730164 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.25874999165534973, |
| "tpp_threshold_2_intended_diff_only": 0.2879999876022339, |
| "tpp_threshold_2_unintended_diff_only": 0.029249995946884155, |
| "tpp_threshold_5_total_metric": 0.3437500149011612, |
| "tpp_threshold_5_intended_diff_only": 0.40700000524520874, |
| "tpp_threshold_5_unintended_diff_only": 0.06324999034404755, |
| "tpp_threshold_10_total_metric": 0.34699998795986176, |
| "tpp_threshold_10_intended_diff_only": 0.468999981880188, |
| "tpp_threshold_10_unintended_diff_only": 0.12199999392032623, |
| "tpp_threshold_20_total_metric": 0.28474999964237213, |
| "tpp_threshold_20_intended_diff_only": 0.4819999933242798, |
| "tpp_threshold_20_unintended_diff_only": 0.19724999368190765, |
| "tpp_threshold_50_total_metric": 0.22175002098083496, |
| "tpp_threshold_50_intended_diff_only": 0.48900002241134644, |
| "tpp_threshold_50_unintended_diff_only": 0.2672500014305115, |
| "tpp_threshold_100_total_metric": 0.19175000488758087, |
| "tpp_threshold_100_intended_diff_only": 0.49000000953674316, |
| "tpp_threshold_100_unintended_diff_only": 0.2982500046491623, |
| "tpp_threshold_500_total_metric": 0.1300000101327896, |
| "tpp_threshold_500_intended_diff_only": 0.49400001764297485, |
| "tpp_threshold_500_unintended_diff_only": 0.36400000751018524 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.041749969124794006, |
| "tpp_threshold_2_intended_diff_only": 0.04999995231628418, |
| "tpp_threshold_2_unintended_diff_only": 0.008249983191490173, |
| "tpp_threshold_5_total_metric": 0.21974997222423553, |
| "tpp_threshold_5_intended_diff_only": 0.29899996519088745, |
| "tpp_threshold_5_unintended_diff_only": 0.07924999296665192, |
| "tpp_threshold_10_total_metric": 0.21700000762939453, |
| "tpp_threshold_10_intended_diff_only": 0.40299999713897705, |
| "tpp_threshold_10_unintended_diff_only": 0.18599998950958252, |
| "tpp_threshold_20_total_metric": 0.23200000822544098, |
| "tpp_threshold_20_intended_diff_only": 0.4570000171661377, |
| "tpp_threshold_20_unintended_diff_only": 0.22500000894069672, |
| "tpp_threshold_50_total_metric": 0.17500002682209015, |
| "tpp_threshold_50_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_50_unintended_diff_only": 0.29999999701976776, |
| "tpp_threshold_100_total_metric": 0.1562500149011612, |
| "tpp_threshold_100_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_100_unintended_diff_only": 0.3187500089406967, |
| "tpp_threshold_500_total_metric": 0.11025001108646393, |
| "tpp_threshold_500_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_500_unintended_diff_only": 0.364750012755394 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.01975002884864807, |
| "tpp_threshold_2_intended_diff_only": 0.022000014781951904, |
| "tpp_threshold_2_unintended_diff_only": 0.002249985933303833, |
| "tpp_threshold_5_total_metric": 0.02799999713897705, |
| "tpp_threshold_5_intended_diff_only": 0.03799998760223389, |
| "tpp_threshold_5_unintended_diff_only": 0.009999990463256836, |
| "tpp_threshold_10_total_metric": 0.04450003802776337, |
| "tpp_threshold_10_intended_diff_only": 0.05900001525878906, |
| "tpp_threshold_10_unintended_diff_only": 0.014499977231025696, |
| "tpp_threshold_20_total_metric": 0.0922500342130661, |
| "tpp_threshold_20_intended_diff_only": 0.10500001907348633, |
| "tpp_threshold_20_unintended_diff_only": 0.012749984860420227, |
| "tpp_threshold_50_total_metric": 0.2172500342130661, |
| "tpp_threshold_50_intended_diff_only": 0.24500000476837158, |
| "tpp_threshold_50_unintended_diff_only": 0.02774997055530548, |
| "tpp_threshold_100_total_metric": 0.34700003266334534, |
| "tpp_threshold_100_intended_diff_only": 0.36900001764297485, |
| "tpp_threshold_100_unintended_diff_only": 0.021999984979629517, |
| "tpp_threshold_500_total_metric": 0.4195000231266022, |
| "tpp_threshold_500_intended_diff_only": 0.4580000042915344, |
| "tpp_threshold_500_unintended_diff_only": 0.03849998116493225 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.011750012636184692, |
| "tpp_threshold_2_intended_diff_only": 0.023000001907348633, |
| "tpp_threshold_2_unintended_diff_only": 0.01124998927116394, |
| "tpp_threshold_5_total_metric": 0.049749955534935, |
| "tpp_threshold_5_intended_diff_only": 0.06299996376037598, |
| "tpp_threshold_5_unintended_diff_only": 0.013250008225440979, |
| "tpp_threshold_10_total_metric": 0.08525002002716064, |
| "tpp_threshold_10_intended_diff_only": 0.11500000953674316, |
| "tpp_threshold_10_unintended_diff_only": 0.02974998950958252, |
| "tpp_threshold_20_total_metric": 0.1927499771118164, |
| "tpp_threshold_20_intended_diff_only": 0.21999996900558472, |
| "tpp_threshold_20_unintended_diff_only": 0.02724999189376831, |
| "tpp_threshold_50_total_metric": 0.3227500170469284, |
| "tpp_threshold_50_intended_diff_only": 0.3830000162124634, |
| "tpp_threshold_50_unintended_diff_only": 0.06024999916553497, |
| "tpp_threshold_100_total_metric": 0.3477500230073929, |
| "tpp_threshold_100_intended_diff_only": 0.42000001668930054, |
| "tpp_threshold_100_unintended_diff_only": 0.07224999368190765, |
| "tpp_threshold_500_total_metric": 0.29875002801418304, |
| "tpp_threshold_500_intended_diff_only": 0.42900002002716064, |
| "tpp_threshold_500_unintended_diff_only": 0.1302499920129776 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": 0.011999979615211487, |
| "tpp_threshold_2_intended_diff_only": 0.0209999680519104, |
| "tpp_threshold_2_unintended_diff_only": 0.008999988436698914, |
| "tpp_threshold_5_total_metric": 0.017250001430511475, |
| "tpp_threshold_5_intended_diff_only": 0.0339999794960022, |
| "tpp_threshold_5_unintended_diff_only": 0.016749978065490723, |
| "tpp_threshold_10_total_metric": 0.036750033497810364, |
| "tpp_threshold_10_intended_diff_only": 0.054000020027160645, |
| "tpp_threshold_10_unintended_diff_only": 0.01724998652935028, |
| "tpp_threshold_20_total_metric": 0.08750000596046448, |
| "tpp_threshold_20_intended_diff_only": 0.11299997568130493, |
| "tpp_threshold_20_unintended_diff_only": 0.025499969720840454, |
| "tpp_threshold_50_total_metric": 0.200500026345253, |
| "tpp_threshold_50_intended_diff_only": 0.22699999809265137, |
| "tpp_threshold_50_unintended_diff_only": 0.026499971747398376, |
| "tpp_threshold_100_total_metric": 0.2837500125169754, |
| "tpp_threshold_100_intended_diff_only": 0.3199999928474426, |
| "tpp_threshold_100_unintended_diff_only": 0.036249980330467224, |
| "tpp_threshold_500_total_metric": 0.2587500363588333, |
| "tpp_threshold_500_intended_diff_only": 0.4280000329017639, |
| "tpp_threshold_500_unintended_diff_only": 0.1692499965429306 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": -0.005249977111816406, |
| "tpp_threshold_2_intended_diff_only": 0.0, |
| "tpp_threshold_2_unintended_diff_only": 0.005249977111816406, |
| "tpp_threshold_5_total_metric": 0.03200000524520874, |
| "tpp_threshold_5_intended_diff_only": 0.05199998617172241, |
| "tpp_threshold_5_unintended_diff_only": 0.019999980926513672, |
| "tpp_threshold_10_total_metric": 0.07574999332427979, |
| "tpp_threshold_10_intended_diff_only": 0.09399998188018799, |
| "tpp_threshold_10_unintended_diff_only": 0.018249988555908203, |
| "tpp_threshold_20_total_metric": 0.14375002682209015, |
| "tpp_threshold_20_intended_diff_only": 0.16100001335144043, |
| "tpp_threshold_20_unintended_diff_only": 0.01724998652935028, |
| "tpp_threshold_50_total_metric": 0.23899999260902405, |
| "tpp_threshold_50_intended_diff_only": 0.26499998569488525, |
| "tpp_threshold_50_unintended_diff_only": 0.025999993085861206, |
| "tpp_threshold_100_total_metric": 0.3072499781847, |
| "tpp_threshold_100_intended_diff_only": 0.3489999771118164, |
| "tpp_threshold_100_unintended_diff_only": 0.041749998927116394, |
| "tpp_threshold_500_total_metric": 0.3537500500679016, |
| "tpp_threshold_500_intended_diff_only": 0.4190000295639038, |
| "tpp_threshold_500_unintended_diff_only": 0.0652499794960022 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.013249993324279785, |
| "tpp_threshold_2_intended_diff_only": 0.02499997615814209, |
| "tpp_threshold_2_unintended_diff_only": 0.011749982833862305, |
| "tpp_threshold_5_total_metric": 0.061750009655952454, |
| "tpp_threshold_5_intended_diff_only": 0.07499998807907104, |
| "tpp_threshold_5_unintended_diff_only": 0.013249978423118591, |
| "tpp_threshold_10_total_metric": 0.06525002419948578, |
| "tpp_threshold_10_intended_diff_only": 0.08700001239776611, |
| "tpp_threshold_10_unintended_diff_only": 0.021749988198280334, |
| "tpp_threshold_20_total_metric": 0.13200001418590546, |
| "tpp_threshold_20_intended_diff_only": 0.15799999237060547, |
| "tpp_threshold_20_unintended_diff_only": 0.025999978184700012, |
| "tpp_threshold_50_total_metric": 0.2710000276565552, |
| "tpp_threshold_50_intended_diff_only": 0.30400002002716064, |
| "tpp_threshold_50_unintended_diff_only": 0.03299999237060547, |
| "tpp_threshold_100_total_metric": 0.3202500194311142, |
| "tpp_threshold_100_intended_diff_only": 0.3610000014305115, |
| "tpp_threshold_100_unintended_diff_only": 0.04074998199939728, |
| "tpp_threshold_500_total_metric": 0.29350003600120544, |
| "tpp_threshold_500_intended_diff_only": 0.3630000352859497, |
| "tpp_threshold_500_unintended_diff_only": 0.06949999928474426 |
| } |
| } |
| } |
| } |