| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752417333, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.0504749983549118, |
| "tpp_threshold_2_intended_diff_only": 0.06300000548362732, |
| "tpp_threshold_2_unintended_diff_only": 0.012525007128715515, |
| "tpp_threshold_5_total_metric": 0.12190000414848327, |
| "tpp_threshold_5_intended_diff_only": 0.15250000953674317, |
| "tpp_threshold_5_unintended_diff_only": 0.03060000538825989, |
| "tpp_threshold_10_total_metric": 0.1714500069618225, |
| "tpp_threshold_10_intended_diff_only": 0.23470001220703124, |
| "tpp_threshold_10_unintended_diff_only": 0.06325000524520874, |
| "tpp_threshold_20_total_metric": 0.20885000675916673, |
| "tpp_threshold_20_intended_diff_only": 0.3016000151634216, |
| "tpp_threshold_20_unintended_diff_only": 0.09275000840425492, |
| "tpp_threshold_50_total_metric": 0.254925012588501, |
| "tpp_threshold_50_intended_diff_only": 0.3901000201702118, |
| "tpp_threshold_50_unintended_diff_only": 0.1351750075817108, |
| "tpp_threshold_100_total_metric": 0.2638250172138214, |
| "tpp_threshold_100_intended_diff_only": 0.43110002875328063, |
| "tpp_threshold_100_unintended_diff_only": 0.16727501153945923, |
| "tpp_threshold_500_total_metric": 0.22835002839565277, |
| "tpp_threshold_500_intended_diff_only": 0.44750004410743716, |
| "tpp_threshold_500_unintended_diff_only": 0.21915001571178438 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.07439999580383301, |
| "tpp_threshold_2_intended_diff_only": 0.09720000028610229, |
| "tpp_threshold_2_unintended_diff_only": 0.022800004482269286, |
| "tpp_threshold_5_total_metric": 0.1940000146627426, |
| "tpp_threshold_5_intended_diff_only": 0.253600013256073, |
| "tpp_threshold_5_unintended_diff_only": 0.059599998593330386, |
| "tpp_threshold_10_total_metric": 0.2573000192642212, |
| "tpp_threshold_10_intended_diff_only": 0.3788000226020813, |
| "tpp_threshold_10_unintended_diff_only": 0.12150000333786011, |
| "tpp_threshold_20_total_metric": 0.25490000247955324, |
| "tpp_threshold_20_intended_diff_only": 0.42560001611709597, |
| "tpp_threshold_20_unintended_diff_only": 0.17070001363754272, |
| "tpp_threshold_50_total_metric": 0.21130000054836273, |
| "tpp_threshold_50_intended_diff_only": 0.4582000136375427, |
| "tpp_threshold_50_unintended_diff_only": 0.24690001308918, |
| "tpp_threshold_100_total_metric": 0.1640000194311142, |
| "tpp_threshold_100_intended_diff_only": 0.4686000347137451, |
| "tpp_threshold_100_unintended_diff_only": 0.3046000152826309, |
| "tpp_threshold_500_total_metric": 0.0841000258922577, |
| "tpp_threshold_500_intended_diff_only": 0.4704000473022461, |
| "tpp_threshold_500_unintended_diff_only": 0.3863000214099884 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.0265500009059906, |
| "tpp_threshold_2_intended_diff_only": 0.028800010681152344, |
| "tpp_threshold_2_unintended_diff_only": 0.0022500097751617433, |
| "tpp_threshold_5_total_metric": 0.04979999363422394, |
| "tpp_threshold_5_intended_diff_only": 0.05140000581741333, |
| "tpp_threshold_5_unintended_diff_only": 0.001600012183189392, |
| "tpp_threshold_10_total_metric": 0.08559999465942383, |
| "tpp_threshold_10_intended_diff_only": 0.0906000018119812, |
| "tpp_threshold_10_unintended_diff_only": 0.005000007152557373, |
| "tpp_threshold_20_total_metric": 0.16280001103878022, |
| "tpp_threshold_20_intended_diff_only": 0.1776000142097473, |
| "tpp_threshold_20_unintended_diff_only": 0.014800003170967102, |
| "tpp_threshold_50_total_metric": 0.2985500246286392, |
| "tpp_threshold_50_intended_diff_only": 0.32200002670288086, |
| "tpp_threshold_50_unintended_diff_only": 0.023450002074241638, |
| "tpp_threshold_100_total_metric": 0.3636500149965286, |
| "tpp_threshold_100_intended_diff_only": 0.39360002279281614, |
| "tpp_threshold_100_unintended_diff_only": 0.029950007796287537, |
| "tpp_threshold_500_total_metric": 0.37260003089904786, |
| "tpp_threshold_500_intended_diff_only": 0.42460004091262815, |
| "tpp_threshold_500_unintended_diff_only": 0.052000010013580324 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.8.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.8.hook_resid_post", |
| "hook_layer": 8, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.08900001645088196, |
| "tpp_threshold_2_intended_diff_only": 0.10500001907348633, |
| "tpp_threshold_2_unintended_diff_only": 0.01600000262260437, |
| "tpp_threshold_5_total_metric": 0.17475003004074097, |
| "tpp_threshold_5_intended_diff_only": 0.22100001573562622, |
| "tpp_threshold_5_unintended_diff_only": 0.046249985694885254, |
| "tpp_threshold_10_total_metric": 0.2537500262260437, |
| "tpp_threshold_10_intended_diff_only": 0.3370000123977661, |
| "tpp_threshold_10_unintended_diff_only": 0.08324998617172241, |
| "tpp_threshold_20_total_metric": 0.27125002443790436, |
| "tpp_threshold_20_intended_diff_only": 0.38600003719329834, |
| "tpp_threshold_20_unintended_diff_only": 0.11475001275539398, |
| "tpp_threshold_50_total_metric": 0.2055000215768814, |
| "tpp_threshold_50_intended_diff_only": 0.43300002813339233, |
| "tpp_threshold_50_unintended_diff_only": 0.22750000655651093, |
| "tpp_threshold_100_total_metric": 0.1510000228881836, |
| "tpp_threshold_100_intended_diff_only": 0.45100003480911255, |
| "tpp_threshold_100_unintended_diff_only": 0.30000001192092896, |
| "tpp_threshold_500_total_metric": 0.07825006544589996, |
| "tpp_threshold_500_intended_diff_only": 0.4530000686645508, |
| "tpp_threshold_500_unintended_diff_only": 0.3747500032186508 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.11750002205371857, |
| "tpp_threshold_2_intended_diff_only": 0.1420000195503235, |
| "tpp_threshold_2_unintended_diff_only": 0.02449999749660492, |
| "tpp_threshold_5_total_metric": 0.1522500365972519, |
| "tpp_threshold_5_intended_diff_only": 0.18700003623962402, |
| "tpp_threshold_5_unintended_diff_only": 0.03474999964237213, |
| "tpp_threshold_10_total_metric": 0.2157500684261322, |
| "tpp_threshold_10_intended_diff_only": 0.3190000653266907, |
| "tpp_threshold_10_unintended_diff_only": 0.10324999690055847, |
| "tpp_threshold_20_total_metric": 0.23125004768371582, |
| "tpp_threshold_20_intended_diff_only": 0.362000048160553, |
| "tpp_threshold_20_unintended_diff_only": 0.13075000047683716, |
| "tpp_threshold_50_total_metric": 0.20025001466274261, |
| "tpp_threshold_50_intended_diff_only": 0.4240000247955322, |
| "tpp_threshold_50_unintended_diff_only": 0.2237500101327896, |
| "tpp_threshold_100_total_metric": 0.15775004029273987, |
| "tpp_threshold_100_intended_diff_only": 0.45500004291534424, |
| "tpp_threshold_100_unintended_diff_only": 0.29725000262260437, |
| "tpp_threshold_500_total_metric": 0.06100006401538849, |
| "tpp_threshold_500_intended_diff_only": 0.4620000720024109, |
| "tpp_threshold_500_unintended_diff_only": 0.4010000079870224 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.028749987483024597, |
| "tpp_threshold_2_intended_diff_only": 0.041999995708465576, |
| "tpp_threshold_2_unintended_diff_only": 0.013250008225440979, |
| "tpp_threshold_5_total_metric": 0.15524999797344208, |
| "tpp_threshold_5_intended_diff_only": 0.18900001049041748, |
| "tpp_threshold_5_unintended_diff_only": 0.0337500125169754, |
| "tpp_threshold_10_total_metric": 0.21850000321865082, |
| "tpp_threshold_10_intended_diff_only": 0.3410000205039978, |
| "tpp_threshold_10_unintended_diff_only": 0.12250001728534698, |
| "tpp_threshold_20_total_metric": 0.20524998009204865, |
| "tpp_threshold_20_intended_diff_only": 0.4169999957084656, |
| "tpp_threshold_20_unintended_diff_only": 0.21175001561641693, |
| "tpp_threshold_50_total_metric": 0.17899997532367706, |
| "tpp_threshold_50_intended_diff_only": 0.45899999141693115, |
| "tpp_threshold_50_unintended_diff_only": 0.2800000160932541, |
| "tpp_threshold_100_total_metric": 0.13975003361701965, |
| "tpp_threshold_100_intended_diff_only": 0.46000003814697266, |
| "tpp_threshold_100_unintended_diff_only": 0.320250004529953, |
| "tpp_threshold_500_total_metric": 0.05700001120567322, |
| "tpp_threshold_500_intended_diff_only": 0.46000003814697266, |
| "tpp_threshold_500_unintended_diff_only": 0.40300002694129944 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.030749991536140442, |
| "tpp_threshold_2_intended_diff_only": 0.041999995708465576, |
| "tpp_threshold_2_unintended_diff_only": 0.011250004172325134, |
| "tpp_threshold_5_total_metric": 0.2992500364780426, |
| "tpp_threshold_5_intended_diff_only": 0.38600003719329834, |
| "tpp_threshold_5_unintended_diff_only": 0.08675000071525574, |
| "tpp_threshold_10_total_metric": 0.35850000381469727, |
| "tpp_threshold_10_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_10_unintended_diff_only": 0.12150001525878906, |
| "tpp_threshold_20_total_metric": 0.31200000643730164, |
| "tpp_threshold_20_intended_diff_only": 0.48900002241134644, |
| "tpp_threshold_20_unintended_diff_only": 0.1770000159740448, |
| "tpp_threshold_50_total_metric": 0.2685000002384186, |
| "tpp_threshold_50_intended_diff_only": 0.4950000047683716, |
| "tpp_threshold_50_unintended_diff_only": 0.226500004529953, |
| "tpp_threshold_100_total_metric": 0.2120000123977661, |
| "tpp_threshold_100_intended_diff_only": 0.4970000386238098, |
| "tpp_threshold_100_unintended_diff_only": 0.2850000262260437, |
| "tpp_threshold_500_total_metric": 0.1315000057220459, |
| "tpp_threshold_500_intended_diff_only": 0.4970000386238098, |
| "tpp_threshold_500_unintended_diff_only": 0.3655000329017639 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.10599996149539948, |
| "tpp_threshold_2_intended_diff_only": 0.1549999713897705, |
| "tpp_threshold_2_unintended_diff_only": 0.04900000989437103, |
| "tpp_threshold_5_total_metric": 0.18849997222423553, |
| "tpp_threshold_5_intended_diff_only": 0.2849999666213989, |
| "tpp_threshold_5_unintended_diff_only": 0.09649999439716339, |
| "tpp_threshold_10_total_metric": 0.23999999463558197, |
| "tpp_threshold_10_intended_diff_only": 0.4169999957084656, |
| "tpp_threshold_10_unintended_diff_only": 0.1770000010728836, |
| "tpp_threshold_20_total_metric": 0.25474995374679565, |
| "tpp_threshold_20_intended_diff_only": 0.4739999771118164, |
| "tpp_threshold_20_unintended_diff_only": 0.21925002336502075, |
| "tpp_threshold_50_total_metric": 0.203249990940094, |
| "tpp_threshold_50_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_50_unintended_diff_only": 0.27675002813339233, |
| "tpp_threshold_100_total_metric": 0.15949998795986176, |
| "tpp_threshold_100_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_100_unintended_diff_only": 0.3205000311136246, |
| "tpp_threshold_500_total_metric": 0.09274998307228088, |
| "tpp_threshold_500_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_500_unintended_diff_only": 0.38725003600120544 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.0065000057220458984, |
| "tpp_threshold_2_intended_diff_only": 0.009000003337860107, |
| "tpp_threshold_2_unintended_diff_only": 0.002499997615814209, |
| "tpp_threshold_5_total_metric": 0.018000036478042603, |
| "tpp_threshold_5_intended_diff_only": 0.021000027656555176, |
| "tpp_threshold_5_unintended_diff_only": 0.0029999911785125732, |
| "tpp_threshold_10_total_metric": 0.0350000262260437, |
| "tpp_threshold_10_intended_diff_only": 0.04000002145767212, |
| "tpp_threshold_10_unintended_diff_only": 0.004999995231628418, |
| "tpp_threshold_20_total_metric": 0.09375004470348358, |
| "tpp_threshold_20_intended_diff_only": 0.1170000433921814, |
| "tpp_threshold_20_unintended_diff_only": 0.023249998688697815, |
| "tpp_threshold_50_total_metric": 0.2795000523328781, |
| "tpp_threshold_50_intended_diff_only": 0.3160000443458557, |
| "tpp_threshold_50_unintended_diff_only": 0.0364999920129776, |
| "tpp_threshold_100_total_metric": 0.364250048995018, |
| "tpp_threshold_100_intended_diff_only": 0.40800005197525024, |
| "tpp_threshold_100_unintended_diff_only": 0.04375000298023224, |
| "tpp_threshold_500_total_metric": 0.39225007593631744, |
| "tpp_threshold_500_intended_diff_only": 0.44800007343292236, |
| "tpp_threshold_500_unintended_diff_only": 0.05574999749660492 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.023249968886375427, |
| "tpp_threshold_2_intended_diff_only": 0.02399998903274536, |
| "tpp_threshold_2_unintended_diff_only": 0.0007500201463699341, |
| "tpp_threshold_5_total_metric": 0.05349995195865631, |
| "tpp_threshold_5_intended_diff_only": 0.06199997663497925, |
| "tpp_threshold_5_unintended_diff_only": 0.008500024676322937, |
| "tpp_threshold_10_total_metric": 0.09099997580051422, |
| "tpp_threshold_10_intended_diff_only": 0.0989999771118164, |
| "tpp_threshold_10_unintended_diff_only": 0.008000001311302185, |
| "tpp_threshold_20_total_metric": 0.2732499837875366, |
| "tpp_threshold_20_intended_diff_only": 0.3009999990463257, |
| "tpp_threshold_20_unintended_diff_only": 0.027750015258789062, |
| "tpp_threshold_50_total_metric": 0.38475003838539124, |
| "tpp_threshold_50_intended_diff_only": 0.4230000376701355, |
| "tpp_threshold_50_unintended_diff_only": 0.03824999928474426, |
| "tpp_threshold_100_total_metric": 0.39525000751018524, |
| "tpp_threshold_100_intended_diff_only": 0.4390000104904175, |
| "tpp_threshold_100_unintended_diff_only": 0.04375000298023224, |
| "tpp_threshold_500_total_metric": 0.37150003015995026, |
| "tpp_threshold_500_intended_diff_only": 0.4410000443458557, |
| "tpp_threshold_500_unintended_diff_only": 0.06950001418590546 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.006499975919723511, |
| "tpp_threshold_2_intended_diff_only": -0.001999974250793457, |
| "tpp_threshold_2_unintended_diff_only": 0.004500001668930054, |
| "tpp_threshold_5_total_metric": 0.0017500072717666626, |
| "tpp_threshold_5_intended_diff_only": -0.001999974250793457, |
| "tpp_threshold_5_unintended_diff_only": -0.0037499815225601196, |
| "tpp_threshold_10_total_metric": 0.02425001561641693, |
| "tpp_threshold_10_intended_diff_only": 0.022000014781951904, |
| "tpp_threshold_10_unintended_diff_only": -0.002250000834465027, |
| "tpp_threshold_20_total_metric": 0.04499998688697815, |
| "tpp_threshold_20_intended_diff_only": 0.0559999942779541, |
| "tpp_threshold_20_unintended_diff_only": 0.011000007390975952, |
| "tpp_threshold_50_total_metric": 0.1677500456571579, |
| "tpp_threshold_50_intended_diff_only": 0.1820000410079956, |
| "tpp_threshold_50_unintended_diff_only": 0.014249995350837708, |
| "tpp_threshold_100_total_metric": 0.2902500331401825, |
| "tpp_threshold_100_intended_diff_only": 0.3160000443458557, |
| "tpp_threshold_100_unintended_diff_only": 0.025750011205673218, |
| "tpp_threshold_500_total_metric": 0.3682500123977661, |
| "tpp_threshold_500_intended_diff_only": 0.42000001668930054, |
| "tpp_threshold_500_unintended_diff_only": 0.051750004291534424 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.07649999856948853, |
| "tpp_threshold_2_intended_diff_only": 0.07700002193450928, |
| "tpp_threshold_2_unintended_diff_only": 0.000500023365020752, |
| "tpp_threshold_5_total_metric": 0.1250000149011612, |
| "tpp_threshold_5_intended_diff_only": 0.12800002098083496, |
| "tpp_threshold_5_unintended_diff_only": 0.003000006079673767, |
| "tpp_threshold_10_total_metric": 0.179500013589859, |
| "tpp_threshold_10_intended_diff_only": 0.18800002336502075, |
| "tpp_threshold_10_unintended_diff_only": 0.008500009775161743, |
| "tpp_threshold_20_total_metric": 0.24300001561641693, |
| "tpp_threshold_20_intended_diff_only": 0.24900001287460327, |
| "tpp_threshold_20_unintended_diff_only": 0.00599999725818634, |
| "tpp_threshold_50_total_metric": 0.3557499796152115, |
| "tpp_threshold_50_intended_diff_only": 0.3709999918937683, |
| "tpp_threshold_50_unintended_diff_only": 0.015250012278556824, |
| "tpp_threshold_100_total_metric": 0.4100000113248825, |
| "tpp_threshold_100_intended_diff_only": 0.42900002002716064, |
| "tpp_threshold_100_unintended_diff_only": 0.019000008702278137, |
| "tpp_threshold_500_total_metric": 0.38575002551078796, |
| "tpp_threshold_500_intended_diff_only": 0.4320000410079956, |
| "tpp_threshold_500_unintended_diff_only": 0.04625001549720764 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.03300000727176666, |
| "tpp_threshold_2_intended_diff_only": 0.03600001335144043, |
| "tpp_threshold_2_unintended_diff_only": 0.003000006079673767, |
| "tpp_threshold_5_total_metric": 0.05074995756149292, |
| "tpp_threshold_5_intended_diff_only": 0.04799997806549072, |
| "tpp_threshold_5_unintended_diff_only": -0.0027499794960021973, |
| "tpp_threshold_10_total_metric": 0.09824994206428528, |
| "tpp_threshold_10_intended_diff_only": 0.10399997234344482, |
| "tpp_threshold_10_unintended_diff_only": 0.005750030279159546, |
| "tpp_threshold_20_total_metric": 0.15900002419948578, |
| "tpp_threshold_20_intended_diff_only": 0.16500002145767212, |
| "tpp_threshold_20_unintended_diff_only": 0.00599999725818634, |
| "tpp_threshold_50_total_metric": 0.3050000071525574, |
| "tpp_threshold_50_intended_diff_only": 0.31800001859664917, |
| "tpp_threshold_50_unintended_diff_only": 0.013000011444091797, |
| "tpp_threshold_100_total_metric": 0.3584999740123749, |
| "tpp_threshold_100_intended_diff_only": 0.37599998712539673, |
| "tpp_threshold_100_unintended_diff_only": 0.01750001311302185, |
| "tpp_threshold_500_total_metric": 0.3452500104904175, |
| "tpp_threshold_500_intended_diff_only": 0.38200002908706665, |
| "tpp_threshold_500_unintended_diff_only": 0.03675001859664917 |
| } |
| } |
| } |
| } |