| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752506332, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.0697000116109848, |
| "tpp_threshold_2_intended_diff_only": 0.12050001621246338, |
| "tpp_threshold_2_unintended_diff_only": 0.05080000460147858, |
| "tpp_threshold_5_total_metric": 0.08902500420808791, |
| "tpp_threshold_5_intended_diff_only": 0.19230000972747802, |
| "tpp_threshold_5_unintended_diff_only": 0.10327500551939012, |
| "tpp_threshold_10_total_metric": 0.09484999924898146, |
| "tpp_threshold_10_intended_diff_only": 0.23190001249313352, |
| "tpp_threshold_10_unintended_diff_only": 0.13705001324415206, |
| "tpp_threshold_20_total_metric": 0.1159750059247017, |
| "tpp_threshold_20_intended_diff_only": 0.27680001854896547, |
| "tpp_threshold_20_unintended_diff_only": 0.16082501262426377, |
| "tpp_threshold_50_total_metric": 0.1700750082731247, |
| "tpp_threshold_50_intended_diff_only": 0.3628000199794769, |
| "tpp_threshold_50_unintended_diff_only": 0.19272501170635223, |
| "tpp_threshold_100_total_metric": 0.20317501127719878, |
| "tpp_threshold_100_intended_diff_only": 0.4226000249385834, |
| "tpp_threshold_100_unintended_diff_only": 0.2194250136613846, |
| "tpp_threshold_500_total_metric": 0.1917250320315361, |
| "tpp_threshold_500_intended_diff_only": 0.44680004715919497, |
| "tpp_threshold_500_unintended_diff_only": 0.25507501512765884 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.13325002789497375, |
| "tpp_threshold_2_intended_diff_only": 0.23200002908706666, |
| "tpp_threshold_2_unintended_diff_only": 0.0987500011920929, |
| "tpp_threshold_5_total_metric": 0.16080000400543212, |
| "tpp_threshold_5_intended_diff_only": 0.3630000114440918, |
| "tpp_threshold_5_unintended_diff_only": 0.20220000743865968, |
| "tpp_threshold_10_total_metric": 0.14279999136924743, |
| "tpp_threshold_10_intended_diff_only": 0.40500000715255735, |
| "tpp_threshold_10_unintended_diff_only": 0.2622000157833099, |
| "tpp_threshold_20_total_metric": 0.12925000488758087, |
| "tpp_threshold_20_intended_diff_only": 0.4344000220298767, |
| "tpp_threshold_20_unintended_diff_only": 0.30515001714229584, |
| "tpp_threshold_50_total_metric": 0.10180000364780425, |
| "tpp_threshold_50_intended_diff_only": 0.4588000178337097, |
| "tpp_threshold_50_unintended_diff_only": 0.35700001418590543, |
| "tpp_threshold_100_total_metric": 0.08685000538825989, |
| "tpp_threshold_100_intended_diff_only": 0.4694000244140625, |
| "tpp_threshold_100_unintended_diff_only": 0.3825500190258026, |
| "tpp_threshold_500_total_metric": 0.06040002703666687, |
| "tpp_threshold_500_intended_diff_only": 0.4714000463485718, |
| "tpp_threshold_500_unintended_diff_only": 0.4110000193119049 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.00614999532699585, |
| "tpp_threshold_2_intended_diff_only": 0.009000003337860107, |
| "tpp_threshold_2_unintended_diff_only": 0.0028500080108642576, |
| "tpp_threshold_5_total_metric": 0.017250004410743713, |
| "tpp_threshold_5_intended_diff_only": 0.021600008010864258, |
| "tpp_threshold_5_unintended_diff_only": 0.004350003600120544, |
| "tpp_threshold_10_total_metric": 0.04690000712871552, |
| "tpp_threshold_10_intended_diff_only": 0.058800017833709715, |
| "tpp_threshold_10_unintended_diff_only": 0.011900010704994201, |
| "tpp_threshold_20_total_metric": 0.10270000696182251, |
| "tpp_threshold_20_intended_diff_only": 0.1192000150680542, |
| "tpp_threshold_20_unintended_diff_only": 0.01650000810623169, |
| "tpp_threshold_50_total_metric": 0.23835001289844512, |
| "tpp_threshold_50_intended_diff_only": 0.2668000221252441, |
| "tpp_threshold_50_unintended_diff_only": 0.028450009226799012, |
| "tpp_threshold_100_total_metric": 0.3195000171661377, |
| "tpp_threshold_100_intended_diff_only": 0.37580002546310426, |
| "tpp_threshold_100_unintended_diff_only": 0.056300008296966554, |
| "tpp_threshold_500_total_metric": 0.32305003702640533, |
| "tpp_threshold_500_intended_diff_only": 0.42220004796981814, |
| "tpp_threshold_500_unintended_diff_only": 0.09915001094341278 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.10.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.10.hook_resid_post", |
| "hook_layer": 10, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.1822500377893448, |
| "tpp_threshold_2_intended_diff_only": 0.34400004148483276, |
| "tpp_threshold_2_unintended_diff_only": 0.16175000369548798, |
| "tpp_threshold_5_total_metric": 0.14750003814697266, |
| "tpp_threshold_5_intended_diff_only": 0.3670000433921814, |
| "tpp_threshold_5_unintended_diff_only": 0.21950000524520874, |
| "tpp_threshold_10_total_metric": 0.12300002574920654, |
| "tpp_threshold_10_intended_diff_only": 0.40000003576278687, |
| "tpp_threshold_10_unintended_diff_only": 0.2770000100135803, |
| "tpp_threshold_20_total_metric": 0.11400000751018524, |
| "tpp_threshold_20_intended_diff_only": 0.4150000214576721, |
| "tpp_threshold_20_unintended_diff_only": 0.3010000139474869, |
| "tpp_threshold_50_total_metric": 0.07349999248981476, |
| "tpp_threshold_50_intended_diff_only": 0.4350000023841858, |
| "tpp_threshold_50_unintended_diff_only": 0.36150000989437103, |
| "tpp_threshold_100_total_metric": 0.06724999845027924, |
| "tpp_threshold_100_intended_diff_only": 0.4520000219345093, |
| "tpp_threshold_100_unintended_diff_only": 0.38475002348423004, |
| "tpp_threshold_500_total_metric": 0.04450003802776337, |
| "tpp_threshold_500_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_500_unintended_diff_only": 0.40950001776218414 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.13100001215934753, |
| "tpp_threshold_2_intended_diff_only": 0.2670000195503235, |
| "tpp_threshold_2_unintended_diff_only": 0.13600000739097595, |
| "tpp_threshold_5_total_metric": 0.1342499703168869, |
| "tpp_threshold_5_intended_diff_only": 0.3479999899864197, |
| "tpp_threshold_5_unintended_diff_only": 0.21375001966953278, |
| "tpp_threshold_10_total_metric": 0.12874995172023773, |
| "tpp_threshold_10_intended_diff_only": 0.37199997901916504, |
| "tpp_threshold_10_unintended_diff_only": 0.2432500272989273, |
| "tpp_threshold_20_total_metric": 0.09449994564056396, |
| "tpp_threshold_20_intended_diff_only": 0.4089999794960022, |
| "tpp_threshold_20_unintended_diff_only": 0.31450003385543823, |
| "tpp_threshold_50_total_metric": 0.085999995470047, |
| "tpp_threshold_50_intended_diff_only": 0.4440000057220459, |
| "tpp_threshold_50_unintended_diff_only": 0.3580000102519989, |
| "tpp_threshold_100_total_metric": 0.07349994778633118, |
| "tpp_threshold_100_intended_diff_only": 0.4599999785423279, |
| "tpp_threshold_100_unintended_diff_only": 0.3865000307559967, |
| "tpp_threshold_500_total_metric": 0.04800000786781311, |
| "tpp_threshold_500_intended_diff_only": 0.4610000252723694, |
| "tpp_threshold_500_unintended_diff_only": 0.4130000174045563 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.10050006210803986, |
| "tpp_threshold_2_intended_diff_only": 0.22300004959106445, |
| "tpp_threshold_2_unintended_diff_only": 0.1224999874830246, |
| "tpp_threshold_5_total_metric": 0.09075002372264862, |
| "tpp_threshold_5_intended_diff_only": 0.3270000219345093, |
| "tpp_threshold_5_unintended_diff_only": 0.23624999821186066, |
| "tpp_threshold_10_total_metric": 0.05824999511241913, |
| "tpp_threshold_10_intended_diff_only": 0.37400001287460327, |
| "tpp_threshold_10_unintended_diff_only": 0.31575001776218414, |
| "tpp_threshold_20_total_metric": 0.05950002372264862, |
| "tpp_threshold_20_intended_diff_only": 0.39500004053115845, |
| "tpp_threshold_20_unintended_diff_only": 0.3355000168085098, |
| "tpp_threshold_50_total_metric": 0.0729999989271164, |
| "tpp_threshold_50_intended_diff_only": 0.4480000138282776, |
| "tpp_threshold_50_unintended_diff_only": 0.3750000149011612, |
| "tpp_threshold_100_total_metric": 0.07700003683567047, |
| "tpp_threshold_100_intended_diff_only": 0.4630000591278076, |
| "tpp_threshold_100_unintended_diff_only": 0.38600002229213715, |
| "tpp_threshold_500_total_metric": 0.04475004971027374, |
| "tpp_threshold_500_intended_diff_only": 0.4630000591278076, |
| "tpp_threshold_500_unintended_diff_only": 0.4182500094175339 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.23325000703334808, |
| "tpp_threshold_2_intended_diff_only": 0.29500001668930054, |
| "tpp_threshold_2_unintended_diff_only": 0.061750009655952454, |
| "tpp_threshold_5_total_metric": 0.28300000727176666, |
| "tpp_threshold_5_intended_diff_only": 0.406000018119812, |
| "tpp_threshold_5_unintended_diff_only": 0.12300001084804535, |
| "tpp_threshold_10_total_metric": 0.2744999974966049, |
| "tpp_threshold_10_intended_diff_only": 0.4580000042915344, |
| "tpp_threshold_10_unintended_diff_only": 0.1835000067949295, |
| "tpp_threshold_20_total_metric": 0.23575003445148468, |
| "tpp_threshold_20_intended_diff_only": 0.47800004482269287, |
| "tpp_threshold_20_unintended_diff_only": 0.2422500103712082, |
| "tpp_threshold_50_total_metric": 0.15325000882148743, |
| "tpp_threshold_50_intended_diff_only": 0.484000027179718, |
| "tpp_threshold_50_unintended_diff_only": 0.3307500183582306, |
| "tpp_threshold_100_total_metric": 0.12325000762939453, |
| "tpp_threshold_100_intended_diff_only": 0.48900002241134644, |
| "tpp_threshold_100_unintended_diff_only": 0.3657500147819519, |
| "tpp_threshold_500_total_metric": 0.09575001895427704, |
| "tpp_threshold_500_intended_diff_only": 0.4960000514984131, |
| "tpp_threshold_500_unintended_diff_only": 0.40025003254413605 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.019250020384788513, |
| "tpp_threshold_2_intended_diff_only": 0.03100001811981201, |
| "tpp_threshold_2_unintended_diff_only": 0.011749997735023499, |
| "tpp_threshold_5_total_metric": 0.1484999805688858, |
| "tpp_threshold_5_intended_diff_only": 0.3669999837875366, |
| "tpp_threshold_5_unintended_diff_only": 0.21850000321865082, |
| "tpp_threshold_10_total_metric": 0.12949998676776886, |
| "tpp_threshold_10_intended_diff_only": 0.42100000381469727, |
| "tpp_threshold_10_unintended_diff_only": 0.2915000170469284, |
| "tpp_threshold_20_total_metric": 0.14250001311302185, |
| "tpp_threshold_20_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_20_unintended_diff_only": 0.33250001072883606, |
| "tpp_threshold_50_total_metric": 0.12325002253055573, |
| "tpp_threshold_50_intended_diff_only": 0.4830000400543213, |
| "tpp_threshold_50_unintended_diff_only": 0.35975001752376556, |
| "tpp_threshold_100_total_metric": 0.09325003623962402, |
| "tpp_threshold_100_intended_diff_only": 0.4830000400543213, |
| "tpp_threshold_100_unintended_diff_only": 0.38975000381469727, |
| "tpp_threshold_500_total_metric": 0.06900002062320709, |
| "tpp_threshold_500_intended_diff_only": 0.4830000400543213, |
| "tpp_threshold_500_unintended_diff_only": 0.4140000194311142 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.01199999451637268, |
| "tpp_threshold_2_intended_diff_only": 0.013999998569488525, |
| "tpp_threshold_2_unintended_diff_only": 0.0020000040531158447, |
| "tpp_threshold_5_total_metric": 0.00024999678134918213, |
| "tpp_threshold_5_intended_diff_only": 0.0040000081062316895, |
| "tpp_threshold_5_unintended_diff_only": 0.0037500113248825073, |
| "tpp_threshold_10_total_metric": 0.007750004529953003, |
| "tpp_threshold_10_intended_diff_only": 0.022000014781951904, |
| "tpp_threshold_10_unintended_diff_only": 0.014250010251998901, |
| "tpp_threshold_20_total_metric": 0.02199999988079071, |
| "tpp_threshold_20_intended_diff_only": 0.041999995708465576, |
| "tpp_threshold_20_unintended_diff_only": 0.019999995827674866, |
| "tpp_threshold_50_total_metric": 0.13575001060962677, |
| "tpp_threshold_50_intended_diff_only": 0.1640000343322754, |
| "tpp_threshold_50_unintended_diff_only": 0.02825002372264862, |
| "tpp_threshold_100_total_metric": 0.24825000762939453, |
| "tpp_threshold_100_intended_diff_only": 0.28700000047683716, |
| "tpp_threshold_100_unintended_diff_only": 0.03874999284744263, |
| "tpp_threshold_500_total_metric": 0.3710000514984131, |
| "tpp_threshold_500_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_500_unintended_diff_only": 0.07899999618530273 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.015250012278556824, |
| "tpp_threshold_2_intended_diff_only": 0.018000006675720215, |
| "tpp_threshold_2_unintended_diff_only": 0.002749994397163391, |
| "tpp_threshold_5_total_metric": 0.01725001633167267, |
| "tpp_threshold_5_intended_diff_only": 0.027000010013580322, |
| "tpp_threshold_5_unintended_diff_only": 0.009749993681907654, |
| "tpp_threshold_10_total_metric": 0.06325003504753113, |
| "tpp_threshold_10_intended_diff_only": 0.07500004768371582, |
| "tpp_threshold_10_unintended_diff_only": 0.011750012636184692, |
| "tpp_threshold_20_total_metric": 0.1300000101327896, |
| "tpp_threshold_20_intended_diff_only": 0.14300000667572021, |
| "tpp_threshold_20_unintended_diff_only": 0.012999996542930603, |
| "tpp_threshold_50_total_metric": 0.31550003588199615, |
| "tpp_threshold_50_intended_diff_only": 0.3630000352859497, |
| "tpp_threshold_50_unintended_diff_only": 0.04749999940395355, |
| "tpp_threshold_100_total_metric": 0.36750003695487976, |
| "tpp_threshold_100_intended_diff_only": 0.4360000491142273, |
| "tpp_threshold_100_unintended_diff_only": 0.06850001215934753, |
| "tpp_threshold_500_total_metric": 0.3422500640153885, |
| "tpp_threshold_500_intended_diff_only": 0.4440000653266907, |
| "tpp_threshold_500_unintended_diff_only": 0.10175000131130219 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.002749994397163391, |
| "tpp_threshold_2_intended_diff_only": 0.003000020980834961, |
| "tpp_threshold_2_unintended_diff_only": 0.005750015377998352, |
| "tpp_threshold_5_total_metric": 0.0012500286102294922, |
| "tpp_threshold_5_intended_diff_only": 0.00700002908706665, |
| "tpp_threshold_5_unintended_diff_only": 0.005750000476837158, |
| "tpp_threshold_10_total_metric": 0.026000022888183594, |
| "tpp_threshold_10_intended_diff_only": 0.030000030994415283, |
| "tpp_threshold_10_unintended_diff_only": 0.0040000081062316895, |
| "tpp_threshold_20_total_metric": 0.06000004708766937, |
| "tpp_threshold_20_intended_diff_only": 0.07500004768371582, |
| "tpp_threshold_20_unintended_diff_only": 0.015000000596046448, |
| "tpp_threshold_50_total_metric": 0.13500000536441803, |
| "tpp_threshold_50_intended_diff_only": 0.15200001001358032, |
| "tpp_threshold_50_unintended_diff_only": 0.017000004649162292, |
| "tpp_threshold_100_total_metric": 0.2792500704526901, |
| "tpp_threshold_100_intended_diff_only": 0.3840000629425049, |
| "tpp_threshold_100_unintended_diff_only": 0.10474999248981476, |
| "tpp_threshold_500_total_metric": 0.247000053524971, |
| "tpp_threshold_500_intended_diff_only": 0.42100006341934204, |
| "tpp_threshold_500_unintended_diff_only": 0.17400000989437103 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": -0.011000022292137146, |
| "tpp_threshold_2_intended_diff_only": -0.008000016212463379, |
| "tpp_threshold_2_unintended_diff_only": 0.003000006079673767, |
| "tpp_threshold_5_total_metric": 0.013999983668327332, |
| "tpp_threshold_5_intended_diff_only": 0.018999993801116943, |
| "tpp_threshold_5_unintended_diff_only": 0.005000010132789612, |
| "tpp_threshold_10_total_metric": 0.033499956130981445, |
| "tpp_threshold_10_intended_diff_only": 0.04899996519088745, |
| "tpp_threshold_10_unintended_diff_only": 0.015500009059906006, |
| "tpp_threshold_20_total_metric": 0.11624997854232788, |
| "tpp_threshold_20_intended_diff_only": 0.1340000033378601, |
| "tpp_threshold_20_unintended_diff_only": 0.017750024795532227, |
| "tpp_threshold_50_total_metric": 0.2904999852180481, |
| "tpp_threshold_50_intended_diff_only": 0.3149999976158142, |
| "tpp_threshold_50_unintended_diff_only": 0.024500012397766113, |
| "tpp_threshold_100_total_metric": 0.3622499853372574, |
| "tpp_threshold_100_intended_diff_only": 0.3970000147819519, |
| "tpp_threshold_100_unintended_diff_only": 0.03475002944469452, |
| "tpp_threshold_500_total_metric": 0.34849998354911804, |
| "tpp_threshold_500_intended_diff_only": 0.42000001668930054, |
| "tpp_threshold_500_unintended_diff_only": 0.0715000331401825 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.01724998652935028, |
| "tpp_threshold_2_intended_diff_only": 0.018000006675720215, |
| "tpp_threshold_2_unintended_diff_only": 0.0007500201463699341, |
| "tpp_threshold_5_total_metric": 0.05349999666213989, |
| "tpp_threshold_5_intended_diff_only": 0.050999999046325684, |
| "tpp_threshold_5_unintended_diff_only": -0.002499997615814209, |
| "tpp_threshold_10_total_metric": 0.1040000170469284, |
| "tpp_threshold_10_intended_diff_only": 0.11800003051757812, |
| "tpp_threshold_10_unintended_diff_only": 0.01400001347064972, |
| "tpp_threshold_20_total_metric": 0.18524999916553497, |
| "tpp_threshold_20_intended_diff_only": 0.20200002193450928, |
| "tpp_threshold_20_unintended_diff_only": 0.016750022768974304, |
| "tpp_threshold_50_total_metric": 0.3150000274181366, |
| "tpp_threshold_50_intended_diff_only": 0.3400000333786011, |
| "tpp_threshold_50_unintended_diff_only": 0.025000005960464478, |
| "tpp_threshold_100_total_metric": 0.3402499854564667, |
| "tpp_threshold_100_intended_diff_only": 0.375, |
| "tpp_threshold_100_unintended_diff_only": 0.034750014543533325, |
| "tpp_threshold_500_total_metric": 0.30650003254413605, |
| "tpp_threshold_500_intended_diff_only": 0.3760000467300415, |
| "tpp_threshold_500_unintended_diff_only": 0.06950001418590546 |
| } |
| } |
| } |
| } |