| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745617723551, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.038850000500679015, |
| "tpp_threshold_2_intended_diff_only": 0.049000000953674315, |
| "tpp_threshold_2_unintended_diff_only": 0.0101500004529953, |
| "tpp_threshold_5_total_metric": 0.12040000706911087, |
| "tpp_threshold_5_intended_diff_only": 0.14330000281333924, |
| "tpp_threshold_5_unintended_diff_only": 0.022899995744228366, |
| "tpp_threshold_10_total_metric": 0.2019500106573105, |
| "tpp_threshold_10_intended_diff_only": 0.2459000051021576, |
| "tpp_threshold_10_unintended_diff_only": 0.04394999444484711, |
| "tpp_threshold_20_total_metric": 0.2322000116109848, |
| "tpp_threshold_20_intended_diff_only": 0.3064000129699707, |
| "tpp_threshold_20_unintended_diff_only": 0.0742000013589859, |
| "tpp_threshold_50_total_metric": 0.2570000171661377, |
| "tpp_threshold_50_intended_diff_only": 0.3782000124454498, |
| "tpp_threshold_50_unintended_diff_only": 0.12119999527931213, |
| "tpp_threshold_100_total_metric": 0.26517502069473264, |
| "tpp_threshold_100_intended_diff_only": 0.4317000210285187, |
| "tpp_threshold_100_unintended_diff_only": 0.166525000333786, |
| "tpp_threshold_500_total_metric": 0.22257502973079682, |
| "tpp_threshold_500_intended_diff_only": 0.4473000347614288, |
| "tpp_threshold_500_unintended_diff_only": 0.22472500503063203 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.05064999759197235, |
| "tpp_threshold_2_intended_diff_only": 0.06540000438690186, |
| "tpp_threshold_2_unintended_diff_only": 0.014750006794929504, |
| "tpp_threshold_5_total_metric": 0.17250001132488252, |
| "tpp_threshold_5_intended_diff_only": 0.20980000495910645, |
| "tpp_threshold_5_unintended_diff_only": 0.03729999363422394, |
| "tpp_threshold_10_total_metric": 0.29605001509189605, |
| "tpp_threshold_10_intended_diff_only": 0.3730000138282776, |
| "tpp_threshold_10_unintended_diff_only": 0.07694999873638153, |
| "tpp_threshold_20_total_metric": 0.29380002021789553, |
| "tpp_threshold_20_intended_diff_only": 0.42300002574920653, |
| "tpp_threshold_20_unintended_diff_only": 0.12920000553131103, |
| "tpp_threshold_50_total_metric": 0.24560001492500305, |
| "tpp_threshold_50_intended_diff_only": 0.4588000178337097, |
| "tpp_threshold_50_unintended_diff_only": 0.21320000290870667, |
| "tpp_threshold_100_total_metric": 0.1796500265598297, |
| "tpp_threshold_100_intended_diff_only": 0.46660003662109373, |
| "tpp_threshold_100_unintended_diff_only": 0.286950010061264, |
| "tpp_threshold_500_total_metric": 0.09320002794265747, |
| "tpp_threshold_500_intended_diff_only": 0.4698000431060791, |
| "tpp_threshold_500_unintended_diff_only": 0.3766000151634216 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.02705000340938568, |
| "tpp_threshold_2_intended_diff_only": 0.032599997520446775, |
| "tpp_threshold_2_unintended_diff_only": 0.005549994111061096, |
| "tpp_threshold_5_total_metric": 0.06830000281333923, |
| "tpp_threshold_5_intended_diff_only": 0.07680000066757202, |
| "tpp_threshold_5_unintended_diff_only": 0.008499997854232787, |
| "tpp_threshold_10_total_metric": 0.10785000622272492, |
| "tpp_threshold_10_intended_diff_only": 0.1187999963760376, |
| "tpp_threshold_10_unintended_diff_only": 0.010949990153312683, |
| "tpp_threshold_20_total_metric": 0.1706000030040741, |
| "tpp_threshold_20_intended_diff_only": 0.18980000019073487, |
| "tpp_threshold_20_unintended_diff_only": 0.019199997186660767, |
| "tpp_threshold_50_total_metric": 0.2684000194072723, |
| "tpp_threshold_50_intended_diff_only": 0.2976000070571899, |
| "tpp_threshold_50_unintended_diff_only": 0.029199987649917603, |
| "tpp_threshold_100_total_metric": 0.35070001482963564, |
| "tpp_threshold_100_intended_diff_only": 0.3968000054359436, |
| "tpp_threshold_100_unintended_diff_only": 0.04609999060630798, |
| "tpp_threshold_500_total_metric": 0.35195003151893617, |
| "tpp_threshold_500_intended_diff_only": 0.42480002641677855, |
| "tpp_threshold_500_unintended_diff_only": 0.07284999489784241 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.7.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.7.hook_resid_post", |
| "hook_layer": 7, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.1797499656677246, |
| "tpp_threshold_2_intended_diff_only": 0.23199999332427979, |
| "tpp_threshold_2_unintended_diff_only": 0.052250027656555176, |
| "tpp_threshold_5_total_metric": 0.21899999678134918, |
| "tpp_threshold_5_intended_diff_only": 0.3059999942779541, |
| "tpp_threshold_5_unintended_diff_only": 0.08699999749660492, |
| "tpp_threshold_10_total_metric": 0.23475001752376556, |
| "tpp_threshold_10_intended_diff_only": 0.3410000205039978, |
| "tpp_threshold_10_unintended_diff_only": 0.10625000298023224, |
| "tpp_threshold_20_total_metric": 0.20399999618530273, |
| "tpp_threshold_20_intended_diff_only": 0.4020000100135803, |
| "tpp_threshold_20_unintended_diff_only": 0.1980000138282776, |
| "tpp_threshold_50_total_metric": 0.16349998116493225, |
| "tpp_threshold_50_intended_diff_only": 0.4399999976158142, |
| "tpp_threshold_50_unintended_diff_only": 0.27650001645088196, |
| "tpp_threshold_100_total_metric": 0.125249981880188, |
| "tpp_threshold_100_intended_diff_only": 0.44999998807907104, |
| "tpp_threshold_100_unintended_diff_only": 0.32475000619888306, |
| "tpp_threshold_500_total_metric": 0.0637499988079071, |
| "tpp_threshold_500_intended_diff_only": 0.4520000219345093, |
| "tpp_threshold_500_unintended_diff_only": 0.3882500231266022 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.018250003457069397, |
| "tpp_threshold_2_intended_diff_only": 0.02799999713897705, |
| "tpp_threshold_2_unintended_diff_only": 0.009749993681907654, |
| "tpp_threshold_5_total_metric": 0.03999999165534973, |
| "tpp_threshold_5_intended_diff_only": 0.05199998617172241, |
| "tpp_threshold_5_unintended_diff_only": 0.01199999451637268, |
| "tpp_threshold_10_total_metric": 0.23900000751018524, |
| "tpp_threshold_10_intended_diff_only": 0.3190000057220459, |
| "tpp_threshold_10_unintended_diff_only": 0.07999999821186066, |
| "tpp_threshold_20_total_metric": 0.28049997985363007, |
| "tpp_threshold_20_intended_diff_only": 0.3709999918937683, |
| "tpp_threshold_20_unintended_diff_only": 0.09050001204013824, |
| "tpp_threshold_50_total_metric": 0.19350004196166992, |
| "tpp_threshold_50_intended_diff_only": 0.4230000376701355, |
| "tpp_threshold_50_unintended_diff_only": 0.22949999570846558, |
| "tpp_threshold_100_total_metric": 0.1510000377893448, |
| "tpp_threshold_100_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_100_unintended_diff_only": 0.29900000989437103, |
| "tpp_threshold_500_total_metric": 0.0805000364780426, |
| "tpp_threshold_500_intended_diff_only": 0.46400004625320435, |
| "tpp_threshold_500_unintended_diff_only": 0.38350000977516174 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.05224999785423279, |
| "tpp_threshold_2_intended_diff_only": 0.06000000238418579, |
| "tpp_threshold_2_unintended_diff_only": 0.007750004529953003, |
| "tpp_threshold_5_total_metric": 0.22624994814395905, |
| "tpp_threshold_5_intended_diff_only": 0.2669999599456787, |
| "tpp_threshold_5_unintended_diff_only": 0.040750011801719666, |
| "tpp_threshold_10_total_metric": 0.28224995732307434, |
| "tpp_threshold_10_intended_diff_only": 0.36399996280670166, |
| "tpp_threshold_10_unintended_diff_only": 0.08175000548362732, |
| "tpp_threshold_20_total_metric": 0.3044999837875366, |
| "tpp_threshold_20_intended_diff_only": 0.41200000047683716, |
| "tpp_threshold_20_unintended_diff_only": 0.10750001668930054, |
| "tpp_threshold_50_total_metric": 0.2827499657869339, |
| "tpp_threshold_50_intended_diff_only": 0.4559999704360962, |
| "tpp_threshold_50_unintended_diff_only": 0.1732500046491623, |
| "tpp_threshold_100_total_metric": 0.19200000166893005, |
| "tpp_threshold_100_intended_diff_only": 0.4570000171661377, |
| "tpp_threshold_100_unintended_diff_only": 0.26500001549720764, |
| "tpp_threshold_500_total_metric": 0.060749977827072144, |
| "tpp_threshold_500_intended_diff_only": 0.4570000171661377, |
| "tpp_threshold_500_unintended_diff_only": 0.39625003933906555 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.002750009298324585, |
| "tpp_threshold_2_intended_diff_only": 0.0040000081062316895, |
| "tpp_threshold_2_unintended_diff_only": 0.0012499988079071045, |
| "tpp_threshold_5_total_metric": 0.17850005626678467, |
| "tpp_threshold_5_intended_diff_only": 0.19200003147125244, |
| "tpp_threshold_5_unintended_diff_only": 0.013499975204467773, |
| "tpp_threshold_10_total_metric": 0.43375004827976227, |
| "tpp_threshold_10_intended_diff_only": 0.45100003480911255, |
| "tpp_threshold_10_unintended_diff_only": 0.01724998652935028, |
| "tpp_threshold_20_total_metric": 0.40250006318092346, |
| "tpp_threshold_20_intended_diff_only": 0.48100006580352783, |
| "tpp_threshold_20_unintended_diff_only": 0.07850000262260437, |
| "tpp_threshold_50_total_metric": 0.34975002706050873, |
| "tpp_threshold_50_intended_diff_only": 0.49400001764297485, |
| "tpp_threshold_50_unintended_diff_only": 0.14424999058246613, |
| "tpp_threshold_100_total_metric": 0.24100004136562347, |
| "tpp_threshold_100_intended_diff_only": 0.49500006437301636, |
| "tpp_threshold_100_unintended_diff_only": 0.2540000230073929, |
| "tpp_threshold_500_total_metric": 0.15100005269050598, |
| "tpp_threshold_500_intended_diff_only": 0.49500006437301636, |
| "tpp_threshold_500_unintended_diff_only": 0.3440000116825104 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.000250011682510376, |
| "tpp_threshold_2_intended_diff_only": 0.003000020980834961, |
| "tpp_threshold_2_unintended_diff_only": 0.002750009298324585, |
| "tpp_threshold_5_total_metric": 0.1987500637769699, |
| "tpp_threshold_5_intended_diff_only": 0.23200005292892456, |
| "tpp_threshold_5_unintended_diff_only": 0.03324998915195465, |
| "tpp_threshold_10_total_metric": 0.29050004482269287, |
| "tpp_threshold_10_intended_diff_only": 0.39000004529953003, |
| "tpp_threshold_10_unintended_diff_only": 0.09950000047683716, |
| "tpp_threshold_20_total_metric": 0.27750007808208466, |
| "tpp_threshold_20_intended_diff_only": 0.4490000605583191, |
| "tpp_threshold_20_unintended_diff_only": 0.17149998247623444, |
| "tpp_threshold_50_total_metric": 0.23850005865097046, |
| "tpp_threshold_50_intended_diff_only": 0.48100006580352783, |
| "tpp_threshold_50_unintended_diff_only": 0.24250000715255737, |
| "tpp_threshold_100_total_metric": 0.18900007009506226, |
| "tpp_threshold_100_intended_diff_only": 0.48100006580352783, |
| "tpp_threshold_100_unintended_diff_only": 0.2919999957084656, |
| "tpp_threshold_500_total_metric": 0.11000007390975952, |
| "tpp_threshold_500_intended_diff_only": 0.48100006580352783, |
| "tpp_threshold_500_unintended_diff_only": 0.3709999918937683 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.005000025033950806, |
| "tpp_threshold_2_intended_diff_only": 0.008000016212463379, |
| "tpp_threshold_2_unintended_diff_only": 0.0029999911785125732, |
| "tpp_threshold_5_total_metric": 0.00925000011920929, |
| "tpp_threshold_5_intended_diff_only": 0.013999998569488525, |
| "tpp_threshold_5_unintended_diff_only": 0.004749998450279236, |
| "tpp_threshold_10_total_metric": 0.025999993085861206, |
| "tpp_threshold_10_intended_diff_only": 0.03299999237060547, |
| "tpp_threshold_10_unintended_diff_only": 0.006999999284744263, |
| "tpp_threshold_20_total_metric": 0.10500004887580872, |
| "tpp_threshold_20_intended_diff_only": 0.12700003385543823, |
| "tpp_threshold_20_unintended_diff_only": 0.021999984979629517, |
| "tpp_threshold_50_total_metric": 0.25850003957748413, |
| "tpp_threshold_50_intended_diff_only": 0.2900000214576721, |
| "tpp_threshold_50_unintended_diff_only": 0.03149998188018799, |
| "tpp_threshold_100_total_metric": 0.3562500327825546, |
| "tpp_threshold_100_intended_diff_only": 0.3930000066757202, |
| "tpp_threshold_100_unintended_diff_only": 0.03674997389316559, |
| "tpp_threshold_500_total_metric": 0.38600000739097595, |
| "tpp_threshold_500_intended_diff_only": 0.4490000009536743, |
| "tpp_threshold_500_unintended_diff_only": 0.06299999356269836 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.07375000417232513, |
| "tpp_threshold_2_intended_diff_only": 0.08300000429153442, |
| "tpp_threshold_2_unintended_diff_only": 0.00925000011920929, |
| "tpp_threshold_5_total_metric": 0.21250000596046448, |
| "tpp_threshold_5_intended_diff_only": 0.23100000619888306, |
| "tpp_threshold_5_unintended_diff_only": 0.01850000023841858, |
| "tpp_threshold_10_total_metric": 0.29649998247623444, |
| "tpp_threshold_10_intended_diff_only": 0.31699997186660767, |
| "tpp_threshold_10_unintended_diff_only": 0.02049998939037323, |
| "tpp_threshold_20_total_metric": 0.35749997198581696, |
| "tpp_threshold_20_intended_diff_only": 0.38599997758865356, |
| "tpp_threshold_20_unintended_diff_only": 0.02850000560283661, |
| "tpp_threshold_50_total_metric": 0.3854999989271164, |
| "tpp_threshold_50_intended_diff_only": 0.44099998474121094, |
| "tpp_threshold_50_unintended_diff_only": 0.05549998581409454, |
| "tpp_threshold_100_total_metric": 0.3775000125169754, |
| "tpp_threshold_100_intended_diff_only": 0.44300001859664917, |
| "tpp_threshold_100_unintended_diff_only": 0.06550000607967377, |
| "tpp_threshold_500_total_metric": 0.34925001859664917, |
| "tpp_threshold_500_intended_diff_only": 0.44300001859664917, |
| "tpp_threshold_500_unintended_diff_only": 0.09375 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": 0.004249989986419678, |
| "tpp_threshold_2_intended_diff_only": 0.004999995231628418, |
| "tpp_threshold_2_unintended_diff_only": 0.0007500052452087402, |
| "tpp_threshold_5_total_metric": 0.004999995231628418, |
| "tpp_threshold_5_intended_diff_only": 0.009999990463256836, |
| "tpp_threshold_5_unintended_diff_only": 0.004999995231628418, |
| "tpp_threshold_10_total_metric": -0.0027499794960021973, |
| "tpp_threshold_10_intended_diff_only": 0.0040000081062316895, |
| "tpp_threshold_10_unintended_diff_only": 0.006749987602233887, |
| "tpp_threshold_20_total_metric": 0.014249980449676514, |
| "tpp_threshold_20_intended_diff_only": 0.02399998903274536, |
| "tpp_threshold_20_unintended_diff_only": 0.009750008583068848, |
| "tpp_threshold_50_total_metric": 0.06974999606609344, |
| "tpp_threshold_50_intended_diff_only": 0.08399999141693115, |
| "tpp_threshold_50_unintended_diff_only": 0.014249995350837708, |
| "tpp_threshold_100_total_metric": 0.28700001537799835, |
| "tpp_threshold_100_intended_diff_only": 0.3610000014305115, |
| "tpp_threshold_100_unintended_diff_only": 0.07399998605251312, |
| "tpp_threshold_500_total_metric": 0.2877500355243683, |
| "tpp_threshold_500_intended_diff_only": 0.4190000295639038, |
| "tpp_threshold_500_unintended_diff_only": 0.13124999403953552 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.0059999823570251465, |
| "tpp_threshold_2_intended_diff_only": 0.015999972820281982, |
| "tpp_threshold_2_unintended_diff_only": 0.009999990463256836, |
| "tpp_threshold_5_total_metric": 0.04324997961521149, |
| "tpp_threshold_5_intended_diff_only": 0.05199998617172241, |
| "tpp_threshold_5_unintended_diff_only": 0.008750006556510925, |
| "tpp_threshold_10_total_metric": 0.10624997317790985, |
| "tpp_threshold_10_intended_diff_only": 0.11399996280670166, |
| "tpp_threshold_10_unintended_diff_only": 0.007749989628791809, |
| "tpp_threshold_20_total_metric": 0.16899999976158142, |
| "tpp_threshold_20_intended_diff_only": 0.1850000023841858, |
| "tpp_threshold_20_unintended_diff_only": 0.01600000262260437, |
| "tpp_threshold_50_total_metric": 0.3045000284910202, |
| "tpp_threshold_50_intended_diff_only": 0.3230000138282776, |
| "tpp_threshold_50_unintended_diff_only": 0.018499985337257385, |
| "tpp_threshold_100_total_metric": 0.39149999618530273, |
| "tpp_threshold_100_intended_diff_only": 0.4129999876022339, |
| "tpp_threshold_100_unintended_diff_only": 0.021499991416931152, |
| "tpp_threshold_500_total_metric": 0.39900003373622894, |
| "tpp_threshold_500_intended_diff_only": 0.43300002813339233, |
| "tpp_threshold_500_unintended_diff_only": 0.03399999439716339 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.04625001549720764, |
| "tpp_threshold_2_intended_diff_only": 0.050999999046325684, |
| "tpp_threshold_2_unintended_diff_only": 0.004749983549118042, |
| "tpp_threshold_5_total_metric": 0.0715000331401825, |
| "tpp_threshold_5_intended_diff_only": 0.07700002193450928, |
| "tpp_threshold_5_unintended_diff_only": 0.005499988794326782, |
| "tpp_threshold_10_total_metric": 0.11325006186962128, |
| "tpp_threshold_10_intended_diff_only": 0.1260000467300415, |
| "tpp_threshold_10_unintended_diff_only": 0.012749984860420227, |
| "tpp_threshold_20_total_metric": 0.20725001394748688, |
| "tpp_threshold_20_intended_diff_only": 0.22699999809265137, |
| "tpp_threshold_20_unintended_diff_only": 0.01974998414516449, |
| "tpp_threshold_50_total_metric": 0.3237500339746475, |
| "tpp_threshold_50_intended_diff_only": 0.3500000238418579, |
| "tpp_threshold_50_unintended_diff_only": 0.026249989867210388, |
| "tpp_threshold_100_total_metric": 0.341250017285347, |
| "tpp_threshold_100_intended_diff_only": 0.37400001287460327, |
| "tpp_threshold_100_unintended_diff_only": 0.03274999558925629, |
| "tpp_threshold_500_total_metric": 0.33775006234645844, |
| "tpp_threshold_500_intended_diff_only": 0.3800000548362732, |
| "tpp_threshold_500_unintended_diff_only": 0.04224999248981476 |
| } |
| } |
| } |
| } |