| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752595297, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.09577501267194748, |
| "tpp_threshold_2_intended_diff_only": 0.13370001316070557, |
| "tpp_threshold_2_unintended_diff_only": 0.03792500048875809, |
| "tpp_threshold_5_total_metric": 0.1066250130534172, |
| "tpp_threshold_5_intended_diff_only": 0.19700002074241638, |
| "tpp_threshold_5_unintended_diff_only": 0.09037500768899917, |
| "tpp_threshold_10_total_metric": 0.10720000118017196, |
| "tpp_threshold_10_intended_diff_only": 0.23860000371932985, |
| "tpp_threshold_10_unintended_diff_only": 0.13140000253915787, |
| "tpp_threshold_20_total_metric": 0.11999999731779099, |
| "tpp_threshold_20_intended_diff_only": 0.2837000072002411, |
| "tpp_threshold_20_unintended_diff_only": 0.1637000098824501, |
| "tpp_threshold_50_total_metric": 0.17365001738071442, |
| "tpp_threshold_50_intended_diff_only": 0.3700000286102295, |
| "tpp_threshold_50_unintended_diff_only": 0.1963500112295151, |
| "tpp_threshold_100_total_metric": 0.20192500501871108, |
| "tpp_threshold_100_intended_diff_only": 0.4146000146865845, |
| "tpp_threshold_100_unintended_diff_only": 0.21267500966787337, |
| "tpp_threshold_500_total_metric": 0.19970003366470337, |
| "tpp_threshold_500_intended_diff_only": 0.4451000452041626, |
| "tpp_threshold_500_unintended_diff_only": 0.24540001153945923 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.18645002841949462, |
| "tpp_threshold_2_intended_diff_only": 0.25760003328323366, |
| "tpp_threshold_2_unintended_diff_only": 0.07115000486373901, |
| "tpp_threshold_5_total_metric": 0.18865001499652861, |
| "tpp_threshold_5_intended_diff_only": 0.360200035572052, |
| "tpp_threshold_5_unintended_diff_only": 0.17155002057552338, |
| "tpp_threshold_10_total_metric": 0.14434999525547026, |
| "tpp_threshold_10_intended_diff_only": 0.39360001087188723, |
| "tpp_threshold_10_unintended_diff_only": 0.24925001561641694, |
| "tpp_threshold_20_total_metric": 0.11660000085830688, |
| "tpp_threshold_20_intended_diff_only": 0.42280001640319825, |
| "tpp_threshold_20_unintended_diff_only": 0.30620001554489135, |
| "tpp_threshold_50_total_metric": 0.09555000364780426, |
| "tpp_threshold_50_intended_diff_only": 0.45420002937316895, |
| "tpp_threshold_50_unintended_diff_only": 0.3586500257253647, |
| "tpp_threshold_100_total_metric": 0.0835000067949295, |
| "tpp_threshold_100_intended_diff_only": 0.4646000266075134, |
| "tpp_threshold_100_unintended_diff_only": 0.3811000198125839, |
| "tpp_threshold_500_total_metric": 0.06215002834796905, |
| "tpp_threshold_500_intended_diff_only": 0.46920005083084104, |
| "tpp_threshold_500_unintended_diff_only": 0.40705002248287203 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.00509999692440033, |
| "tpp_threshold_2_intended_diff_only": 0.00979999303817749, |
| "tpp_threshold_2_unintended_diff_only": 0.004699996113777161, |
| "tpp_threshold_5_total_metric": 0.024600011110305787, |
| "tpp_threshold_5_intended_diff_only": 0.03380000591278076, |
| "tpp_threshold_5_unintended_diff_only": 0.009199994802474975, |
| "tpp_threshold_10_total_metric": 0.07005000710487366, |
| "tpp_threshold_10_intended_diff_only": 0.08359999656677246, |
| "tpp_threshold_10_unintended_diff_only": 0.013549989461898804, |
| "tpp_threshold_20_total_metric": 0.12339999377727509, |
| "tpp_threshold_20_intended_diff_only": 0.14459999799728393, |
| "tpp_threshold_20_unintended_diff_only": 0.02120000422000885, |
| "tpp_threshold_50_total_metric": 0.2517500311136246, |
| "tpp_threshold_50_intended_diff_only": 0.28580002784729003, |
| "tpp_threshold_50_unintended_diff_only": 0.03404999673366547, |
| "tpp_threshold_100_total_metric": 0.3203500032424927, |
| "tpp_threshold_100_intended_diff_only": 0.3646000027656555, |
| "tpp_threshold_100_unintended_diff_only": 0.044249999523162845, |
| "tpp_threshold_500_total_metric": 0.3372500389814377, |
| "tpp_threshold_500_intended_diff_only": 0.4210000395774841, |
| "tpp_threshold_500_unintended_diff_only": 0.08375000059604645 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.12.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.12.hook_resid_post", |
| "hook_layer": 12, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": "gemma-2-2b/12-res-matryoshka-dc", |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.22425004839897156, |
| "tpp_threshold_2_intended_diff_only": 0.2640000581741333, |
| "tpp_threshold_2_unintended_diff_only": 0.03975000977516174, |
| "tpp_threshold_5_total_metric": 0.20450004935264587, |
| "tpp_threshold_5_intended_diff_only": 0.35200005769729614, |
| "tpp_threshold_5_unintended_diff_only": 0.14750000834465027, |
| "tpp_threshold_10_total_metric": 0.1757500022649765, |
| "tpp_threshold_10_intended_diff_only": 0.37400001287460327, |
| "tpp_threshold_10_unintended_diff_only": 0.19825001060962677, |
| "tpp_threshold_20_total_metric": 0.11675001680850983, |
| "tpp_threshold_20_intended_diff_only": 0.4150000214576721, |
| "tpp_threshold_20_unintended_diff_only": 0.2982500046491623, |
| "tpp_threshold_50_total_metric": 0.08200004696846008, |
| "tpp_threshold_50_intended_diff_only": 0.4440000653266907, |
| "tpp_threshold_50_unintended_diff_only": 0.3620000183582306, |
| "tpp_threshold_100_total_metric": 0.07000002264976501, |
| "tpp_threshold_100_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_100_unintended_diff_only": 0.3800000250339508, |
| "tpp_threshold_500_total_metric": 0.048500046133995056, |
| "tpp_threshold_500_intended_diff_only": 0.4580000638961792, |
| "tpp_threshold_500_unintended_diff_only": 0.40950001776218414 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.14475004374980927, |
| "tpp_threshold_2_intended_diff_only": 0.23200005292892456, |
| "tpp_threshold_2_unintended_diff_only": 0.0872500091791153, |
| "tpp_threshold_5_total_metric": 0.15125001966953278, |
| "tpp_threshold_5_intended_diff_only": 0.3020000457763672, |
| "tpp_threshold_5_unintended_diff_only": 0.1507500261068344, |
| "tpp_threshold_10_total_metric": 0.11549998819828033, |
| "tpp_threshold_10_intended_diff_only": 0.34700000286102295, |
| "tpp_threshold_10_unintended_diff_only": 0.23150001466274261, |
| "tpp_threshold_20_total_metric": 0.07825002074241638, |
| "tpp_threshold_20_intended_diff_only": 0.38600003719329834, |
| "tpp_threshold_20_unintended_diff_only": 0.30775001645088196, |
| "tpp_threshold_50_total_metric": 0.07075002789497375, |
| "tpp_threshold_50_intended_diff_only": 0.4310000538825989, |
| "tpp_threshold_50_unintended_diff_only": 0.3602500259876251, |
| "tpp_threshold_100_total_metric": 0.07225000858306885, |
| "tpp_threshold_100_intended_diff_only": 0.4570000171661377, |
| "tpp_threshold_100_unintended_diff_only": 0.38475000858306885, |
| "tpp_threshold_500_total_metric": 0.05825003981590271, |
| "tpp_threshold_500_intended_diff_only": 0.4630000591278076, |
| "tpp_threshold_500_unintended_diff_only": 0.4047500193119049 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.132999986410141, |
| "tpp_threshold_2_intended_diff_only": 0.24199998378753662, |
| "tpp_threshold_2_unintended_diff_only": 0.10899999737739563, |
| "tpp_threshold_5_total_metric": 0.1315000057220459, |
| "tpp_threshold_5_intended_diff_only": 0.32200002670288086, |
| "tpp_threshold_5_unintended_diff_only": 0.19050002098083496, |
| "tpp_threshold_10_total_metric": 0.06574997305870056, |
| "tpp_threshold_10_intended_diff_only": 0.3619999885559082, |
| "tpp_threshold_10_unintended_diff_only": 0.29625001549720764, |
| "tpp_threshold_20_total_metric": 0.0627499669790268, |
| "tpp_threshold_20_intended_diff_only": 0.38499999046325684, |
| "tpp_threshold_20_unintended_diff_only": 0.32225002348423004, |
| "tpp_threshold_50_total_metric": 0.08000001311302185, |
| "tpp_threshold_50_intended_diff_only": 0.437000036239624, |
| "tpp_threshold_50_unintended_diff_only": 0.3570000231266022, |
| "tpp_threshold_100_total_metric": 0.07574999332427979, |
| "tpp_threshold_100_intended_diff_only": 0.4520000219345093, |
| "tpp_threshold_100_unintended_diff_only": 0.3762500286102295, |
| "tpp_threshold_500_total_metric": 0.04125000536441803, |
| "tpp_threshold_500_intended_diff_only": 0.45500004291534424, |
| "tpp_threshold_500_unintended_diff_only": 0.4137500375509262 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.2780000567436218, |
| "tpp_threshold_2_intended_diff_only": 0.3250000476837158, |
| "tpp_threshold_2_unintended_diff_only": 0.046999990940093994, |
| "tpp_threshold_5_total_metric": 0.33150002360343933, |
| "tpp_threshold_5_intended_diff_only": 0.44600003957748413, |
| "tpp_threshold_5_unintended_diff_only": 0.1145000159740448, |
| "tpp_threshold_10_total_metric": 0.26775000989437103, |
| "tpp_threshold_10_intended_diff_only": 0.4710000157356262, |
| "tpp_threshold_10_unintended_diff_only": 0.2032500058412552, |
| "tpp_threshold_20_total_metric": 0.22550003230571747, |
| "tpp_threshold_20_intended_diff_only": 0.47800004482269287, |
| "tpp_threshold_20_unintended_diff_only": 0.2525000125169754, |
| "tpp_threshold_50_total_metric": 0.13999997079372406, |
| "tpp_threshold_50_intended_diff_only": 0.48100000619888306, |
| "tpp_threshold_50_unintended_diff_only": 0.341000035405159, |
| "tpp_threshold_100_total_metric": 0.11675000190734863, |
| "tpp_threshold_100_intended_diff_only": 0.48500001430511475, |
| "tpp_threshold_100_unintended_diff_only": 0.3682500123977661, |
| "tpp_threshold_500_total_metric": 0.09075003862380981, |
| "tpp_threshold_500_intended_diff_only": 0.49100005626678467, |
| "tpp_threshold_500_unintended_diff_only": 0.40025001764297485 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.1522500067949295, |
| "tpp_threshold_2_intended_diff_only": 0.2250000238418579, |
| "tpp_threshold_2_unintended_diff_only": 0.0727500170469284, |
| "tpp_threshold_5_total_metric": 0.12449997663497925, |
| "tpp_threshold_5_intended_diff_only": 0.3790000081062317, |
| "tpp_threshold_5_unintended_diff_only": 0.25450003147125244, |
| "tpp_threshold_10_total_metric": 0.09700000286102295, |
| "tpp_threshold_10_intended_diff_only": 0.4140000343322754, |
| "tpp_threshold_10_unintended_diff_only": 0.31700003147125244, |
| "tpp_threshold_20_total_metric": 0.09974996745586395, |
| "tpp_threshold_20_intended_diff_only": 0.44999998807907104, |
| "tpp_threshold_20_unintended_diff_only": 0.3502500206232071, |
| "tpp_threshold_50_total_metric": 0.10499995946884155, |
| "tpp_threshold_50_intended_diff_only": 0.4779999852180481, |
| "tpp_threshold_50_unintended_diff_only": 0.37300002574920654, |
| "tpp_threshold_100_total_metric": 0.08275000751018524, |
| "tpp_threshold_100_intended_diff_only": 0.4790000319480896, |
| "tpp_threshold_100_unintended_diff_only": 0.39625002443790436, |
| "tpp_threshold_500_total_metric": 0.07200001180171967, |
| "tpp_threshold_500_intended_diff_only": 0.4790000319480896, |
| "tpp_threshold_500_unintended_diff_only": 0.40700002014636993 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.010500043630599976, |
| "tpp_threshold_2_intended_diff_only": 0.01500004529953003, |
| "tpp_threshold_2_unintended_diff_only": 0.004500001668930054, |
| "tpp_threshold_5_total_metric": 0.00600007176399231, |
| "tpp_threshold_5_intended_diff_only": 0.010000050067901611, |
| "tpp_threshold_5_unintended_diff_only": 0.003999978303909302, |
| "tpp_threshold_10_total_metric": 0.01400001347064972, |
| "tpp_threshold_10_intended_diff_only": 0.023000001907348633, |
| "tpp_threshold_10_unintended_diff_only": 0.008999988436698914, |
| "tpp_threshold_20_total_metric": 0.05600005388259888, |
| "tpp_threshold_20_intended_diff_only": 0.0700000524520874, |
| "tpp_threshold_20_unintended_diff_only": 0.013999998569488525, |
| "tpp_threshold_50_total_metric": 0.20850007236003876, |
| "tpp_threshold_50_intended_diff_only": 0.23600006103515625, |
| "tpp_threshold_50_unintended_diff_only": 0.027499988675117493, |
| "tpp_threshold_100_total_metric": 0.3085000365972519, |
| "tpp_threshold_100_intended_diff_only": 0.3450000286102295, |
| "tpp_threshold_100_unintended_diff_only": 0.0364999920129776, |
| "tpp_threshold_500_total_metric": 0.3837500810623169, |
| "tpp_threshold_500_intended_diff_only": 0.4530000686645508, |
| "tpp_threshold_500_unintended_diff_only": 0.06924998760223389 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.004999935626983643, |
| "tpp_threshold_2_intended_diff_only": 0.008999943733215332, |
| "tpp_threshold_2_unintended_diff_only": 0.0040000081062316895, |
| "tpp_threshold_5_total_metric": 0.0037500113248825073, |
| "tpp_threshold_5_intended_diff_only": 0.018999993801116943, |
| "tpp_threshold_5_unintended_diff_only": 0.015249982476234436, |
| "tpp_threshold_10_total_metric": 0.05750000476837158, |
| "tpp_threshold_10_intended_diff_only": 0.06999999284744263, |
| "tpp_threshold_10_unintended_diff_only": 0.012499988079071045, |
| "tpp_threshold_20_total_metric": 0.10599994659423828, |
| "tpp_threshold_20_intended_diff_only": 0.11899995803833008, |
| "tpp_threshold_20_unintended_diff_only": 0.013000011444091797, |
| "tpp_threshold_50_total_metric": 0.275749996304512, |
| "tpp_threshold_50_intended_diff_only": 0.3100000023841858, |
| "tpp_threshold_50_unintended_diff_only": 0.03425000607967377, |
| "tpp_threshold_100_total_metric": 0.35874998569488525, |
| "tpp_threshold_100_intended_diff_only": 0.3999999761581421, |
| "tpp_threshold_100_unintended_diff_only": 0.041249990463256836, |
| "tpp_threshold_500_total_metric": 0.341499999165535, |
| "tpp_threshold_500_intended_diff_only": 0.43400001525878906, |
| "tpp_threshold_500_unintended_diff_only": 0.09250001609325409 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.009750008583068848, |
| "tpp_threshold_2_intended_diff_only": -0.003000020980834961, |
| "tpp_threshold_2_unintended_diff_only": 0.006749987602233887, |
| "tpp_threshold_5_total_metric": 0.007749959826469421, |
| "tpp_threshold_5_intended_diff_only": 0.015999972820281982, |
| "tpp_threshold_5_unintended_diff_only": 0.008250012993812561, |
| "tpp_threshold_10_total_metric": 0.03950001299381256, |
| "tpp_threshold_10_intended_diff_only": 0.050999999046325684, |
| "tpp_threshold_10_unintended_diff_only": 0.011499986052513123, |
| "tpp_threshold_20_total_metric": 0.072999969124794, |
| "tpp_threshold_20_intended_diff_only": 0.09499996900558472, |
| "tpp_threshold_20_unintended_diff_only": 0.02199999988079071, |
| "tpp_threshold_50_total_metric": 0.1652500331401825, |
| "tpp_threshold_50_intended_diff_only": 0.19700002670288086, |
| "tpp_threshold_50_unintended_diff_only": 0.031749993562698364, |
| "tpp_threshold_100_total_metric": 0.2590000182390213, |
| "tpp_threshold_100_intended_diff_only": 0.30800002813339233, |
| "tpp_threshold_100_unintended_diff_only": 0.04900000989437103, |
| "tpp_threshold_500_total_metric": 0.33275003731250763, |
| "tpp_threshold_500_intended_diff_only": 0.4280000329017639, |
| "tpp_threshold_500_unintended_diff_only": 0.09524999558925629 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.004250004887580872, |
| "tpp_threshold_2_intended_diff_only": 0.009000003337860107, |
| "tpp_threshold_2_unintended_diff_only": 0.004749998450279236, |
| "tpp_threshold_5_total_metric": 0.045000046491622925, |
| "tpp_threshold_5_intended_diff_only": 0.05600005388259888, |
| "tpp_threshold_5_unintended_diff_only": 0.011000007390975952, |
| "tpp_threshold_10_total_metric": 0.10975003242492676, |
| "tpp_threshold_10_intended_diff_only": 0.12800002098083496, |
| "tpp_threshold_10_unintended_diff_only": 0.018249988555908203, |
| "tpp_threshold_20_total_metric": 0.15524999797344208, |
| "tpp_threshold_20_intended_diff_only": 0.18000000715255737, |
| "tpp_threshold_20_unintended_diff_only": 0.024750009179115295, |
| "tpp_threshold_50_total_metric": 0.30125007033348083, |
| "tpp_threshold_50_intended_diff_only": 0.3380000591278076, |
| "tpp_threshold_50_unintended_diff_only": 0.03674998879432678, |
| "tpp_threshold_100_total_metric": 0.3515000194311142, |
| "tpp_threshold_100_intended_diff_only": 0.3970000147819519, |
| "tpp_threshold_100_unintended_diff_only": 0.04549999535083771, |
| "tpp_threshold_500_total_metric": 0.32325008511543274, |
| "tpp_threshold_500_intended_diff_only": 0.4160000681877136, |
| "tpp_threshold_500_unintended_diff_only": 0.09274998307228088 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.015500009059906006, |
| "tpp_threshold_2_intended_diff_only": 0.018999993801116943, |
| "tpp_threshold_2_unintended_diff_only": 0.0034999847412109375, |
| "tpp_threshold_5_total_metric": 0.06049996614456177, |
| "tpp_threshold_5_intended_diff_only": 0.0679999589920044, |
| "tpp_threshold_5_unintended_diff_only": 0.007499992847442627, |
| "tpp_threshold_10_total_metric": 0.12949997186660767, |
| "tpp_threshold_10_intended_diff_only": 0.1459999680519104, |
| "tpp_threshold_10_unintended_diff_only": 0.016499996185302734, |
| "tpp_threshold_20_total_metric": 0.22675000131130219, |
| "tpp_threshold_20_intended_diff_only": 0.2590000033378601, |
| "tpp_threshold_20_unintended_diff_only": 0.03225000202655792, |
| "tpp_threshold_50_total_metric": 0.30799998342990875, |
| "tpp_threshold_50_intended_diff_only": 0.3479999899864197, |
| "tpp_threshold_50_unintended_diff_only": 0.040000006556510925, |
| "tpp_threshold_100_total_metric": 0.32399995625019073, |
| "tpp_threshold_100_intended_diff_only": 0.37299996614456177, |
| "tpp_threshold_100_unintended_diff_only": 0.04900000989437103, |
| "tpp_threshold_500_total_metric": 0.3049999922513962, |
| "tpp_threshold_500_intended_diff_only": 0.37400001287460327, |
| "tpp_threshold_500_unintended_diff_only": 0.06900002062320709 |
| } |
| } |
| } |
| } |