| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752908405, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.0738000065088272, |
| "tpp_threshold_2_intended_diff_only": 0.09070001244544983, |
| "tpp_threshold_2_unintended_diff_only": 0.01690000593662262, |
| "tpp_threshold_5_total_metric": 0.14772501438856125, |
| "tpp_threshold_5_intended_diff_only": 0.18490002155303953, |
| "tpp_threshold_5_unintended_diff_only": 0.0371750071644783, |
| "tpp_threshold_10_total_metric": 0.17620001137256622, |
| "tpp_threshold_10_intended_diff_only": 0.2523000240325928, |
| "tpp_threshold_10_unintended_diff_only": 0.07610001266002654, |
| "tpp_threshold_20_total_metric": 0.1892999976873398, |
| "tpp_threshold_20_intended_diff_only": 0.2999000132083893, |
| "tpp_threshold_20_unintended_diff_only": 0.11060001552104949, |
| "tpp_threshold_50_total_metric": 0.21547500044107437, |
| "tpp_threshold_50_intended_diff_only": 0.37060001492500305, |
| "tpp_threshold_50_unintended_diff_only": 0.15512501448392868, |
| "tpp_threshold_100_total_metric": 0.22912501394748688, |
| "tpp_threshold_100_intended_diff_only": 0.4137000322341919, |
| "tpp_threshold_100_unintended_diff_only": 0.184575018286705, |
| "tpp_threshold_500_total_metric": 0.22677503079175948, |
| "tpp_threshold_500_intended_diff_only": 0.45140004754066465, |
| "tpp_threshold_500_unintended_diff_only": 0.22462501674890517 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.129350009560585, |
| "tpp_threshold_2_intended_diff_only": 0.1532000184059143, |
| "tpp_threshold_2_unintended_diff_only": 0.023850008845329285, |
| "tpp_threshold_5_total_metric": 0.2576500177383423, |
| "tpp_threshold_5_intended_diff_only": 0.32000002861022947, |
| "tpp_threshold_5_unintended_diff_only": 0.062350010871887206, |
| "tpp_threshold_10_total_metric": 0.27050002217292785, |
| "tpp_threshold_10_intended_diff_only": 0.40520004034042356, |
| "tpp_threshold_10_unintended_diff_only": 0.13470001816749572, |
| "tpp_threshold_20_total_metric": 0.24729999899864197, |
| "tpp_threshold_20_intended_diff_only": 0.4436000227928162, |
| "tpp_threshold_20_unintended_diff_only": 0.1963000237941742, |
| "tpp_threshold_50_total_metric": 0.1862499952316284, |
| "tpp_threshold_50_intended_diff_only": 0.4620000243186951, |
| "tpp_threshold_50_unintended_diff_only": 0.27575002908706664, |
| "tpp_threshold_100_total_metric": 0.14230000972747803, |
| "tpp_threshold_100_intended_diff_only": 0.46660003662109373, |
| "tpp_threshold_100_unintended_diff_only": 0.3243000268936157, |
| "tpp_threshold_500_total_metric": 0.08400002717971802, |
| "tpp_threshold_500_intended_diff_only": 0.46780005693435667, |
| "tpp_threshold_500_unintended_diff_only": 0.38380002975463867 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.018250003457069397, |
| "tpp_threshold_2_intended_diff_only": 0.02820000648498535, |
| "tpp_threshold_2_unintended_diff_only": 0.009950003027915955, |
| "tpp_threshold_5_total_metric": 0.03780001103878021, |
| "tpp_threshold_5_intended_diff_only": 0.04980001449584961, |
| "tpp_threshold_5_unintended_diff_only": 0.012000003457069397, |
| "tpp_threshold_10_total_metric": 0.08190000057220459, |
| "tpp_threshold_10_intended_diff_only": 0.09940000772476196, |
| "tpp_threshold_10_unintended_diff_only": 0.017500007152557374, |
| "tpp_threshold_20_total_metric": 0.1312999963760376, |
| "tpp_threshold_20_intended_diff_only": 0.1562000036239624, |
| "tpp_threshold_20_unintended_diff_only": 0.024900007247924804, |
| "tpp_threshold_50_total_metric": 0.24470000565052033, |
| "tpp_threshold_50_intended_diff_only": 0.279200005531311, |
| "tpp_threshold_50_unintended_diff_only": 0.03449999988079071, |
| "tpp_threshold_100_total_metric": 0.31595001816749574, |
| "tpp_threshold_100_intended_diff_only": 0.36080002784729004, |
| "tpp_threshold_100_unintended_diff_only": 0.044850009679794314, |
| "tpp_threshold_500_total_metric": 0.36955003440380096, |
| "tpp_threshold_500_intended_diff_only": 0.43500003814697263, |
| "tpp_threshold_500_unintended_diff_only": 0.0654500037431717 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.19.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.19.hook_resid_post", |
| "hook_layer": 19, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.19500000774860382, |
| "tpp_threshold_2_intended_diff_only": 0.24900001287460327, |
| "tpp_threshold_2_unintended_diff_only": 0.05400000512599945, |
| "tpp_threshold_5_total_metric": 0.26350006461143494, |
| "tpp_threshold_5_intended_diff_only": 0.3380000591278076, |
| "tpp_threshold_5_unintended_diff_only": 0.07449999451637268, |
| "tpp_threshold_10_total_metric": 0.2760000377893448, |
| "tpp_threshold_10_intended_diff_only": 0.3760000467300415, |
| "tpp_threshold_10_unintended_diff_only": 0.10000000894069672, |
| "tpp_threshold_20_total_metric": 0.2487500011920929, |
| "tpp_threshold_20_intended_diff_only": 0.41100001335144043, |
| "tpp_threshold_20_unintended_diff_only": 0.16225001215934753, |
| "tpp_threshold_50_total_metric": 0.1509999781847, |
| "tpp_threshold_50_intended_diff_only": 0.43400001525878906, |
| "tpp_threshold_50_unintended_diff_only": 0.28300003707408905, |
| "tpp_threshold_100_total_metric": 0.10950002074241638, |
| "tpp_threshold_100_intended_diff_only": 0.4450000524520874, |
| "tpp_threshold_100_unintended_diff_only": 0.335500031709671, |
| "tpp_threshold_500_total_metric": 0.06375002861022949, |
| "tpp_threshold_500_intended_diff_only": 0.4490000605583191, |
| "tpp_threshold_500_unintended_diff_only": 0.3852500319480896 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.14850004017353058, |
| "tpp_threshold_2_intended_diff_only": 0.19000005722045898, |
| "tpp_threshold_2_unintended_diff_only": 0.041500017046928406, |
| "tpp_threshold_5_total_metric": 0.1977500319480896, |
| "tpp_threshold_5_intended_diff_only": 0.26100003719329834, |
| "tpp_threshold_5_unintended_diff_only": 0.06325000524520874, |
| "tpp_threshold_10_total_metric": 0.2460000216960907, |
| "tpp_threshold_10_intended_diff_only": 0.33900004625320435, |
| "tpp_threshold_10_unintended_diff_only": 0.09300002455711365, |
| "tpp_threshold_20_total_metric": 0.20899999141693115, |
| "tpp_threshold_20_intended_diff_only": 0.4190000295639038, |
| "tpp_threshold_20_unintended_diff_only": 0.21000003814697266, |
| "tpp_threshold_50_total_metric": 0.1797500103712082, |
| "tpp_threshold_50_intended_diff_only": 0.46000003814697266, |
| "tpp_threshold_50_unintended_diff_only": 0.28025002777576447, |
| "tpp_threshold_100_total_metric": 0.12900003790855408, |
| "tpp_threshold_100_intended_diff_only": 0.4670000672340393, |
| "tpp_threshold_100_unintended_diff_only": 0.33800002932548523, |
| "tpp_threshold_500_total_metric": 0.07325004041194916, |
| "tpp_threshold_500_intended_diff_only": 0.4670000672340393, |
| "tpp_threshold_500_unintended_diff_only": 0.39375002682209015 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.05275002121925354, |
| "tpp_threshold_2_intended_diff_only": 0.058000028133392334, |
| "tpp_threshold_2_unintended_diff_only": 0.005250006914138794, |
| "tpp_threshold_5_total_metric": 0.2732500284910202, |
| "tpp_threshold_5_intended_diff_only": 0.33000004291534424, |
| "tpp_threshold_5_unintended_diff_only": 0.056750014424324036, |
| "tpp_threshold_10_total_metric": 0.25449998676776886, |
| "tpp_threshold_10_intended_diff_only": 0.41200000047683716, |
| "tpp_threshold_10_unintended_diff_only": 0.1575000137090683, |
| "tpp_threshold_20_total_metric": 0.22875000536441803, |
| "tpp_threshold_20_intended_diff_only": 0.43400001525878906, |
| "tpp_threshold_20_unintended_diff_only": 0.20525000989437103, |
| "tpp_threshold_50_total_metric": 0.18674999475479126, |
| "tpp_threshold_50_intended_diff_only": 0.4480000138282776, |
| "tpp_threshold_50_unintended_diff_only": 0.26125001907348633, |
| "tpp_threshold_100_total_metric": 0.15149998664855957, |
| "tpp_threshold_100_intended_diff_only": 0.453000009059906, |
| "tpp_threshold_100_unintended_diff_only": 0.30150002241134644, |
| "tpp_threshold_500_total_metric": 0.0650000274181366, |
| "tpp_threshold_500_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_500_unintended_diff_only": 0.3890000283718109 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.22350001335144043, |
| "tpp_threshold_2_intended_diff_only": 0.23900002241134644, |
| "tpp_threshold_2_unintended_diff_only": 0.015500009059906006, |
| "tpp_threshold_5_total_metric": 0.35475000739097595, |
| "tpp_threshold_5_intended_diff_only": 0.43300002813339233, |
| "tpp_threshold_5_unintended_diff_only": 0.07825002074241638, |
| "tpp_threshold_10_total_metric": 0.3397500514984131, |
| "tpp_threshold_10_intended_diff_only": 0.49000006914138794, |
| "tpp_threshold_10_unintended_diff_only": 0.15025001764297485, |
| "tpp_threshold_20_total_metric": 0.3060000091791153, |
| "tpp_threshold_20_intended_diff_only": 0.4930000305175781, |
| "tpp_threshold_20_unintended_diff_only": 0.18700002133846283, |
| "tpp_threshold_50_total_metric": 0.23674999177455902, |
| "tpp_threshold_50_intended_diff_only": 0.49400001764297485, |
| "tpp_threshold_50_unintended_diff_only": 0.25725002586841583, |
| "tpp_threshold_100_total_metric": 0.20649999380111694, |
| "tpp_threshold_100_intended_diff_only": 0.49400001764297485, |
| "tpp_threshold_100_unintended_diff_only": 0.2875000238418579, |
| "tpp_threshold_500_total_metric": 0.13575002551078796, |
| "tpp_threshold_500_intended_diff_only": 0.49500006437301636, |
| "tpp_threshold_500_unintended_diff_only": 0.3592500388622284 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.02699996531009674, |
| "tpp_threshold_2_intended_diff_only": 0.029999971389770508, |
| "tpp_threshold_2_unintended_diff_only": 0.003000006079673767, |
| "tpp_threshold_5_total_metric": 0.19899995625019073, |
| "tpp_threshold_5_intended_diff_only": 0.23799997568130493, |
| "tpp_threshold_5_unintended_diff_only": 0.0390000194311142, |
| "tpp_threshold_10_total_metric": 0.23625001311302185, |
| "tpp_threshold_10_intended_diff_only": 0.409000039100647, |
| "tpp_threshold_10_unintended_diff_only": 0.17275002598762512, |
| "tpp_threshold_20_total_metric": 0.24399998784065247, |
| "tpp_threshold_20_intended_diff_only": 0.4610000252723694, |
| "tpp_threshold_20_unintended_diff_only": 0.21700003743171692, |
| "tpp_threshold_50_total_metric": 0.1770000010728836, |
| "tpp_threshold_50_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_50_unintended_diff_only": 0.2970000356435776, |
| "tpp_threshold_100_total_metric": 0.11500000953674316, |
| "tpp_threshold_100_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_100_unintended_diff_only": 0.359000027179718, |
| "tpp_threshold_500_total_metric": 0.08225001394748688, |
| "tpp_threshold_500_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_500_unintended_diff_only": 0.3917500227689743 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": -0.0010000169277191162, |
| "tpp_threshold_2_intended_diff_only": 0.009999990463256836, |
| "tpp_threshold_2_unintended_diff_only": 0.011000007390975952, |
| "tpp_threshold_5_total_metric": -0.0004999786615371704, |
| "tpp_threshold_5_intended_diff_only": 0.012000024318695068, |
| "tpp_threshold_5_unintended_diff_only": 0.012500002980232239, |
| "tpp_threshold_10_total_metric": -0.0017500072717666626, |
| "tpp_threshold_10_intended_diff_only": 0.013999998569488525, |
| "tpp_threshold_10_unintended_diff_only": 0.015750005841255188, |
| "tpp_threshold_20_total_metric": 0.023499995470046997, |
| "tpp_threshold_20_intended_diff_only": 0.046999990940093994, |
| "tpp_threshold_20_unintended_diff_only": 0.023499995470046997, |
| "tpp_threshold_50_total_metric": 0.1430000215768814, |
| "tpp_threshold_50_intended_diff_only": 0.17400002479553223, |
| "tpp_threshold_50_unintended_diff_only": 0.031000003218650818, |
| "tpp_threshold_100_total_metric": 0.27025000751018524, |
| "tpp_threshold_100_intended_diff_only": 0.3050000071525574, |
| "tpp_threshold_100_unintended_diff_only": 0.03474999964237213, |
| "tpp_threshold_500_total_metric": 0.40175001323223114, |
| "tpp_threshold_500_intended_diff_only": 0.453000009059906, |
| "tpp_threshold_500_unintended_diff_only": 0.051249995827674866 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.09525005519390106, |
| "tpp_threshold_2_intended_diff_only": 0.11200004816055298, |
| "tpp_threshold_2_unintended_diff_only": 0.016749992966651917, |
| "tpp_threshold_5_total_metric": 0.13100005686283112, |
| "tpp_threshold_5_intended_diff_only": 0.15300005674362183, |
| "tpp_threshold_5_unintended_diff_only": 0.02199999988079071, |
| "tpp_threshold_10_total_metric": 0.1937500238418579, |
| "tpp_threshold_10_intended_diff_only": 0.21900004148483276, |
| "tpp_threshold_10_unintended_diff_only": 0.025250017642974854, |
| "tpp_threshold_20_total_metric": 0.2627500146627426, |
| "tpp_threshold_20_intended_diff_only": 0.29500001668930054, |
| "tpp_threshold_20_unintended_diff_only": 0.03225000202655792, |
| "tpp_threshold_50_total_metric": 0.348750039935112, |
| "tpp_threshold_50_intended_diff_only": 0.38700002431869507, |
| "tpp_threshold_50_unintended_diff_only": 0.03824998438358307, |
| "tpp_threshold_100_total_metric": 0.3850000351667404, |
| "tpp_threshold_100_intended_diff_only": 0.44200003147125244, |
| "tpp_threshold_100_unintended_diff_only": 0.056999996304512024, |
| "tpp_threshold_500_total_metric": 0.36750006675720215, |
| "tpp_threshold_500_intended_diff_only": 0.4440000653266907, |
| "tpp_threshold_500_unintended_diff_only": 0.07649999856948853 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.0222499817609787, |
| "tpp_threshold_2_intended_diff_only": -0.009999990463256836, |
| "tpp_threshold_2_unintended_diff_only": 0.012249991297721863, |
| "tpp_threshold_5_total_metric": -0.020749971270561218, |
| "tpp_threshold_5_intended_diff_only": -0.010999977588653564, |
| "tpp_threshold_5_unintended_diff_only": 0.009749993681907654, |
| "tpp_threshold_10_total_metric": -0.010000035166740417, |
| "tpp_threshold_10_intended_diff_only": 0.001999974250793457, |
| "tpp_threshold_10_unintended_diff_only": 0.012000009417533875, |
| "tpp_threshold_20_total_metric": 0.023499980568885803, |
| "tpp_threshold_20_intended_diff_only": 0.042999982833862305, |
| "tpp_threshold_20_unintended_diff_only": 0.0195000022649765, |
| "tpp_threshold_50_total_metric": 0.10149997472763062, |
| "tpp_threshold_50_intended_diff_only": 0.12699997425079346, |
| "tpp_threshold_50_unintended_diff_only": 0.025499999523162842, |
| "tpp_threshold_100_total_metric": 0.21325001120567322, |
| "tpp_threshold_100_intended_diff_only": 0.25700002908706665, |
| "tpp_threshold_100_unintended_diff_only": 0.04375001788139343, |
| "tpp_threshold_500_total_metric": 0.35225002467632294, |
| "tpp_threshold_500_intended_diff_only": 0.4320000410079956, |
| "tpp_threshold_500_unintended_diff_only": 0.07975001633167267 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.001749977469444275, |
| "tpp_threshold_2_intended_diff_only": 0.0059999823570251465, |
| "tpp_threshold_2_unintended_diff_only": 0.004250004887580872, |
| "tpp_threshold_5_total_metric": 0.05374997854232788, |
| "tpp_threshold_5_intended_diff_only": 0.06599998474121094, |
| "tpp_threshold_5_unintended_diff_only": 0.012250006198883057, |
| "tpp_threshold_10_total_metric": 0.14674998819828033, |
| "tpp_threshold_10_intended_diff_only": 0.17100000381469727, |
| "tpp_threshold_10_unintended_diff_only": 0.02425001561641693, |
| "tpp_threshold_20_total_metric": 0.1929999738931656, |
| "tpp_threshold_20_intended_diff_only": 0.22699999809265137, |
| "tpp_threshold_20_unintended_diff_only": 0.03400002419948578, |
| "tpp_threshold_50_total_metric": 0.3047500103712082, |
| "tpp_threshold_50_intended_diff_only": 0.3460000157356262, |
| "tpp_threshold_50_unintended_diff_only": 0.04125000536441803, |
| "tpp_threshold_100_total_metric": 0.3527500182390213, |
| "tpp_threshold_100_intended_diff_only": 0.40000003576278687, |
| "tpp_threshold_100_unintended_diff_only": 0.047250017523765564, |
| "tpp_threshold_500_total_metric": 0.37675003707408905, |
| "tpp_threshold_500_intended_diff_only": 0.437000036239624, |
| "tpp_threshold_500_unintended_diff_only": 0.06024999916553497 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.017499983310699463, |
| "tpp_threshold_2_intended_diff_only": 0.023000001907348633, |
| "tpp_threshold_2_unintended_diff_only": 0.00550001859664917, |
| "tpp_threshold_5_total_metric": 0.025499969720840454, |
| "tpp_threshold_5_intended_diff_only": 0.02899998426437378, |
| "tpp_threshold_5_unintended_diff_only": 0.003500014543533325, |
| "tpp_threshold_10_total_metric": 0.08075003325939178, |
| "tpp_threshold_10_intended_diff_only": 0.0910000205039978, |
| "tpp_threshold_10_unintended_diff_only": 0.010249987244606018, |
| "tpp_threshold_20_total_metric": 0.15375001728534698, |
| "tpp_threshold_20_intended_diff_only": 0.1690000295639038, |
| "tpp_threshold_20_unintended_diff_only": 0.015250012278556824, |
| "tpp_threshold_50_total_metric": 0.3254999816417694, |
| "tpp_threshold_50_intended_diff_only": 0.3619999885559082, |
| "tpp_threshold_50_unintended_diff_only": 0.036500006914138794, |
| "tpp_threshold_100_total_metric": 0.35850001871585846, |
| "tpp_threshold_100_intended_diff_only": 0.40000003576278687, |
| "tpp_threshold_100_unintended_diff_only": 0.041500017046928406, |
| "tpp_threshold_500_total_metric": 0.34950003027915955, |
| "tpp_threshold_500_intended_diff_only": 0.409000039100647, |
| "tpp_threshold_500_unintended_diff_only": 0.05950000882148743 |
| } |
| } |
| } |
| } |