| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752280605, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.06562500298023223, |
| "tpp_threshold_2_intended_diff_only": 0.10120000243186951, |
| "tpp_threshold_2_unintended_diff_only": 0.035574999451637265, |
| "tpp_threshold_5_total_metric": 0.09397500455379486, |
| "tpp_threshold_5_intended_diff_only": 0.1577000081539154, |
| "tpp_threshold_5_unintended_diff_only": 0.06372500360012054, |
| "tpp_threshold_10_total_metric": 0.12410000264644622, |
| "tpp_threshold_10_intended_diff_only": 0.22310000658035278, |
| "tpp_threshold_10_unintended_diff_only": 0.09900000393390655, |
| "tpp_threshold_20_total_metric": 0.13942500799894333, |
| "tpp_threshold_20_intended_diff_only": 0.2701000154018402, |
| "tpp_threshold_20_unintended_diff_only": 0.13067500740289686, |
| "tpp_threshold_50_total_metric": 0.19275001138448716, |
| "tpp_threshold_50_intended_diff_only": 0.36670001745224, |
| "tpp_threshold_50_unintended_diff_only": 0.17395000606775282, |
| "tpp_threshold_100_total_metric": 0.21587502360343935, |
| "tpp_threshold_100_intended_diff_only": 0.41510003209114077, |
| "tpp_threshold_100_unintended_diff_only": 0.19922500848770142, |
| "tpp_threshold_500_total_metric": 0.18927503973245619, |
| "tpp_threshold_500_intended_diff_only": 0.43440004587173464, |
| "tpp_threshold_500_unintended_diff_only": 0.2451250061392784 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.11545001864433288, |
| "tpp_threshold_2_intended_diff_only": 0.18160001039505005, |
| "tpp_threshold_2_unintended_diff_only": 0.06614999175071716, |
| "tpp_threshold_5_total_metric": 0.1583000034093857, |
| "tpp_threshold_5_intended_diff_only": 0.2802000045776367, |
| "tpp_threshold_5_unintended_diff_only": 0.12190000116825103, |
| "tpp_threshold_10_total_metric": 0.17880000472068786, |
| "tpp_threshold_10_intended_diff_only": 0.3688000082969666, |
| "tpp_threshold_10_unintended_diff_only": 0.1900000035762787, |
| "tpp_threshold_20_total_metric": 0.157150000333786, |
| "tpp_threshold_20_intended_diff_only": 0.4096000075340271, |
| "tpp_threshold_20_unintended_diff_only": 0.2524500072002411, |
| "tpp_threshold_50_total_metric": 0.12085000574588775, |
| "tpp_threshold_50_intended_diff_only": 0.4490000128746033, |
| "tpp_threshold_50_unintended_diff_only": 0.3281500071287155, |
| "tpp_threshold_100_total_metric": 0.09360001385211944, |
| "tpp_threshold_100_intended_diff_only": 0.46260002851486204, |
| "tpp_threshold_100_unintended_diff_only": 0.3690000146627426, |
| "tpp_threshold_500_total_metric": 0.05980003476142883, |
| "tpp_threshold_500_intended_diff_only": 0.46620004177093505, |
| "tpp_threshold_500_unintended_diff_only": 0.40640000700950624 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.01579998731613159, |
| "tpp_threshold_2_intended_diff_only": 0.020799994468688965, |
| "tpp_threshold_2_unintended_diff_only": 0.005000007152557373, |
| "tpp_threshold_5_total_metric": 0.02965000569820404, |
| "tpp_threshold_5_intended_diff_only": 0.03520001173019409, |
| "tpp_threshold_5_unintended_diff_only": 0.005550006031990051, |
| "tpp_threshold_10_total_metric": 0.06940000057220459, |
| "tpp_threshold_10_intended_diff_only": 0.07740000486373902, |
| "tpp_threshold_10_unintended_diff_only": 0.008000004291534423, |
| "tpp_threshold_20_total_metric": 0.12170001566410064, |
| "tpp_threshold_20_intended_diff_only": 0.1306000232696533, |
| "tpp_threshold_20_unintended_diff_only": 0.008900007605552674, |
| "tpp_threshold_50_total_metric": 0.26465001702308655, |
| "tpp_threshold_50_intended_diff_only": 0.2844000220298767, |
| "tpp_threshold_50_unintended_diff_only": 0.01975000500679016, |
| "tpp_threshold_100_total_metric": 0.33815003335475924, |
| "tpp_threshold_100_intended_diff_only": 0.36760003566741944, |
| "tpp_threshold_100_unintended_diff_only": 0.029450002312660217, |
| "tpp_threshold_500_total_metric": 0.31875004470348356, |
| "tpp_threshold_500_intended_diff_only": 0.4026000499725342, |
| "tpp_threshold_500_unintended_diff_only": 0.08385000526905059 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.5.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.5.hook_resid_post", |
| "hook_layer": 5, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.24050001800060272, |
| "tpp_threshold_2_intended_diff_only": 0.3140000104904175, |
| "tpp_threshold_2_unintended_diff_only": 0.07349999248981476, |
| "tpp_threshold_5_total_metric": 0.20600000023841858, |
| "tpp_threshold_5_intended_diff_only": 0.3569999933242798, |
| "tpp_threshold_5_unintended_diff_only": 0.1509999930858612, |
| "tpp_threshold_10_total_metric": 0.1757500171661377, |
| "tpp_threshold_10_intended_diff_only": 0.3960000276565552, |
| "tpp_threshold_10_unintended_diff_only": 0.22025001049041748, |
| "tpp_threshold_20_total_metric": 0.1289999783039093, |
| "tpp_threshold_20_intended_diff_only": 0.421999990940094, |
| "tpp_threshold_20_unintended_diff_only": 0.2930000126361847, |
| "tpp_threshold_50_total_metric": 0.1082499772310257, |
| "tpp_threshold_50_intended_diff_only": 0.44099998474121094, |
| "tpp_threshold_50_unintended_diff_only": 0.33275000751018524, |
| "tpp_threshold_100_total_metric": 0.0820000171661377, |
| "tpp_threshold_100_intended_diff_only": 0.44600003957748413, |
| "tpp_threshold_100_unintended_diff_only": 0.36400002241134644, |
| "tpp_threshold_500_total_metric": 0.053000032901763916, |
| "tpp_threshold_500_intended_diff_only": 0.45100003480911255, |
| "tpp_threshold_500_unintended_diff_only": 0.39800000190734863 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.014249995350837708, |
| "tpp_threshold_2_intended_diff_only": 0.0690000057220459, |
| "tpp_threshold_2_unintended_diff_only": 0.05475001037120819, |
| "tpp_threshold_5_total_metric": 0.028249964118003845, |
| "tpp_threshold_5_intended_diff_only": 0.09399998188018799, |
| "tpp_threshold_5_unintended_diff_only": 0.06575001776218414, |
| "tpp_threshold_10_total_metric": 0.1304999589920044, |
| "tpp_threshold_10_intended_diff_only": 0.3539999723434448, |
| "tpp_threshold_10_unintended_diff_only": 0.22350001335144043, |
| "tpp_threshold_20_total_metric": 0.1199999749660492, |
| "tpp_threshold_20_intended_diff_only": 0.4129999876022339, |
| "tpp_threshold_20_unintended_diff_only": 0.2930000126361847, |
| "tpp_threshold_50_total_metric": 0.09524999558925629, |
| "tpp_threshold_50_intended_diff_only": 0.45399999618530273, |
| "tpp_threshold_50_unintended_diff_only": 0.35875000059604645, |
| "tpp_threshold_100_total_metric": 0.07099996507167816, |
| "tpp_threshold_100_intended_diff_only": 0.4599999785423279, |
| "tpp_threshold_100_unintended_diff_only": 0.3890000134706497, |
| "tpp_threshold_500_total_metric": 0.04175002872943878, |
| "tpp_threshold_500_intended_diff_only": 0.4610000252723694, |
| "tpp_threshold_500_unintended_diff_only": 0.4192499965429306 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.04950001835823059, |
| "tpp_threshold_2_intended_diff_only": 0.10100001096725464, |
| "tpp_threshold_2_unintended_diff_only": 0.05149999260902405, |
| "tpp_threshold_5_total_metric": 0.15900002419948578, |
| "tpp_threshold_5_intended_diff_only": 0.24900001287460327, |
| "tpp_threshold_5_unintended_diff_only": 0.08999998867511749, |
| "tpp_threshold_10_total_metric": 0.16850005090236664, |
| "tpp_threshold_10_intended_diff_only": 0.284000039100647, |
| "tpp_threshold_10_unintended_diff_only": 0.11549998819828033, |
| "tpp_threshold_20_total_metric": 0.17400002479553223, |
| "tpp_threshold_20_intended_diff_only": 0.3400000333786011, |
| "tpp_threshold_20_unintended_diff_only": 0.16600000858306885, |
| "tpp_threshold_50_total_metric": 0.1220000684261322, |
| "tpp_threshold_50_intended_diff_only": 0.3940000534057617, |
| "tpp_threshold_50_unintended_diff_only": 0.2719999849796295, |
| "tpp_threshold_100_total_metric": 0.09250003099441528, |
| "tpp_threshold_100_intended_diff_only": 0.4410000443458557, |
| "tpp_threshold_100_unintended_diff_only": 0.34850001335144043, |
| "tpp_threshold_500_total_metric": 0.039750054478645325, |
| "tpp_threshold_500_intended_diff_only": 0.4530000686645508, |
| "tpp_threshold_500_unintended_diff_only": 0.41325001418590546 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.19125007092952728, |
| "tpp_threshold_2_intended_diff_only": 0.27400004863739014, |
| "tpp_threshold_2_unintended_diff_only": 0.08274997770786285, |
| "tpp_threshold_5_total_metric": 0.2340000569820404, |
| "tpp_threshold_5_intended_diff_only": 0.3800000548362732, |
| "tpp_threshold_5_unintended_diff_only": 0.1459999978542328, |
| "tpp_threshold_10_total_metric": 0.2795000374317169, |
| "tpp_threshold_10_intended_diff_only": 0.44700002670288086, |
| "tpp_threshold_10_unintended_diff_only": 0.16749998927116394, |
| "tpp_threshold_20_total_metric": 0.2420000433921814, |
| "tpp_threshold_20_intended_diff_only": 0.46900004148483276, |
| "tpp_threshold_20_unintended_diff_only": 0.22699999809265137, |
| "tpp_threshold_50_total_metric": 0.15125000476837158, |
| "tpp_threshold_50_intended_diff_only": 0.484000027179718, |
| "tpp_threshold_50_unintended_diff_only": 0.33275002241134644, |
| "tpp_threshold_100_total_metric": 0.13150005042552948, |
| "tpp_threshold_100_intended_diff_only": 0.49100005626678467, |
| "tpp_threshold_100_unintended_diff_only": 0.3595000058412552, |
| "tpp_threshold_500_total_metric": 0.09250004589557648, |
| "tpp_threshold_500_intended_diff_only": 0.49100005626678467, |
| "tpp_threshold_500_unintended_diff_only": 0.3985000103712082 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.08174999058246613, |
| "tpp_threshold_2_intended_diff_only": 0.1499999761581421, |
| "tpp_threshold_2_unintended_diff_only": 0.06824998557567596, |
| "tpp_threshold_5_total_metric": 0.1642499715089798, |
| "tpp_threshold_5_intended_diff_only": 0.32099997997283936, |
| "tpp_threshold_5_unintended_diff_only": 0.15675000846385956, |
| "tpp_threshold_10_total_metric": 0.13974995911121368, |
| "tpp_threshold_10_intended_diff_only": 0.36299997568130493, |
| "tpp_threshold_10_unintended_diff_only": 0.22325001657009125, |
| "tpp_threshold_20_total_metric": 0.12074998021125793, |
| "tpp_threshold_20_intended_diff_only": 0.4039999842643738, |
| "tpp_threshold_20_unintended_diff_only": 0.28325000405311584, |
| "tpp_threshold_50_total_metric": 0.12749998271465302, |
| "tpp_threshold_50_intended_diff_only": 0.47200000286102295, |
| "tpp_threshold_50_unintended_diff_only": 0.34450002014636993, |
| "tpp_threshold_100_total_metric": 0.09100000560283661, |
| "tpp_threshold_100_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_100_unintended_diff_only": 0.3840000182390213, |
| "tpp_threshold_500_total_metric": 0.07200001180171967, |
| "tpp_threshold_500_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_500_unintended_diff_only": 0.40300001204013824 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.006499961018562317, |
| "tpp_threshold_2_intended_diff_only": 0.011999964714050293, |
| "tpp_threshold_2_unintended_diff_only": 0.005500003695487976, |
| "tpp_threshold_5_total_metric": 0.012749984860420227, |
| "tpp_threshold_5_intended_diff_only": 0.018000006675720215, |
| "tpp_threshold_5_unintended_diff_only": 0.005250021815299988, |
| "tpp_threshold_10_total_metric": 0.04499998688697815, |
| "tpp_threshold_10_intended_diff_only": 0.0559999942779541, |
| "tpp_threshold_10_unintended_diff_only": 0.011000007390975952, |
| "tpp_threshold_20_total_metric": 0.1352500021457672, |
| "tpp_threshold_20_intended_diff_only": 0.14300000667572021, |
| "tpp_threshold_20_unintended_diff_only": 0.007750004529953003, |
| "tpp_threshold_50_total_metric": 0.27049998939037323, |
| "tpp_threshold_50_intended_diff_only": 0.28200000524520874, |
| "tpp_threshold_50_unintended_diff_only": 0.01150001585483551, |
| "tpp_threshold_100_total_metric": 0.3675000071525574, |
| "tpp_threshold_100_intended_diff_only": 0.3920000195503235, |
| "tpp_threshold_100_unintended_diff_only": 0.024500012397766113, |
| "tpp_threshold_500_total_metric": 0.34825003147125244, |
| "tpp_threshold_500_intended_diff_only": 0.437000036239624, |
| "tpp_threshold_500_unintended_diff_only": 0.08875000476837158 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.035500019788742065, |
| "tpp_threshold_2_intended_diff_only": 0.04100000858306885, |
| "tpp_threshold_2_unintended_diff_only": 0.005499988794326782, |
| "tpp_threshold_5_total_metric": 0.05700002610683441, |
| "tpp_threshold_5_intended_diff_only": 0.06800001859664917, |
| "tpp_threshold_5_unintended_diff_only": 0.010999992489814758, |
| "tpp_threshold_10_total_metric": 0.09274999797344208, |
| "tpp_threshold_10_intended_diff_only": 0.09700000286102295, |
| "tpp_threshold_10_unintended_diff_only": 0.004250004887580872, |
| "tpp_threshold_20_total_metric": 0.15775004029273987, |
| "tpp_threshold_20_intended_diff_only": 0.16700005531311035, |
| "tpp_threshold_20_unintended_diff_only": 0.009250015020370483, |
| "tpp_threshold_50_total_metric": 0.3160000443458557, |
| "tpp_threshold_50_intended_diff_only": 0.33900004625320435, |
| "tpp_threshold_50_unintended_diff_only": 0.023000001907348633, |
| "tpp_threshold_100_total_metric": 0.3765000253915787, |
| "tpp_threshold_100_intended_diff_only": 0.4150000214576721, |
| "tpp_threshold_100_unintended_diff_only": 0.038499996066093445, |
| "tpp_threshold_500_total_metric": 0.31000006198883057, |
| "tpp_threshold_500_intended_diff_only": 0.42100006341934204, |
| "tpp_threshold_500_unintended_diff_only": 0.11100000143051147 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": 0.0222499817609787, |
| "tpp_threshold_2_intended_diff_only": 0.02799999713897705, |
| "tpp_threshold_2_unintended_diff_only": 0.005750015377998352, |
| "tpp_threshold_5_total_metric": 0.02850000560283661, |
| "tpp_threshold_5_intended_diff_only": 0.03200000524520874, |
| "tpp_threshold_5_unintended_diff_only": 0.0034999996423721313, |
| "tpp_threshold_10_total_metric": 0.07700000703334808, |
| "tpp_threshold_10_intended_diff_only": 0.08700001239776611, |
| "tpp_threshold_10_unintended_diff_only": 0.01000000536441803, |
| "tpp_threshold_20_total_metric": 0.09650002419948578, |
| "tpp_threshold_20_intended_diff_only": 0.11000001430511475, |
| "tpp_threshold_20_unintended_diff_only": 0.013499990105628967, |
| "tpp_threshold_50_total_metric": 0.22750000655651093, |
| "tpp_threshold_50_intended_diff_only": 0.2590000033378601, |
| "tpp_threshold_50_unintended_diff_only": 0.03149999678134918, |
| "tpp_threshold_100_total_metric": 0.3250000476837158, |
| "tpp_threshold_100_intended_diff_only": 0.362000048160553, |
| "tpp_threshold_100_unintended_diff_only": 0.03700000047683716, |
| "tpp_threshold_500_total_metric": 0.3045000582933426, |
| "tpp_threshold_500_intended_diff_only": 0.3980000615119934, |
| "tpp_threshold_500_unintended_diff_only": 0.09350000321865082 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.016250014305114746, |
| "tpp_threshold_2_intended_diff_only": 0.021000027656555176, |
| "tpp_threshold_2_unintended_diff_only": 0.00475001335144043, |
| "tpp_threshold_5_total_metric": 0.029250040650367737, |
| "tpp_threshold_5_intended_diff_only": 0.0350000262260437, |
| "tpp_threshold_5_unintended_diff_only": 0.005749985575675964, |
| "tpp_threshold_10_total_metric": 0.06299999356269836, |
| "tpp_threshold_10_intended_diff_only": 0.06999999284744263, |
| "tpp_threshold_10_unintended_diff_only": 0.006999999284744263, |
| "tpp_threshold_20_total_metric": 0.09975001215934753, |
| "tpp_threshold_20_intended_diff_only": 0.10900002717971802, |
| "tpp_threshold_20_unintended_diff_only": 0.009250015020370483, |
| "tpp_threshold_50_total_metric": 0.20725004374980927, |
| "tpp_threshold_50_intended_diff_only": 0.22800004482269287, |
| "tpp_threshold_50_unintended_diff_only": 0.020750001072883606, |
| "tpp_threshold_100_total_metric": 0.29475004971027374, |
| "tpp_threshold_100_intended_diff_only": 0.3200000524520874, |
| "tpp_threshold_100_unintended_diff_only": 0.02525000274181366, |
| "tpp_threshold_500_total_metric": 0.32975004613399506, |
| "tpp_threshold_500_intended_diff_only": 0.4030000567436218, |
| "tpp_threshold_500_unintended_diff_only": 0.07325001060962677 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": -0.0015000402927398682, |
| "tpp_threshold_2_intended_diff_only": 0.001999974250793457, |
| "tpp_threshold_2_unintended_diff_only": 0.003500014543533325, |
| "tpp_threshold_5_total_metric": 0.020749971270561218, |
| "tpp_threshold_5_intended_diff_only": 0.023000001907348633, |
| "tpp_threshold_5_unintended_diff_only": 0.0022500306367874146, |
| "tpp_threshold_10_total_metric": 0.06925001740455627, |
| "tpp_threshold_10_intended_diff_only": 0.07700002193450928, |
| "tpp_threshold_10_unintended_diff_only": 0.007750004529953003, |
| "tpp_threshold_20_total_metric": 0.11924999952316284, |
| "tpp_threshold_20_intended_diff_only": 0.12400001287460327, |
| "tpp_threshold_20_unintended_diff_only": 0.00475001335144043, |
| "tpp_threshold_50_total_metric": 0.3020000010728836, |
| "tpp_threshold_50_intended_diff_only": 0.3140000104904175, |
| "tpp_threshold_50_unintended_diff_only": 0.012000009417533875, |
| "tpp_threshold_100_total_metric": 0.32700003683567047, |
| "tpp_threshold_100_intended_diff_only": 0.3490000367164612, |
| "tpp_threshold_100_unintended_diff_only": 0.02199999988079071, |
| "tpp_threshold_500_total_metric": 0.30125002562999725, |
| "tpp_threshold_500_intended_diff_only": 0.3540000319480896, |
| "tpp_threshold_500_unintended_diff_only": 0.052750006318092346 |
| } |
| } |
| } |
| } |