| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745619808049, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.06397499144077301, |
| "tpp_threshold_2_intended_diff_only": 0.07360000014305115, |
| "tpp_threshold_2_unintended_diff_only": 0.009625008702278138, |
| "tpp_threshold_5_total_metric": 0.16095000654459002, |
| "tpp_threshold_5_intended_diff_only": 0.19380001425743104, |
| "tpp_threshold_5_unintended_diff_only": 0.03285000771284104, |
| "tpp_threshold_10_total_metric": 0.19397500455379485, |
| "tpp_threshold_10_intended_diff_only": 0.2591000199317932, |
| "tpp_threshold_10_unintended_diff_only": 0.06512501537799835, |
| "tpp_threshold_20_total_metric": 0.21055000871419907, |
| "tpp_threshold_20_intended_diff_only": 0.30880002379417415, |
| "tpp_threshold_20_unintended_diff_only": 0.09825001507997513, |
| "tpp_threshold_50_total_metric": 0.23247500360012055, |
| "tpp_threshold_50_intended_diff_only": 0.3744000196456909, |
| "tpp_threshold_50_unintended_diff_only": 0.14192501604557037, |
| "tpp_threshold_100_total_metric": 0.2522250100970268, |
| "tpp_threshold_100_intended_diff_only": 0.4164000272750854, |
| "tpp_threshold_100_unintended_diff_only": 0.16417501717805863, |
| "tpp_threshold_500_total_metric": 0.22612503468990325, |
| "tpp_threshold_500_intended_diff_only": 0.45150005221366885, |
| "tpp_threshold_500_unintended_diff_only": 0.22537501752376554 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.10694999396800994, |
| "tpp_threshold_2_intended_diff_only": 0.1190000057220459, |
| "tpp_threshold_2_unintended_diff_only": 0.01205001175403595, |
| "tpp_threshold_5_total_metric": 0.25255001783370973, |
| "tpp_threshold_5_intended_diff_only": 0.30580003261566163, |
| "tpp_threshold_5_unintended_diff_only": 0.053250014781951904, |
| "tpp_threshold_10_total_metric": 0.2869000047445297, |
| "tpp_threshold_10_intended_diff_only": 0.3998000264167786, |
| "tpp_threshold_10_unintended_diff_only": 0.11290002167224884, |
| "tpp_threshold_20_total_metric": 0.26420001685619354, |
| "tpp_threshold_20_intended_diff_only": 0.43500003814697263, |
| "tpp_threshold_20_unintended_diff_only": 0.17080002129077912, |
| "tpp_threshold_50_total_metric": 0.21349999606609343, |
| "tpp_threshold_50_intended_diff_only": 0.46140002012252807, |
| "tpp_threshold_50_unintended_diff_only": 0.24790002405643463, |
| "tpp_threshold_100_total_metric": 0.18240001797676086, |
| "tpp_threshold_100_intended_diff_only": 0.46620004177093505, |
| "tpp_threshold_100_unintended_diff_only": 0.2838000237941742, |
| "tpp_threshold_500_total_metric": 0.09815002977848053, |
| "tpp_threshold_500_intended_diff_only": 0.46780005693435667, |
| "tpp_threshold_500_unintended_diff_only": 0.36965002715587614 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.02099998891353607, |
| "tpp_threshold_2_intended_diff_only": 0.028199994564056398, |
| "tpp_threshold_2_unintended_diff_only": 0.007200005650520325, |
| "tpp_threshold_5_total_metric": 0.06934999525547028, |
| "tpp_threshold_5_intended_diff_only": 0.08179999589920044, |
| "tpp_threshold_5_unintended_diff_only": 0.012450000643730164, |
| "tpp_threshold_10_total_metric": 0.10105000436306, |
| "tpp_threshold_10_intended_diff_only": 0.11840001344680787, |
| "tpp_threshold_10_unintended_diff_only": 0.017350009083747862, |
| "tpp_threshold_20_total_metric": 0.1569000005722046, |
| "tpp_threshold_20_intended_diff_only": 0.18260000944137572, |
| "tpp_threshold_20_unintended_diff_only": 0.02570000886917114, |
| "tpp_threshold_50_total_metric": 0.25145001113414767, |
| "tpp_threshold_50_intended_diff_only": 0.28740001916885377, |
| "tpp_threshold_50_unintended_diff_only": 0.03595000803470612, |
| "tpp_threshold_100_total_metric": 0.3220500022172928, |
| "tpp_threshold_100_intended_diff_only": 0.3666000127792358, |
| "tpp_threshold_100_unintended_diff_only": 0.044550010561943056, |
| "tpp_threshold_500_total_metric": 0.35410003960132597, |
| "tpp_threshold_500_intended_diff_only": 0.43520004749298097, |
| "tpp_threshold_500_unintended_diff_only": 0.08110000789165497 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.19.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.19.hook_resid_post", |
| "hook_layer": 19, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.14924998581409454, |
| "tpp_threshold_2_intended_diff_only": 0.16699999570846558, |
| "tpp_threshold_2_unintended_diff_only": 0.017750009894371033, |
| "tpp_threshold_5_total_metric": 0.2567500025033951, |
| "tpp_threshold_5_intended_diff_only": 0.32200002670288086, |
| "tpp_threshold_5_unintended_diff_only": 0.06525002419948578, |
| "tpp_threshold_10_total_metric": 0.2630000114440918, |
| "tpp_threshold_10_intended_diff_only": 0.3720000386238098, |
| "tpp_threshold_10_unintended_diff_only": 0.10900002717971802, |
| "tpp_threshold_20_total_metric": 0.20325002074241638, |
| "tpp_threshold_20_intended_diff_only": 0.41300004720687866, |
| "tpp_threshold_20_unintended_diff_only": 0.20975002646446228, |
| "tpp_threshold_50_total_metric": 0.14949998259544373, |
| "tpp_threshold_50_intended_diff_only": 0.4390000104904175, |
| "tpp_threshold_50_unintended_diff_only": 0.28950002789497375, |
| "tpp_threshold_100_total_metric": 0.11849997937679291, |
| "tpp_threshold_100_intended_diff_only": 0.4480000138282776, |
| "tpp_threshold_100_unintended_diff_only": 0.3295000344514847, |
| "tpp_threshold_500_total_metric": 0.05925002694129944, |
| "tpp_threshold_500_intended_diff_only": 0.4490000605583191, |
| "tpp_threshold_500_unintended_diff_only": 0.38975003361701965 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.11900004744529724, |
| "tpp_threshold_2_intended_diff_only": 0.15300005674362183, |
| "tpp_threshold_2_unintended_diff_only": 0.034000009298324585, |
| "tpp_threshold_5_total_metric": 0.20725004374980927, |
| "tpp_threshold_5_intended_diff_only": 0.28800004720687866, |
| "tpp_threshold_5_unintended_diff_only": 0.0807500034570694, |
| "tpp_threshold_10_total_metric": 0.24950000643730164, |
| "tpp_threshold_10_intended_diff_only": 0.3460000157356262, |
| "tpp_threshold_10_unintended_diff_only": 0.09650000929832458, |
| "tpp_threshold_20_total_metric": 0.21600006520748138, |
| "tpp_threshold_20_intended_diff_only": 0.4070000648498535, |
| "tpp_threshold_20_unintended_diff_only": 0.19099999964237213, |
| "tpp_threshold_50_total_metric": 0.17075000703334808, |
| "tpp_threshold_50_intended_diff_only": 0.4570000171661377, |
| "tpp_threshold_50_unintended_diff_only": 0.2862500101327896, |
| "tpp_threshold_100_total_metric": 0.14200004935264587, |
| "tpp_threshold_100_intended_diff_only": 0.4670000672340393, |
| "tpp_threshold_100_unintended_diff_only": 0.32500001788139343, |
| "tpp_threshold_500_total_metric": 0.0792500376701355, |
| "tpp_threshold_500_intended_diff_only": 0.4670000672340393, |
| "tpp_threshold_500_unintended_diff_only": 0.3877500295639038 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.0392499715089798, |
| "tpp_threshold_2_intended_diff_only": 0.03799998760223389, |
| "tpp_threshold_2_unintended_diff_only": -0.0012499839067459106, |
| "tpp_threshold_5_total_metric": 0.2134999930858612, |
| "tpp_threshold_5_intended_diff_only": 0.22600001096725464, |
| "tpp_threshold_5_unintended_diff_only": 0.012500017881393433, |
| "tpp_threshold_10_total_metric": 0.2815000116825104, |
| "tpp_threshold_10_intended_diff_only": 0.38200002908706665, |
| "tpp_threshold_10_unintended_diff_only": 0.10050001740455627, |
| "tpp_threshold_20_total_metric": 0.27025000751018524, |
| "tpp_threshold_20_intended_diff_only": 0.4100000262260437, |
| "tpp_threshold_20_unintended_diff_only": 0.13975001871585846, |
| "tpp_threshold_50_total_metric": 0.25049997866153717, |
| "tpp_threshold_50_intended_diff_only": 0.4440000057220459, |
| "tpp_threshold_50_unintended_diff_only": 0.19350002706050873, |
| "tpp_threshold_100_total_metric": 0.2302500456571579, |
| "tpp_threshold_100_intended_diff_only": 0.4490000605583191, |
| "tpp_threshold_100_unintended_diff_only": 0.2187500149011612, |
| "tpp_threshold_500_total_metric": 0.06675003468990326, |
| "tpp_threshold_500_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_500_unintended_diff_only": 0.38725002110004425 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.19249999523162842, |
| "tpp_threshold_2_intended_diff_only": 0.19900000095367432, |
| "tpp_threshold_2_unintended_diff_only": 0.0065000057220458984, |
| "tpp_threshold_5_total_metric": 0.3432500511407852, |
| "tpp_threshold_5_intended_diff_only": 0.4030000567436218, |
| "tpp_threshold_5_unintended_diff_only": 0.05975000560283661, |
| "tpp_threshold_10_total_metric": 0.36499999463558197, |
| "tpp_threshold_10_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_10_unintended_diff_only": 0.11500002443790436, |
| "tpp_threshold_20_total_metric": 0.34175001084804535, |
| "tpp_threshold_20_intended_diff_only": 0.4880000352859497, |
| "tpp_threshold_20_unintended_diff_only": 0.14625002443790436, |
| "tpp_threshold_50_total_metric": 0.257750004529953, |
| "tpp_threshold_50_intended_diff_only": 0.4930000305175781, |
| "tpp_threshold_50_unintended_diff_only": 0.23525002598762512, |
| "tpp_threshold_100_total_metric": 0.2252500206232071, |
| "tpp_threshold_100_intended_diff_only": 0.4930000305175781, |
| "tpp_threshold_100_unintended_diff_only": 0.26775000989437103, |
| "tpp_threshold_500_total_metric": 0.18575003743171692, |
| "tpp_threshold_500_intended_diff_only": 0.49500006437301636, |
| "tpp_threshold_500_unintended_diff_only": 0.30925002694129944 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.034749969840049744, |
| "tpp_threshold_2_intended_diff_only": 0.03799998760223389, |
| "tpp_threshold_2_unintended_diff_only": 0.003250017762184143, |
| "tpp_threshold_5_total_metric": 0.24199999868869781, |
| "tpp_threshold_5_intended_diff_only": 0.2900000214576721, |
| "tpp_threshold_5_unintended_diff_only": 0.048000022768974304, |
| "tpp_threshold_10_total_metric": 0.27549999952316284, |
| "tpp_threshold_10_intended_diff_only": 0.4190000295639038, |
| "tpp_threshold_10_unintended_diff_only": 0.14350003004074097, |
| "tpp_threshold_20_total_metric": 0.28974997997283936, |
| "tpp_threshold_20_intended_diff_only": 0.4570000171661377, |
| "tpp_threshold_20_unintended_diff_only": 0.16725003719329834, |
| "tpp_threshold_50_total_metric": 0.23900000751018524, |
| "tpp_threshold_50_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_50_unintended_diff_only": 0.23500002920627594, |
| "tpp_threshold_100_total_metric": 0.19599999487400055, |
| "tpp_threshold_100_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_100_unintended_diff_only": 0.27800004184246063, |
| "tpp_threshold_500_total_metric": 0.09975001215934753, |
| "tpp_threshold_500_intended_diff_only": 0.4740000367164612, |
| "tpp_threshold_500_unintended_diff_only": 0.37425002455711365 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.005750000476837158, |
| "tpp_threshold_2_intended_diff_only": 0.009999990463256836, |
| "tpp_threshold_2_unintended_diff_only": 0.004249989986419678, |
| "tpp_threshold_5_total_metric": 0.004249989986419678, |
| "tpp_threshold_5_intended_diff_only": 0.014999985694885254, |
| "tpp_threshold_5_unintended_diff_only": 0.010749995708465576, |
| "tpp_threshold_10_total_metric": 0.006999984383583069, |
| "tpp_threshold_10_intended_diff_only": 0.02399998903274536, |
| "tpp_threshold_10_unintended_diff_only": 0.017000004649162292, |
| "tpp_threshold_20_total_metric": 0.05425003170967102, |
| "tpp_threshold_20_intended_diff_only": 0.09000003337860107, |
| "tpp_threshold_20_unintended_diff_only": 0.035750001668930054, |
| "tpp_threshold_50_total_metric": 0.16875003278255463, |
| "tpp_threshold_50_intended_diff_only": 0.21500003337860107, |
| "tpp_threshold_50_unintended_diff_only": 0.04625000059604645, |
| "tpp_threshold_100_total_metric": 0.26999999582767487, |
| "tpp_threshold_100_intended_diff_only": 0.3230000138282776, |
| "tpp_threshold_100_unintended_diff_only": 0.05300001800060272, |
| "tpp_threshold_500_total_metric": 0.38075006008148193, |
| "tpp_threshold_500_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_500_unintended_diff_only": 0.07324999570846558 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.053749993443489075, |
| "tpp_threshold_2_intended_diff_only": 0.06400001049041748, |
| "tpp_threshold_2_unintended_diff_only": 0.010250017046928406, |
| "tpp_threshold_5_total_metric": 0.13125000894069672, |
| "tpp_threshold_5_intended_diff_only": 0.1420000195503235, |
| "tpp_threshold_5_unintended_diff_only": 0.01075001060962677, |
| "tpp_threshold_10_total_metric": 0.18050003051757812, |
| "tpp_threshold_10_intended_diff_only": 0.20000004768371582, |
| "tpp_threshold_10_unintended_diff_only": 0.019500017166137695, |
| "tpp_threshold_20_total_metric": 0.27650000154972076, |
| "tpp_threshold_20_intended_diff_only": 0.2990000247955322, |
| "tpp_threshold_20_unintended_diff_only": 0.022500023245811462, |
| "tpp_threshold_50_total_metric": 0.3475000113248825, |
| "tpp_threshold_50_intended_diff_only": 0.3920000195503235, |
| "tpp_threshold_50_unintended_diff_only": 0.04450000822544098, |
| "tpp_threshold_100_total_metric": 0.3945000320672989, |
| "tpp_threshold_100_intended_diff_only": 0.4410000443458557, |
| "tpp_threshold_100_unintended_diff_only": 0.046500012278556824, |
| "tpp_threshold_500_total_metric": 0.3717500567436218, |
| "tpp_threshold_500_intended_diff_only": 0.4440000653266907, |
| "tpp_threshold_500_unintended_diff_only": 0.07225000858306885 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.006750017404556274, |
| "tpp_threshold_2_intended_diff_only": -0.0040000081062316895, |
| "tpp_threshold_2_unintended_diff_only": 0.002750009298324585, |
| "tpp_threshold_5_total_metric": -0.01199999451637268, |
| "tpp_threshold_5_intended_diff_only": -0.0009999871253967285, |
| "tpp_threshold_5_unintended_diff_only": 0.011000007390975952, |
| "tpp_threshold_10_total_metric": -0.0002499818801879883, |
| "tpp_threshold_10_intended_diff_only": 0.012000024318695068, |
| "tpp_threshold_10_unintended_diff_only": 0.012250006198883057, |
| "tpp_threshold_20_total_metric": 0.02924998104572296, |
| "tpp_threshold_20_intended_diff_only": 0.046999990940093994, |
| "tpp_threshold_20_unintended_diff_only": 0.017750009894371033, |
| "tpp_threshold_50_total_metric": 0.14075002074241638, |
| "tpp_threshold_50_intended_diff_only": 0.16500002145767212, |
| "tpp_threshold_50_unintended_diff_only": 0.024250000715255737, |
| "tpp_threshold_100_total_metric": 0.25049999356269836, |
| "tpp_threshold_100_intended_diff_only": 0.28299999237060547, |
| "tpp_threshold_100_unintended_diff_only": 0.032499998807907104, |
| "tpp_threshold_500_total_metric": 0.3070000261068344, |
| "tpp_threshold_500_intended_diff_only": 0.4320000410079956, |
| "tpp_threshold_500_unintended_diff_only": 0.1250000149011612 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.02899995446205139, |
| "tpp_threshold_2_intended_diff_only": 0.04399996995925903, |
| "tpp_threshold_2_unintended_diff_only": 0.015000015497207642, |
| "tpp_threshold_5_total_metric": 0.16324998438358307, |
| "tpp_threshold_5_intended_diff_only": 0.18699997663497925, |
| "tpp_threshold_5_unintended_diff_only": 0.02374999225139618, |
| "tpp_threshold_10_total_metric": 0.21774998307228088, |
| "tpp_threshold_10_intended_diff_only": 0.24199998378753662, |
| "tpp_threshold_10_unintended_diff_only": 0.024250000715255737, |
| "tpp_threshold_20_total_metric": 0.24124999344348907, |
| "tpp_threshold_20_intended_diff_only": 0.27799999713897705, |
| "tpp_threshold_20_unintended_diff_only": 0.036750003695487976, |
| "tpp_threshold_50_total_metric": 0.3099999725818634, |
| "tpp_threshold_50_intended_diff_only": 0.3479999899864197, |
| "tpp_threshold_50_unintended_diff_only": 0.038000017404556274, |
| "tpp_threshold_100_total_metric": 0.34050001204013824, |
| "tpp_threshold_100_intended_diff_only": 0.3960000276565552, |
| "tpp_threshold_100_unintended_diff_only": 0.05550001561641693, |
| "tpp_threshold_500_total_metric": 0.3670000284910202, |
| "tpp_threshold_500_intended_diff_only": 0.437000036239624, |
| "tpp_threshold_500_unintended_diff_only": 0.07000000774860382 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.02325001358985901, |
| "tpp_threshold_2_intended_diff_only": 0.027000010013580322, |
| "tpp_threshold_2_unintended_diff_only": 0.0037499964237213135, |
| "tpp_threshold_5_total_metric": 0.0599999874830246, |
| "tpp_threshold_5_intended_diff_only": 0.06599998474121094, |
| "tpp_threshold_5_unintended_diff_only": 0.00599999725818634, |
| "tpp_threshold_10_total_metric": 0.1002500057220459, |
| "tpp_threshold_10_intended_diff_only": 0.11400002241134644, |
| "tpp_threshold_10_unintended_diff_only": 0.013750016689300537, |
| "tpp_threshold_20_total_metric": 0.18324999511241913, |
| "tpp_threshold_20_intended_diff_only": 0.19900000095367432, |
| "tpp_threshold_20_unintended_diff_only": 0.015750005841255188, |
| "tpp_threshold_50_total_metric": 0.2902500182390213, |
| "tpp_threshold_50_intended_diff_only": 0.31700003147125244, |
| "tpp_threshold_50_unintended_diff_only": 0.02675001323223114, |
| "tpp_threshold_100_total_metric": 0.35474997758865356, |
| "tpp_threshold_100_intended_diff_only": 0.38999998569488525, |
| "tpp_threshold_100_unintended_diff_only": 0.03525000810623169, |
| "tpp_threshold_500_total_metric": 0.34400002658367157, |
| "tpp_threshold_500_intended_diff_only": 0.409000039100647, |
| "tpp_threshold_500_unintended_diff_only": 0.0650000125169754 |
| } |
| } |
| } |
| } |