| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "65b37170-42a3-4f63-bfdc-b4728a912b3a", |
| "datetime_epoch_millis": 1745620739257, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.08224999010562897, |
| "tpp_threshold_2_intended_diff_only": 0.09669999480247497, |
| "tpp_threshold_2_unintended_diff_only": 0.014450004696846007, |
| "tpp_threshold_5_total_metric": 0.1671000078320503, |
| "tpp_threshold_5_intended_diff_only": 0.20640000104904177, |
| "tpp_threshold_5_unintended_diff_only": 0.03929999321699143, |
| "tpp_threshold_10_total_metric": 0.19772501885890958, |
| "tpp_threshold_10_intended_diff_only": 0.266100013256073, |
| "tpp_threshold_10_unintended_diff_only": 0.0683749943971634, |
| "tpp_threshold_20_total_metric": 0.2222000166773796, |
| "tpp_threshold_20_intended_diff_only": 0.3213000178337097, |
| "tpp_threshold_20_unintended_diff_only": 0.09910000115633011, |
| "tpp_threshold_50_total_metric": 0.250900012254715, |
| "tpp_threshold_50_intended_diff_only": 0.3960000157356262, |
| "tpp_threshold_50_unintended_diff_only": 0.14510000348091126, |
| "tpp_threshold_100_total_metric": 0.2614250183105469, |
| "tpp_threshold_100_intended_diff_only": 0.43100001811981203, |
| "tpp_threshold_100_unintended_diff_only": 0.16957499980926513, |
| "tpp_threshold_500_total_metric": 0.22402502447366715, |
| "tpp_threshold_500_intended_diff_only": 0.4466000318527221, |
| "tpp_threshold_500_unintended_diff_only": 0.22257500737905503 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.1256999969482422, |
| "tpp_threshold_2_intended_diff_only": 0.1483999967575073, |
| "tpp_threshold_2_unintended_diff_only": 0.022699999809265136, |
| "tpp_threshold_5_total_metric": 0.2574500203132629, |
| "tpp_threshold_5_intended_diff_only": 0.32540000677108766, |
| "tpp_threshold_5_unintended_diff_only": 0.06794998645782471, |
| "tpp_threshold_10_total_metric": 0.2759000241756439, |
| "tpp_threshold_10_intended_diff_only": 0.39780001640319823, |
| "tpp_threshold_10_unintended_diff_only": 0.12189999222755432, |
| "tpp_threshold_20_total_metric": 0.2640500247478485, |
| "tpp_threshold_20_intended_diff_only": 0.4424000263214111, |
| "tpp_threshold_20_unintended_diff_only": 0.17835000157356262, |
| "tpp_threshold_50_total_metric": 0.20715001225471497, |
| "tpp_threshold_50_intended_diff_only": 0.4588000178337097, |
| "tpp_threshold_50_unintended_diff_only": 0.25165000557899475, |
| "tpp_threshold_100_total_metric": 0.1675000011920929, |
| "tpp_threshold_100_intended_diff_only": 0.4624000072479248, |
| "tpp_threshold_100_unintended_diff_only": 0.2949000060558319, |
| "tpp_threshold_500_total_metric": 0.09320002496242523, |
| "tpp_threshold_500_intended_diff_only": 0.4630000352859497, |
| "tpp_threshold_500_unintended_diff_only": 0.3698000103235245 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.038799983263015744, |
| "tpp_threshold_2_intended_diff_only": 0.044999992847442626, |
| "tpp_threshold_2_unintended_diff_only": 0.0062000095844268795, |
| "tpp_threshold_5_total_metric": 0.07674999535083771, |
| "tpp_threshold_5_intended_diff_only": 0.08739999532699586, |
| "tpp_threshold_5_unintended_diff_only": 0.010649999976158142, |
| "tpp_threshold_10_total_metric": 0.11955001354217529, |
| "tpp_threshold_10_intended_diff_only": 0.13440001010894775, |
| "tpp_threshold_10_unintended_diff_only": 0.014849996566772461, |
| "tpp_threshold_20_total_metric": 0.1803500086069107, |
| "tpp_threshold_20_intended_diff_only": 0.2002000093460083, |
| "tpp_threshold_20_unintended_diff_only": 0.019850000739097595, |
| "tpp_threshold_50_total_metric": 0.294650012254715, |
| "tpp_threshold_50_intended_diff_only": 0.3332000136375427, |
| "tpp_threshold_50_unintended_diff_only": 0.038550001382827756, |
| "tpp_threshold_100_total_metric": 0.35535003542900084, |
| "tpp_threshold_100_intended_diff_only": 0.3996000289916992, |
| "tpp_threshold_100_unintended_diff_only": 0.04424999356269836, |
| "tpp_threshold_500_total_metric": 0.35485002398490906, |
| "tpp_threshold_500_intended_diff_only": 0.4302000284194946, |
| "tpp_threshold_500_unintended_diff_only": 0.07535000443458557 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.24.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.24.hook_resid_post", |
| "hook_layer": 24, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.15275000035762787, |
| "tpp_threshold_2_intended_diff_only": 0.20399999618530273, |
| "tpp_threshold_2_unintended_diff_only": 0.051249995827674866, |
| "tpp_threshold_5_total_metric": 0.2290000170469284, |
| "tpp_threshold_5_intended_diff_only": 0.34700000286102295, |
| "tpp_threshold_5_unintended_diff_only": 0.11799998581409454, |
| "tpp_threshold_10_total_metric": 0.22174999117851257, |
| "tpp_threshold_10_intended_diff_only": 0.3709999918937683, |
| "tpp_threshold_10_unintended_diff_only": 0.14925000071525574, |
| "tpp_threshold_20_total_metric": 0.19475001096725464, |
| "tpp_threshold_20_intended_diff_only": 0.40700000524520874, |
| "tpp_threshold_20_unintended_diff_only": 0.2122499942779541, |
| "tpp_threshold_50_total_metric": 0.16974999010562897, |
| "tpp_threshold_50_intended_diff_only": 0.4300000071525574, |
| "tpp_threshold_50_unintended_diff_only": 0.2602500170469284, |
| "tpp_threshold_100_total_metric": 0.13924996554851532, |
| "tpp_threshold_100_intended_diff_only": 0.43699997663497925, |
| "tpp_threshold_100_unintended_diff_only": 0.29775001108646393, |
| "tpp_threshold_500_total_metric": 0.06149999797344208, |
| "tpp_threshold_500_intended_diff_only": 0.43800002336502075, |
| "tpp_threshold_500_unintended_diff_only": 0.3765000253915787 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.15024997293949127, |
| "tpp_threshold_2_intended_diff_only": 0.19599997997283936, |
| "tpp_threshold_2_unintended_diff_only": 0.045750007033348083, |
| "tpp_threshold_5_total_metric": 0.20874999463558197, |
| "tpp_threshold_5_intended_diff_only": 0.2839999794960022, |
| "tpp_threshold_5_unintended_diff_only": 0.07524998486042023, |
| "tpp_threshold_10_total_metric": 0.23125000298023224, |
| "tpp_threshold_10_intended_diff_only": 0.3619999885559082, |
| "tpp_threshold_10_unintended_diff_only": 0.13074998557567596, |
| "tpp_threshold_20_total_metric": 0.23625002801418304, |
| "tpp_threshold_20_intended_diff_only": 0.4100000262260437, |
| "tpp_threshold_20_unintended_diff_only": 0.17374999821186066, |
| "tpp_threshold_50_total_metric": 0.16775000095367432, |
| "tpp_threshold_50_intended_diff_only": 0.4490000009536743, |
| "tpp_threshold_50_unintended_diff_only": 0.28125, |
| "tpp_threshold_100_total_metric": 0.1069999635219574, |
| "tpp_threshold_100_intended_diff_only": 0.45499998331069946, |
| "tpp_threshold_100_unintended_diff_only": 0.34800001978874207, |
| "tpp_threshold_500_total_metric": 0.06350001692771912, |
| "tpp_threshold_500_intended_diff_only": 0.45600003004074097, |
| "tpp_threshold_500_unintended_diff_only": 0.39250001311302185 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.07900002598762512, |
| "tpp_threshold_2_intended_diff_only": 0.07700002193450928, |
| "tpp_threshold_2_unintended_diff_only": -0.0020000040531158447, |
| "tpp_threshold_5_total_metric": 0.30250005424022675, |
| "tpp_threshold_5_intended_diff_only": 0.33500003814697266, |
| "tpp_threshold_5_unintended_diff_only": 0.03249998390674591, |
| "tpp_threshold_10_total_metric": 0.29300008714199066, |
| "tpp_threshold_10_intended_diff_only": 0.3890000581741333, |
| "tpp_threshold_10_unintended_diff_only": 0.09599997103214264, |
| "tpp_threshold_20_total_metric": 0.30400003492832184, |
| "tpp_threshold_20_intended_diff_only": 0.43800002336502075, |
| "tpp_threshold_20_unintended_diff_only": 0.1339999884366989, |
| "tpp_threshold_50_total_metric": 0.24025005102157593, |
| "tpp_threshold_50_intended_diff_only": 0.4500000476837158, |
| "tpp_threshold_50_unintended_diff_only": 0.2097499966621399, |
| "tpp_threshold_100_total_metric": 0.1757500171661377, |
| "tpp_threshold_100_intended_diff_only": 0.453000009059906, |
| "tpp_threshold_100_unintended_diff_only": 0.2772499918937683, |
| "tpp_threshold_500_total_metric": 0.06900005042552948, |
| "tpp_threshold_500_intended_diff_only": 0.4540000557899475, |
| "tpp_threshold_500_unintended_diff_only": 0.38500000536441803 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.08525003492832184, |
| "tpp_threshold_2_intended_diff_only": 0.08600002527236938, |
| "tpp_threshold_2_unintended_diff_only": 0.0007499903440475464, |
| "tpp_threshold_5_total_metric": 0.24650001525878906, |
| "tpp_threshold_5_intended_diff_only": 0.27799999713897705, |
| "tpp_threshold_5_unintended_diff_only": 0.03149998188018799, |
| "tpp_threshold_10_total_metric": 0.33000004291534424, |
| "tpp_threshold_10_intended_diff_only": 0.4230000376701355, |
| "tpp_threshold_10_unintended_diff_only": 0.09299999475479126, |
| "tpp_threshold_20_total_metric": 0.3032500296831131, |
| "tpp_threshold_20_intended_diff_only": 0.487000048160553, |
| "tpp_threshold_20_unintended_diff_only": 0.18375001847743988, |
| "tpp_threshold_50_total_metric": 0.22175000607967377, |
| "tpp_threshold_50_intended_diff_only": 0.49000000953674316, |
| "tpp_threshold_50_unintended_diff_only": 0.2682500034570694, |
| "tpp_threshold_100_total_metric": 0.22150003910064697, |
| "tpp_threshold_100_intended_diff_only": 0.4920000433921814, |
| "tpp_threshold_100_unintended_diff_only": 0.2705000042915344, |
| "tpp_threshold_500_total_metric": 0.1535000503063202, |
| "tpp_threshold_500_intended_diff_only": 0.4920000433921814, |
| "tpp_threshold_500_unintended_diff_only": 0.3384999930858612 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.16124995052814484, |
| "tpp_threshold_2_intended_diff_only": 0.17899996042251587, |
| "tpp_threshold_2_unintended_diff_only": 0.017750009894371033, |
| "tpp_threshold_5_total_metric": 0.3005000203847885, |
| "tpp_threshold_5_intended_diff_only": 0.3830000162124634, |
| "tpp_threshold_5_unintended_diff_only": 0.08249999582767487, |
| "tpp_threshold_10_total_metric": 0.3034999966621399, |
| "tpp_threshold_10_intended_diff_only": 0.4440000057220459, |
| "tpp_threshold_10_unintended_diff_only": 0.140500009059906, |
| "tpp_threshold_20_total_metric": 0.28200002014636993, |
| "tpp_threshold_20_intended_diff_only": 0.4700000286102295, |
| "tpp_threshold_20_unintended_diff_only": 0.18800000846385956, |
| "tpp_threshold_50_total_metric": 0.23625001311302185, |
| "tpp_threshold_50_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_50_unintended_diff_only": 0.23875001072883606, |
| "tpp_threshold_100_total_metric": 0.1940000206232071, |
| "tpp_threshold_100_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_100_unintended_diff_only": 0.2810000032186508, |
| "tpp_threshold_500_total_metric": 0.1185000091791153, |
| "tpp_threshold_500_intended_diff_only": 0.4750000238418579, |
| "tpp_threshold_500_unintended_diff_only": 0.3565000146627426 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.01524999737739563, |
| "tpp_threshold_2_intended_diff_only": 0.023000001907348633, |
| "tpp_threshold_2_unintended_diff_only": 0.007750004529953003, |
| "tpp_threshold_5_total_metric": 0.03200000524520874, |
| "tpp_threshold_5_intended_diff_only": 0.04100000858306885, |
| "tpp_threshold_5_unintended_diff_only": 0.009000003337860107, |
| "tpp_threshold_10_total_metric": 0.0780000239610672, |
| "tpp_threshold_10_intended_diff_only": 0.09500002861022949, |
| "tpp_threshold_10_unintended_diff_only": 0.017000004649162292, |
| "tpp_threshold_20_total_metric": 0.1757500022649765, |
| "tpp_threshold_20_intended_diff_only": 0.19900000095367432, |
| "tpp_threshold_20_unintended_diff_only": 0.023249998688697815, |
| "tpp_threshold_50_total_metric": 0.3057500422000885, |
| "tpp_threshold_50_intended_diff_only": 0.33500003814697266, |
| "tpp_threshold_50_unintended_diff_only": 0.029249995946884155, |
| "tpp_threshold_100_total_metric": 0.3822500556707382, |
| "tpp_threshold_100_intended_diff_only": 0.41300004720687866, |
| "tpp_threshold_100_unintended_diff_only": 0.030749991536140442, |
| "tpp_threshold_500_total_metric": 0.405750036239624, |
| "tpp_threshold_500_intended_diff_only": 0.45600003004074097, |
| "tpp_threshold_500_unintended_diff_only": 0.05024999380111694 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.08374999463558197, |
| "tpp_threshold_2_intended_diff_only": 0.09200000762939453, |
| "tpp_threshold_2_unintended_diff_only": 0.008250012993812561, |
| "tpp_threshold_5_total_metric": 0.1507500410079956, |
| "tpp_threshold_5_intended_diff_only": 0.16500002145767212, |
| "tpp_threshold_5_unintended_diff_only": 0.014249980449676514, |
| "tpp_threshold_10_total_metric": 0.21875004470348358, |
| "tpp_threshold_10_intended_diff_only": 0.23400002717971802, |
| "tpp_threshold_10_unintended_diff_only": 0.015249982476234436, |
| "tpp_threshold_20_total_metric": 0.3157500624656677, |
| "tpp_threshold_20_intended_diff_only": 0.3340000510215759, |
| "tpp_threshold_20_unintended_diff_only": 0.018249988555908203, |
| "tpp_threshold_50_total_metric": 0.3472500443458557, |
| "tpp_threshold_50_intended_diff_only": 0.39900004863739014, |
| "tpp_threshold_50_unintended_diff_only": 0.051750004291534424, |
| "tpp_threshold_100_total_metric": 0.3747500479221344, |
| "tpp_threshold_100_intended_diff_only": 0.4320000410079956, |
| "tpp_threshold_100_unintended_diff_only": 0.057249993085861206, |
| "tpp_threshold_500_total_metric": 0.35075007379055023, |
| "tpp_threshold_500_intended_diff_only": 0.43900007009506226, |
| "tpp_threshold_500_unintended_diff_only": 0.08824999630451202 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.0022500604391098022, |
| "tpp_threshold_2_intended_diff_only": -0.001000046730041504, |
| "tpp_threshold_2_unintended_diff_only": 0.0012500137090682983, |
| "tpp_threshold_5_total_metric": 0.00024993717670440674, |
| "tpp_threshold_5_intended_diff_only": 0.008999943733215332, |
| "tpp_threshold_5_unintended_diff_only": 0.008750006556510925, |
| "tpp_threshold_10_total_metric": 0.007749974727630615, |
| "tpp_threshold_10_intended_diff_only": 0.019999980926513672, |
| "tpp_threshold_10_unintended_diff_only": 0.012250006198883057, |
| "tpp_threshold_20_total_metric": 0.034999966621398926, |
| "tpp_threshold_20_intended_diff_only": 0.04899996519088745, |
| "tpp_threshold_20_unintended_diff_only": 0.013999998569488525, |
| "tpp_threshold_50_total_metric": 0.26774999499320984, |
| "tpp_threshold_50_intended_diff_only": 0.33799999952316284, |
| "tpp_threshold_50_unintended_diff_only": 0.070250004529953, |
| "tpp_threshold_100_total_metric": 0.33799999952316284, |
| "tpp_threshold_100_intended_diff_only": 0.4169999957084656, |
| "tpp_threshold_100_unintended_diff_only": 0.07899999618530273, |
| "tpp_threshold_500_total_metric": 0.30150000751018524, |
| "tpp_threshold_500_intended_diff_only": 0.43400001525878906, |
| "tpp_threshold_500_unintended_diff_only": 0.13250000774860382 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.012999996542930603, |
| "tpp_threshold_2_intended_diff_only": 0.018000006675720215, |
| "tpp_threshold_2_unintended_diff_only": 0.005000010132789612, |
| "tpp_threshold_5_total_metric": 0.06800001859664917, |
| "tpp_threshold_5_intended_diff_only": 0.07700002193450928, |
| "tpp_threshold_5_unintended_diff_only": 0.009000003337860107, |
| "tpp_threshold_10_total_metric": 0.12100003659725189, |
| "tpp_threshold_10_intended_diff_only": 0.13200002908706665, |
| "tpp_threshold_10_unintended_diff_only": 0.010999992489814758, |
| "tpp_threshold_20_total_metric": 0.1599999964237213, |
| "tpp_threshold_20_intended_diff_only": 0.18000000715255737, |
| "tpp_threshold_20_unintended_diff_only": 0.02000001072883606, |
| "tpp_threshold_50_total_metric": 0.23475000262260437, |
| "tpp_threshold_50_intended_diff_only": 0.25, |
| "tpp_threshold_50_unintended_diff_only": 0.01524999737739563, |
| "tpp_threshold_100_total_metric": 0.3277500420808792, |
| "tpp_threshold_100_intended_diff_only": 0.3540000319480896, |
| "tpp_threshold_100_unintended_diff_only": 0.026249989867210388, |
| "tpp_threshold_500_total_metric": 0.3749999850988388, |
| "tpp_threshold_500_intended_diff_only": 0.4309999942779541, |
| "tpp_threshold_500_unintended_diff_only": 0.056000009179115295 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.08424998819828033, |
| "tpp_threshold_2_intended_diff_only": 0.09299999475479126, |
| "tpp_threshold_2_unintended_diff_only": 0.008750006556510925, |
| "tpp_threshold_5_total_metric": 0.13274997472763062, |
| "tpp_threshold_5_intended_diff_only": 0.14499998092651367, |
| "tpp_threshold_5_unintended_diff_only": 0.012250006198883057, |
| "tpp_threshold_10_total_metric": 0.17224998772144318, |
| "tpp_threshold_10_intended_diff_only": 0.19099998474121094, |
| "tpp_threshold_10_unintended_diff_only": 0.01874999701976776, |
| "tpp_threshold_20_total_metric": 0.21525001525878906, |
| "tpp_threshold_20_intended_diff_only": 0.23900002241134644, |
| "tpp_threshold_20_unintended_diff_only": 0.023750007152557373, |
| "tpp_threshold_50_total_metric": 0.3177499771118164, |
| "tpp_threshold_50_intended_diff_only": 0.343999981880188, |
| "tpp_threshold_50_unintended_diff_only": 0.026250004768371582, |
| "tpp_threshold_100_total_metric": 0.3540000319480896, |
| "tpp_threshold_100_intended_diff_only": 0.38200002908706665, |
| "tpp_threshold_100_unintended_diff_only": 0.02799999713897705, |
| "tpp_threshold_500_total_metric": 0.341250017285347, |
| "tpp_threshold_500_intended_diff_only": 0.39100003242492676, |
| "tpp_threshold_500_unintended_diff_only": 0.04975001513957977 |
| } |
| } |
| } |
| } |