| { |
| "eval_type_id": "sparse_probing", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "LabHC/bias_in_bios_class_set2", |
| "LabHC/bias_in_bios_class_set3", |
| "canrager/amazon_reviews_mcauley_1and5", |
| "canrager/amazon_reviews_mcauley_1and5_sentiment", |
| "codeparrot/github-code", |
| "fancyzhx/ag_news", |
| "Helsinki-NLP/europarl" |
| ], |
| "probe_train_set_size": 4000, |
| "probe_test_set_size": 1000, |
| "context_length": 128, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "model_name": "gemma-2-2b", |
| "k_values": [ |
| 1, |
| 2, |
| 5 |
| ], |
| "lower_vram_usage": false |
| }, |
| "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0", |
| "datetime_epoch_millis": 1745755674193, |
| "eval_result_metrics": { |
| "llm": { |
| "llm_test_accuracy": 0.9586437880992889, |
| "llm_top_1_test_accuracy": 0.7063249999999999, |
| "llm_top_2_test_accuracy": 0.7488999999999999, |
| "llm_top_5_test_accuracy": 0.8143375, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null |
| }, |
| "sae": { |
| "sae_test_accuracy": 0.9559687942266464, |
| "sae_top_1_test_accuracy": 0.82021875, |
| "sae_top_2_test_accuracy": 0.85463125, |
| "sae_top_5_test_accuracy": 0.8954249999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_results", |
| "llm_test_accuracy": 0.9692000389099121, |
| "llm_top_1_test_accuracy": 0.6884, |
| "llm_top_2_test_accuracy": 0.7354, |
| "llm_top_5_test_accuracy": 0.8140000000000001, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9644000411033631, |
| "sae_top_1_test_accuracy": 0.8154, |
| "sae_top_2_test_accuracy": 0.8882, |
| "sae_top_5_test_accuracy": 0.916, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set2_results", |
| "llm_test_accuracy": 0.9556000471115113, |
| "llm_top_1_test_accuracy": 0.6532, |
| "llm_top_2_test_accuracy": 0.6836, |
| "llm_top_5_test_accuracy": 0.7530000000000001, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.946000051498413, |
| "sae_top_1_test_accuracy": 0.766, |
| "sae_top_2_test_accuracy": 0.8472, |
| "sae_top_5_test_accuracy": 0.8699999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set3_results", |
| "llm_test_accuracy": 0.9272000312805175, |
| "llm_top_1_test_accuracy": 0.6908, |
| "llm_top_2_test_accuracy": 0.7049999999999998, |
| "llm_top_5_test_accuracy": 0.7592000000000001, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9274000287055969, |
| "sae_top_1_test_accuracy": 0.7942, |
| "sae_top_2_test_accuracy": 0.8019999999999999, |
| "sae_top_5_test_accuracy": 0.8443999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results", |
| "llm_test_accuracy": 0.9304000496864319, |
| "llm_top_1_test_accuracy": 0.6856, |
| "llm_top_2_test_accuracy": 0.7644, |
| "llm_top_5_test_accuracy": 0.8042, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9318000435829162, |
| "sae_top_1_test_accuracy": 0.7597999999999999, |
| "sae_top_2_test_accuracy": 0.8156000000000001, |
| "sae_top_5_test_accuracy": 0.8432000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results", |
| "llm_test_accuracy": 0.9685000479221344, |
| "llm_top_1_test_accuracy": 0.675, |
| "llm_top_2_test_accuracy": 0.71, |
| "llm_top_5_test_accuracy": 0.811, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9570000469684601, |
| "sae_top_1_test_accuracy": 0.805, |
| "sae_top_2_test_accuracy": 0.811, |
| "sae_top_5_test_accuracy": 0.886, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "codeparrot/github-code_results", |
| "llm_test_accuracy": 0.9712000370025635, |
| "llm_top_1_test_accuracy": 0.638, |
| "llm_top_2_test_accuracy": 0.6716, |
| "llm_top_5_test_accuracy": 0.7682, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9728000640869141, |
| "sae_top_1_test_accuracy": 0.8156000000000001, |
| "sae_top_2_test_accuracy": 0.8216000000000001, |
| "sae_top_5_test_accuracy": 0.9248, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "fancyzhx/ag_news_results", |
| "llm_test_accuracy": 0.9472500383853912, |
| "llm_top_1_test_accuracy": 0.6499999999999999, |
| "llm_top_2_test_accuracy": 0.726, |
| "llm_top_5_test_accuracy": 0.8055, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9487500488758087, |
| "sae_top_1_test_accuracy": 0.80675, |
| "sae_top_2_test_accuracy": 0.8522500000000001, |
| "sae_top_5_test_accuracy": 0.8789999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "Helsinki-NLP/europarl_results", |
| "llm_test_accuracy": 0.9998000144958497, |
| "llm_top_1_test_accuracy": 0.9696, |
| "llm_top_2_test_accuracy": 0.9952, |
| "llm_top_5_test_accuracy": 0.9996, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9996000289916992, |
| "sae_top_1_test_accuracy": 0.999, |
| "sae_top_2_test_accuracy": 0.9992000000000001, |
| "sae_top_5_test_accuracy": 1.0, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.24.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.24.hook_resid_post", |
| "hook_layer": 24, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1_results": { |
| "sae_test_accuracy": { |
| "0": 0.9540000557899475, |
| "1": 0.9650000333786011, |
| "2": 0.9450000524520874, |
| "6": 0.9890000224113464, |
| "9": 0.9690000414848328 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9490000605583191, |
| "1": 0.9660000205039978, |
| "2": 0.956000030040741, |
| "6": 0.9930000305175781, |
| "9": 0.9820000529289246 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.622, |
| "1": 0.624, |
| "2": 0.655, |
| "6": 0.813, |
| "9": 0.728 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.605, |
| "1": 0.625, |
| "2": 0.835, |
| "6": 0.835, |
| "9": 0.777 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.736, |
| "1": 0.681, |
| "2": 0.862, |
| "6": 0.896, |
| "9": 0.895 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.638, |
| "1": 0.807, |
| "2": 0.875, |
| "6": 0.813, |
| "9": 0.944 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.868, |
| "1": 0.811, |
| "2": 0.863, |
| "6": 0.94, |
| "9": 0.959 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.874, |
| "1": 0.865, |
| "2": 0.895, |
| "6": 0.985, |
| "9": 0.961 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set2_results": { |
| "sae_test_accuracy": { |
| "11": 0.9570000171661377, |
| "13": 0.9460000395774841, |
| "14": 0.9490000605583191, |
| "18": 0.9160000681877136, |
| "19": 0.9620000720024109 |
| }, |
| "llm_test_accuracy": { |
| "11": 0.9590000510215759, |
| "13": 0.9450000524520874, |
| "14": 0.9690000414848328, |
| "18": 0.9440000653266907, |
| "19": 0.9610000252723694 |
| }, |
| "llm_top_1_test_accuracy": { |
| "11": 0.589, |
| "13": 0.645, |
| "14": 0.611, |
| "18": 0.669, |
| "19": 0.752 |
| }, |
| "llm_top_2_test_accuracy": { |
| "11": 0.629, |
| "13": 0.672, |
| "14": 0.614, |
| "18": 0.738, |
| "19": 0.765 |
| }, |
| "llm_top_5_test_accuracy": { |
| "11": 0.821, |
| "13": 0.746, |
| "14": 0.665, |
| "18": 0.749, |
| "19": 0.784 |
| }, |
| "sae_top_1_test_accuracy": { |
| "11": 0.852, |
| "13": 0.653, |
| "14": 0.904, |
| "18": 0.655, |
| "19": 0.766 |
| }, |
| "sae_top_2_test_accuracy": { |
| "11": 0.883, |
| "13": 0.759, |
| "14": 0.9, |
| "18": 0.86, |
| "19": 0.834 |
| }, |
| "sae_top_5_test_accuracy": { |
| "11": 0.938, |
| "13": 0.815, |
| "14": 0.894, |
| "18": 0.872, |
| "19": 0.831 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set3_results": { |
| "sae_test_accuracy": { |
| "20": 0.9570000171661377, |
| "21": 0.9040000438690186, |
| "22": 0.9200000166893005, |
| "25": 0.9570000171661377, |
| "26": 0.8990000486373901 |
| }, |
| "llm_test_accuracy": { |
| "20": 0.9500000476837158, |
| "21": 0.9140000343322754, |
| "22": 0.9240000247955322, |
| "25": 0.9650000333786011, |
| "26": 0.8830000162124634 |
| }, |
| "llm_top_1_test_accuracy": { |
| "20": 0.722, |
| "21": 0.718, |
| "22": 0.658, |
| "25": 0.722, |
| "26": 0.634 |
| }, |
| "llm_top_2_test_accuracy": { |
| "20": 0.719, |
| "21": 0.733, |
| "22": 0.67, |
| "25": 0.711, |
| "26": 0.692 |
| }, |
| "llm_top_5_test_accuracy": { |
| "20": 0.847, |
| "21": 0.798, |
| "22": 0.672, |
| "25": 0.739, |
| "26": 0.74 |
| }, |
| "sae_top_1_test_accuracy": { |
| "20": 0.852, |
| "21": 0.749, |
| "22": 0.858, |
| "25": 0.865, |
| "26": 0.647 |
| }, |
| "sae_top_2_test_accuracy": { |
| "20": 0.866, |
| "21": 0.769, |
| "22": 0.85, |
| "25": 0.874, |
| "26": 0.651 |
| }, |
| "sae_top_5_test_accuracy": { |
| "20": 0.892, |
| "21": 0.8, |
| "22": 0.863, |
| "25": 0.897, |
| "26": 0.77 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_results": { |
| "sae_test_accuracy": { |
| "1": 0.9530000686645508, |
| "2": 0.9470000267028809, |
| "3": 0.9360000491142273, |
| "5": 0.9360000491142273, |
| "6": 0.8870000243186951 |
| }, |
| "llm_test_accuracy": { |
| "1": 0.9590000510215759, |
| "2": 0.937000036239624, |
| "3": 0.937000036239624, |
| "5": 0.9260000586509705, |
| "6": 0.893000066280365 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1": 0.735, |
| "2": 0.753, |
| "3": 0.607, |
| "5": 0.71, |
| "6": 0.623 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1": 0.822, |
| "2": 0.824, |
| "3": 0.709, |
| "5": 0.775, |
| "6": 0.692 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1": 0.874, |
| "2": 0.86, |
| "3": 0.759, |
| "5": 0.815, |
| "6": 0.713 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1": 0.836, |
| "2": 0.851, |
| "3": 0.577, |
| "5": 0.811, |
| "6": 0.724 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1": 0.882, |
| "2": 0.884, |
| "3": 0.711, |
| "5": 0.858, |
| "6": 0.743 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1": 0.902, |
| "2": 0.884, |
| "3": 0.761, |
| "5": 0.896, |
| "6": 0.773 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_sentiment_results": { |
| "sae_test_accuracy": { |
| "1.0": 0.9580000638961792, |
| "5.0": 0.956000030040741 |
| }, |
| "llm_test_accuracy": { |
| "1.0": 0.9660000205039978, |
| "5.0": 0.971000075340271 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1.0": 0.675, |
| "5.0": 0.675 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1.0": 0.71, |
| "5.0": 0.71 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1.0": 0.811, |
| "5.0": 0.811 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1.0": 0.805, |
| "5.0": 0.805 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1.0": 0.811, |
| "5.0": 0.811 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1.0": 0.886, |
| "5.0": 0.886 |
| } |
| }, |
| "codeparrot/github-code_results": { |
| "sae_test_accuracy": { |
| "C": 0.9630000591278076, |
| "Python": 0.9850000739097595, |
| "HTML": 0.9830000400543213, |
| "Java": 0.971000075340271, |
| "PHP": 0.9620000720024109 |
| }, |
| "llm_test_accuracy": { |
| "C": 0.9570000171661377, |
| "Python": 0.9880000352859497, |
| "HTML": 0.984000027179718, |
| "Java": 0.9630000591278076, |
| "PHP": 0.9640000462532043 |
| }, |
| "llm_top_1_test_accuracy": { |
| "C": 0.68, |
| "Python": 0.611, |
| "HTML": 0.683, |
| "Java": 0.639, |
| "PHP": 0.577 |
| }, |
| "llm_top_2_test_accuracy": { |
| "C": 0.701, |
| "Python": 0.715, |
| "HTML": 0.673, |
| "Java": 0.655, |
| "PHP": 0.614 |
| }, |
| "llm_top_5_test_accuracy": { |
| "C": 0.746, |
| "Python": 0.737, |
| "HTML": 0.873, |
| "Java": 0.782, |
| "PHP": 0.703 |
| }, |
| "sae_top_1_test_accuracy": { |
| "C": 0.627, |
| "Python": 0.959, |
| "HTML": 0.914, |
| "Java": 0.645, |
| "PHP": 0.933 |
| }, |
| "sae_top_2_test_accuracy": { |
| "C": 0.636, |
| "Python": 0.961, |
| "HTML": 0.922, |
| "Java": 0.648, |
| "PHP": 0.941 |
| }, |
| "sae_top_5_test_accuracy": { |
| "C": 0.859, |
| "Python": 0.97, |
| "HTML": 0.952, |
| "Java": 0.903, |
| "PHP": 0.94 |
| } |
| }, |
| "fancyzhx/ag_news_results": { |
| "sae_test_accuracy": { |
| "0": 0.9280000329017639, |
| "1": 0.9860000610351562, |
| "2": 0.9230000376701355, |
| "3": 0.9580000638961792 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9430000185966492, |
| "1": 0.9880000352859497, |
| "2": 0.9220000505447388, |
| "3": 0.9360000491142273 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.633, |
| "1": 0.638, |
| "2": 0.633, |
| "3": 0.696 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.721, |
| "1": 0.76, |
| "2": 0.638, |
| "3": 0.785 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.823, |
| "1": 0.821, |
| "2": 0.764, |
| "3": 0.814 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.876, |
| "1": 0.857, |
| "2": 0.824, |
| "3": 0.67 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.875, |
| "1": 0.971, |
| "2": 0.838, |
| "3": 0.725 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.886, |
| "1": 0.971, |
| "2": 0.831, |
| "3": 0.828 |
| } |
| }, |
| "Helsinki-NLP/europarl_results": { |
| "sae_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 1.0, |
| "de": 0.999000072479248, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "llm_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "llm_top_1_test_accuracy": { |
| "en": 0.997, |
| "fr": 0.998, |
| "de": 0.981, |
| "es": 0.874, |
| "nl": 0.998 |
| }, |
| "llm_top_2_test_accuracy": { |
| "en": 0.999, |
| "fr": 0.998, |
| "de": 0.996, |
| "es": 0.984, |
| "nl": 0.999 |
| }, |
| "llm_top_5_test_accuracy": { |
| "en": 0.999, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 0.999, |
| "nl": 1.0 |
| }, |
| "sae_top_1_test_accuracy": { |
| "en": 0.997, |
| "fr": 0.998, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "sae_top_2_test_accuracy": { |
| "en": 0.999, |
| "fr": 0.998, |
| "de": 1.0, |
| "es": 0.999, |
| "nl": 1.0 |
| }, |
| "sae_top_5_test_accuracy": { |
| "en": 1.0, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| } |
| } |
| } |
| } |