{
  "eval_type_id": "sparse_probing",
  "eval_config": {
    "random_seed": 42,
    "dataset_names": [
      "LabHC/bias_in_bios_class_set1",
      "LabHC/bias_in_bios_class_set2",
      "LabHC/bias_in_bios_class_set3",
      "canrager/amazon_reviews_mcauley_1and5",
      "canrager/amazon_reviews_mcauley_1and5_sentiment",
      "codeparrot/github-code",
      "fancyzhx/ag_news",
      "Helsinki-NLP/europarl"
    ],
    "probe_train_set_size": 4000,
    "probe_test_set_size": 1000,
    "context_length": 128,
    "sae_batch_size": 125,
    "llm_batch_size": 32,
    "llm_dtype": "bfloat16",
    "model_name": "gemma-2-2b",
    "k_values": [
      1,
      2,
      5
    ],
    "lower_vram_usage": false
  },
  "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0",
  "datetime_epoch_millis": 1745753462593,
  "eval_result_metrics": {
    "llm": {
      "llm_test_accuracy": 0.9447562988847494,
      "llm_top_1_test_accuracy": 0.70701875,
      "llm_top_2_test_accuracy": 0.7669187500000001,
      "llm_top_5_test_accuracy": 0.82511875,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null
    },
    "sae": {
      "sae_test_accuracy": 0.9392062962055207,
      "sae_top_1_test_accuracy": 0.7778812500000001,
      "sae_top_2_test_accuracy": 0.8093375,
      "sae_top_5_test_accuracy": 0.8683312500000001,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  },
  "eval_result_details": [
    {
      "dataset_name": "LabHC/bias_in_bios_class_set1_results",
      "llm_test_accuracy": 0.9630000591278076,
      "llm_top_1_test_accuracy": 0.6832,
      "llm_top_2_test_accuracy": 0.7314,
      "llm_top_5_test_accuracy": 0.8288,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9586000561714172,
      "sae_top_1_test_accuracy": 0.7417999999999999,
      "sae_top_2_test_accuracy": 0.8353999999999999,
      "sae_top_5_test_accuracy": 0.892,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set2_results",
      "llm_test_accuracy": 0.9546000361442566,
      "llm_top_1_test_accuracy": 0.7295999999999999,
      "llm_top_2_test_accuracy": 0.7484,
      "llm_top_5_test_accuracy": 0.8056000000000001,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9410000443458557,
      "sae_top_1_test_accuracy": 0.7023999999999999,
      "sae_top_2_test_accuracy": 0.7706,
      "sae_top_5_test_accuracy": 0.8273999999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set3_results",
      "llm_test_accuracy": 0.9240000486373902,
      "llm_top_1_test_accuracy": 0.6786,
      "llm_top_2_test_accuracy": 0.741,
      "llm_top_5_test_accuracy": 0.7786,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9124000430107116,
      "sae_top_1_test_accuracy": 0.7596,
      "sae_top_2_test_accuracy": 0.7676000000000001,
      "sae_top_5_test_accuracy": 0.8502000000000001,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
      "llm_test_accuracy": 0.906600046157837,
      "llm_top_1_test_accuracy": 0.6828,
      "llm_top_2_test_accuracy": 0.7268000000000001,
      "llm_top_5_test_accuracy": 0.7798,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9050000429153442,
      "sae_top_1_test_accuracy": 0.7552,
      "sae_top_2_test_accuracy": 0.7514000000000001,
      "sae_top_5_test_accuracy": 0.817,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
      "llm_test_accuracy": 0.8905000686645508,
      "llm_top_1_test_accuracy": 0.613,
      "llm_top_2_test_accuracy": 0.657,
      "llm_top_5_test_accuracy": 0.703,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.877500057220459,
      "sae_top_1_test_accuracy": 0.656,
      "sae_top_2_test_accuracy": 0.681,
      "sae_top_5_test_accuracy": 0.748,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "codeparrot/github-code_results",
      "llm_test_accuracy": 0.9748000621795654,
      "llm_top_1_test_accuracy": 0.7485999999999999,
      "llm_top_2_test_accuracy": 0.8304,
      "llm_top_5_test_accuracy": 0.8728,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9726000428199768,
      "sae_top_1_test_accuracy": 0.8523999999999999,
      "sae_top_2_test_accuracy": 0.8832000000000001,
      "sae_top_5_test_accuracy": 0.9176,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "fancyzhx/ag_news_results",
      "llm_test_accuracy": 0.9447500556707382,
      "llm_top_1_test_accuracy": 0.71475,
      "llm_top_2_test_accuracy": 0.7917500000000001,
      "llm_top_5_test_accuracy": 0.8697499999999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9477500319480896,
      "sae_top_1_test_accuracy": 0.8362499999999999,
      "sae_top_2_test_accuracy": 0.8435,
      "sae_top_5_test_accuracy": 0.8972500000000001,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "Helsinki-NLP/europarl_results",
      "llm_test_accuracy": 0.9998000144958497,
      "llm_top_1_test_accuracy": 0.8055999999999999,
      "llm_top_2_test_accuracy": 0.9086000000000001,
      "llm_top_5_test_accuracy": 0.9625999999999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9988000512123107,
      "sae_top_1_test_accuracy": 0.9194000000000001,
      "sae_top_2_test_accuracy": 0.942,
      "sae_top_5_test_accuracy": 0.9972,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  ],
  "sae_bench_commit_hash": "Unknown",
  "sae_lens_id": "blocks.1.hook_resid_post",
  "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc",
  "sae_lens_version": "5.9.1",
  "sae_cfg_dict": {
    "architecture": "jumprelu",
    "d_in": 2304,
    "d_sae": 32768,
    "activation_fn_str": "relu",
    "apply_b_dec_to_input": true,
    "finetuning_scaling_factor": false,
    "context_size": 1024,
    "model_name": "gemma-2-2b",
    "hook_name": "blocks.1.hook_resid_post",
    "hook_layer": 1,
    "hook_head_index": null,
    "prepend_bos": true,
    "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B",
    "dataset_trust_remote_code": true,
    "normalize_activations": "none",
    "dtype": "torch.bfloat16",
    "device": "cuda",
    "sae_lens_training_version": "5.5.1",
    "activation_fn_kwargs": {
      "k": 40
    },
    "neuronpedia_id": null,
    "model_from_pretrained_kwargs": {
      "center_writing_weights": false
    },
    "seqpos_slice": [
      null
    ]
  },
  "eval_result_unstructured": {
    "LabHC/bias_in_bios_class_set1_results": {
      "sae_test_accuracy": {
        "0": 0.9240000247955322,
        "1": 0.9630000591278076,
        "2": 0.9480000734329224,
        "6": 0.987000048160553,
        "9": 0.971000075340271
      },
      "llm_test_accuracy": {
        "0": 0.940000057220459,
        "1": 0.9720000624656677,
        "2": 0.9410000443458557,
        "6": 0.9910000562667847,
        "9": 0.971000075340271
      },
      "llm_top_1_test_accuracy": {
        "0": 0.611,
        "1": 0.633,
        "2": 0.567,
        "6": 0.786,
        "9": 0.819
      },
      "llm_top_2_test_accuracy": {
        "0": 0.697,
        "1": 0.673,
        "2": 0.698,
        "6": 0.796,
        "9": 0.793
      },
      "llm_top_5_test_accuracy": {
        "0": 0.754,
        "1": 0.759,
        "2": 0.866,
        "6": 0.895,
        "9": 0.87
      },
      "sae_top_1_test_accuracy": {
        "0": 0.708,
        "1": 0.639,
        "2": 0.859,
        "6": 0.735,
        "9": 0.768
      },
      "sae_top_2_test_accuracy": {
        "0": 0.709,
        "1": 0.705,
        "2": 0.886,
        "6": 0.943,
        "9": 0.934
      },
      "sae_top_5_test_accuracy": {
        "0": 0.801,
        "1": 0.836,
        "2": 0.906,
        "6": 0.963,
        "9": 0.954
      }
    },
    "LabHC/bias_in_bios_class_set2_results": {
      "sae_test_accuracy": {
        "11": 0.9590000510215759,
        "13": 0.9460000395774841,
        "14": 0.9430000185966492,
        "18": 0.9040000438690186,
        "19": 0.9530000686645508
      },
      "llm_test_accuracy": {
        "11": 0.956000030040741,
        "13": 0.9540000557899475,
        "14": 0.9600000381469727,
        "18": 0.9330000281333923,
        "19": 0.9700000286102295
      },
      "llm_top_1_test_accuracy": {
        "11": 0.711,
        "13": 0.769,
        "14": 0.662,
        "18": 0.686,
        "19": 0.82
      },
      "llm_top_2_test_accuracy": {
        "11": 0.748,
        "13": 0.752,
        "14": 0.719,
        "18": 0.687,
        "19": 0.836
      },
      "llm_top_5_test_accuracy": {
        "11": 0.857,
        "13": 0.802,
        "14": 0.811,
        "18": 0.722,
        "19": 0.836
      },
      "sae_top_1_test_accuracy": {
        "11": 0.705,
        "13": 0.635,
        "14": 0.666,
        "18": 0.687,
        "19": 0.819
      },
      "sae_top_2_test_accuracy": {
        "11": 0.695,
        "13": 0.74,
        "14": 0.85,
        "18": 0.715,
        "19": 0.853
      },
      "sae_top_5_test_accuracy": {
        "11": 0.859,
        "13": 0.82,
        "14": 0.862,
        "18": 0.738,
        "19": 0.858
      }
    },
    "LabHC/bias_in_bios_class_set3_results": {
      "sae_test_accuracy": {
        "20": 0.9300000667572021,
        "21": 0.9040000438690186,
        "22": 0.8990000486373901,
        "25": 0.9550000429153442,
        "26": 0.8740000128746033
      },
      "llm_test_accuracy": {
        "20": 0.9480000734329224,
        "21": 0.9130000472068787,
        "22": 0.9240000247955322,
        "25": 0.9550000429153442,
        "26": 0.8800000548362732
      },
      "llm_top_1_test_accuracy": {
        "20": 0.762,
        "21": 0.741,
        "22": 0.633,
        "25": 0.667,
        "26": 0.59
      },
      "llm_top_2_test_accuracy": {
        "20": 0.842,
        "21": 0.728,
        "22": 0.713,
        "25": 0.775,
        "26": 0.647
      },
      "llm_top_5_test_accuracy": {
        "20": 0.866,
        "21": 0.813,
        "22": 0.727,
        "25": 0.781,
        "26": 0.706
      },
      "sae_top_1_test_accuracy": {
        "20": 0.757,
        "21": 0.74,
        "22": 0.745,
        "25": 0.836,
        "26": 0.72
      },
      "sae_top_2_test_accuracy": {
        "20": 0.771,
        "21": 0.764,
        "22": 0.742,
        "25": 0.855,
        "26": 0.706
      },
      "sae_top_5_test_accuracy": {
        "20": 0.906,
        "21": 0.776,
        "22": 0.864,
        "25": 0.894,
        "26": 0.811
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_results": {
      "sae_test_accuracy": {
        "1": 0.9340000152587891,
        "2": 0.921000063419342,
        "3": 0.8960000276565552,
        "5": 0.9160000681877136,
        "6": 0.8580000400543213
      },
      "llm_test_accuracy": {
        "1": 0.9290000200271606,
        "2": 0.9290000200271606,
        "3": 0.9020000696182251,
        "5": 0.9110000729560852,
        "6": 0.862000048160553
      },
      "llm_top_1_test_accuracy": {
        "1": 0.712,
        "2": 0.763,
        "3": 0.647,
        "5": 0.606,
        "6": 0.686
      },
      "llm_top_2_test_accuracy": {
        "1": 0.772,
        "2": 0.76,
        "3": 0.676,
        "5": 0.739,
        "6": 0.687
      },
      "llm_top_5_test_accuracy": {
        "1": 0.826,
        "2": 0.82,
        "3": 0.754,
        "5": 0.779,
        "6": 0.72
      },
      "sae_top_1_test_accuracy": {
        "1": 0.729,
        "2": 0.865,
        "3": 0.604,
        "5": 0.867,
        "6": 0.711
      },
      "sae_top_2_test_accuracy": {
        "1": 0.706,
        "2": 0.876,
        "3": 0.624,
        "5": 0.862,
        "6": 0.689
      },
      "sae_top_5_test_accuracy": {
        "1": 0.859,
        "2": 0.873,
        "3": 0.763,
        "5": 0.878,
        "6": 0.712
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
      "sae_test_accuracy": {
        "1.0": 0.8790000677108765,
        "5.0": 0.8760000467300415
      },
      "llm_test_accuracy": {
        "1.0": 0.8880000710487366,
        "5.0": 0.893000066280365
      },
      "llm_top_1_test_accuracy": {
        "1.0": 0.613,
        "5.0": 0.613
      },
      "llm_top_2_test_accuracy": {
        "1.0": 0.657,
        "5.0": 0.657
      },
      "llm_top_5_test_accuracy": {
        "1.0": 0.703,
        "5.0": 0.703
      },
      "sae_top_1_test_accuracy": {
        "1.0": 0.656,
        "5.0": 0.656
      },
      "sae_top_2_test_accuracy": {
        "1.0": 0.681,
        "5.0": 0.681
      },
      "sae_top_5_test_accuracy": {
        "1.0": 0.748,
        "5.0": 0.748
      }
    },
    "codeparrot/github-code_results": {
      "sae_test_accuracy": {
        "C": 0.9600000381469727,
        "Python": 0.987000048160553,
        "HTML": 0.9900000691413879,
        "Java": 0.9660000205039978,
        "PHP": 0.9600000381469727
      },
      "llm_test_accuracy": {
        "C": 0.971000075340271,
        "Python": 0.9900000691413879,
        "HTML": 0.9860000610351562,
        "Java": 0.9690000414848328,
        "PHP": 0.9580000638961792
      },
      "llm_top_1_test_accuracy": {
        "C": 0.736,
        "Python": 0.663,
        "HTML": 0.917,
        "Java": 0.636,
        "PHP": 0.791
      },
      "llm_top_2_test_accuracy": {
        "C": 0.786,
        "Python": 0.811,
        "HTML": 0.965,
        "Java": 0.738,
        "PHP": 0.852
      },
      "llm_top_5_test_accuracy": {
        "C": 0.869,
        "Python": 0.85,
        "HTML": 0.95,
        "Java": 0.823,
        "PHP": 0.872
      },
      "sae_top_1_test_accuracy": {
        "C": 0.742,
        "Python": 0.905,
        "HTML": 0.934,
        "Java": 0.796,
        "PHP": 0.885
      },
      "sae_top_2_test_accuracy": {
        "C": 0.862,
        "Python": 0.896,
        "HTML": 0.95,
        "Java": 0.8,
        "PHP": 0.908
      },
      "sae_top_5_test_accuracy": {
        "C": 0.878,
        "Python": 0.923,
        "HTML": 0.954,
        "Java": 0.917,
        "PHP": 0.916
      }
    },
    "fancyzhx/ag_news_results": {
      "sae_test_accuracy": {
        "0": 0.9330000281333923,
        "1": 0.984000027179718,
        "2": 0.9200000166893005,
        "3": 0.9540000557899475
      },
      "llm_test_accuracy": {
        "0": 0.9330000281333923,
        "1": 0.9850000739097595,
        "2": 0.9170000553131104,
        "3": 0.9440000653266907
      },
      "llm_top_1_test_accuracy": {
        "0": 0.728,
        "1": 0.721,
        "2": 0.701,
        "3": 0.709
      },
      "llm_top_2_test_accuracy": {
        "0": 0.782,
        "1": 0.909,
        "2": 0.686,
        "3": 0.79
      },
      "llm_top_5_test_accuracy": {
        "0": 0.846,
        "1": 0.949,
        "2": 0.837,
        "3": 0.847
      },
      "sae_top_1_test_accuracy": {
        "0": 0.8,
        "1": 0.969,
        "2": 0.824,
        "3": 0.752
      },
      "sae_top_2_test_accuracy": {
        "0": 0.82,
        "1": 0.962,
        "2": 0.818,
        "3": 0.774
      },
      "sae_top_5_test_accuracy": {
        "0": 0.903,
        "1": 0.975,
        "2": 0.83,
        "3": 0.881
      }
    },
    "Helsinki-NLP/europarl_results": {
      "sae_test_accuracy": {
        "en": 0.9970000386238098,
        "fr": 0.999000072479248,
        "de": 0.999000072479248,
        "es": 0.999000072479248,
        "nl": 1.0
      },
      "llm_test_accuracy": {
        "en": 1.0,
        "fr": 1.0,
        "de": 0.999000072479248,
        "es": 1.0,
        "nl": 1.0
      },
      "llm_top_1_test_accuracy": {
        "en": 0.892,
        "fr": 0.733,
        "de": 0.859,
        "es": 0.779,
        "nl": 0.765
      },
      "llm_top_2_test_accuracy": {
        "en": 0.991,
        "fr": 0.813,
        "de": 0.861,
        "es": 0.965,
        "nl": 0.913
      },
      "llm_top_5_test_accuracy": {
        "en": 0.999,
        "fr": 0.975,
        "de": 0.88,
        "es": 0.993,
        "nl": 0.966
      },
      "sae_top_1_test_accuracy": {
        "en": 1.0,
        "fr": 0.996,
        "de": 0.8,
        "es": 0.936,
        "nl": 0.865
      },
      "sae_top_2_test_accuracy": {
        "en": 0.999,
        "fr": 0.997,
        "de": 0.872,
        "es": 0.953,
        "nl": 0.889
      },
      "sae_top_5_test_accuracy": {
        "en": 0.998,
        "fr": 0.997,
        "de": 1.0,
        "es": 0.996,
        "nl": 0.995
      }
    }
  }
}