{
  "eval_type_id": "sparse_probing",
  "eval_config": {
    "random_seed": 42,
    "dataset_names": [
      "LabHC/bias_in_bios_class_set1",
      "LabHC/bias_in_bios_class_set2",
      "LabHC/bias_in_bios_class_set3",
      "canrager/amazon_reviews_mcauley_1and5",
      "canrager/amazon_reviews_mcauley_1and5_sentiment",
      "codeparrot/github-code",
      "fancyzhx/ag_news",
      "Helsinki-NLP/europarl"
    ],
    "probe_train_set_size": 4000,
    "probe_test_set_size": 1000,
    "context_length": 128,
    "sae_batch_size": 125,
    "llm_batch_size": 32,
    "llm_dtype": "bfloat16",
    "model_name": "gemma-2-2b",
    "k_values": [
      1,
      2,
      5
    ],
    "lower_vram_usage": false
  },
  "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0",
  "datetime_epoch_millis": 1745754840938,
  "eval_result_metrics": {
    "llm": {
      "llm_test_accuracy": 0.95909378901124,
      "llm_top_1_test_accuracy": 0.6874125,
      "llm_top_2_test_accuracy": 0.74785625,
      "llm_top_5_test_accuracy": 0.81445625,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null
    },
    "sae": {
      "sae_test_accuracy": 0.9582250386476516,
      "sae_top_1_test_accuracy": 0.7983249999999998,
      "sae_top_2_test_accuracy": 0.85949375,
      "sae_top_5_test_accuracy": 0.9130312499999998,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  },
  "eval_result_details": [
    {
      "dataset_name": "LabHC/bias_in_bios_class_set1_results",
      "llm_test_accuracy": 0.9692000389099121,
      "llm_top_1_test_accuracy": 0.6634,
      "llm_top_2_test_accuracy": 0.7162,
      "llm_top_5_test_accuracy": 0.8002,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9664000272750854,
      "sae_top_1_test_accuracy": 0.8210000000000001,
      "sae_top_2_test_accuracy": 0.9026,
      "sae_top_5_test_accuracy": 0.9283999999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set2_results",
      "llm_test_accuracy": 0.9514000296592713,
      "llm_top_1_test_accuracy": 0.6702000000000001,
      "llm_top_2_test_accuracy": 0.7120000000000001,
      "llm_top_5_test_accuracy": 0.7737999999999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9498000502586365,
      "sae_top_1_test_accuracy": 0.7188000000000001,
      "sae_top_2_test_accuracy": 0.8097999999999999,
      "sae_top_5_test_accuracy": 0.901,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set3_results",
      "llm_test_accuracy": 0.9298000454902648,
      "llm_top_1_test_accuracy": 0.6961999999999999,
      "llm_top_2_test_accuracy": 0.7348000000000001,
      "llm_top_5_test_accuracy": 0.7846,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9272000312805175,
      "sae_top_1_test_accuracy": 0.7807999999999999,
      "sae_top_2_test_accuracy": 0.841,
      "sae_top_5_test_accuracy": 0.8767999999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
      "llm_test_accuracy": 0.9322000503540039,
      "llm_top_1_test_accuracy": 0.6325999999999999,
      "llm_top_2_test_accuracy": 0.7118,
      "llm_top_5_test_accuracy": 0.7537999999999998,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9318000316619873,
      "sae_top_1_test_accuracy": 0.7916,
      "sae_top_2_test_accuracy": 0.8106,
      "sae_top_5_test_accuracy": 0.8436,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
      "llm_test_accuracy": 0.9725000560283661,
      "llm_top_1_test_accuracy": 0.693,
      "llm_top_2_test_accuracy": 0.743,
      "llm_top_5_test_accuracy": 0.788,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9695000350475311,
      "sae_top_1_test_accuracy": 0.816,
      "sae_top_2_test_accuracy": 0.842,
      "sae_top_5_test_accuracy": 0.945,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "codeparrot/github-code_results",
      "llm_test_accuracy": 0.9730000376701355,
      "llm_top_1_test_accuracy": 0.6319999999999999,
      "llm_top_2_test_accuracy": 0.6914000000000001,
      "llm_top_5_test_accuracy": 0.7806,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9722000360488892,
      "sae_top_1_test_accuracy": 0.6936,
      "sae_top_2_test_accuracy": 0.8186,
      "sae_top_5_test_accuracy": 0.9299999999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "fancyzhx/ag_news_results",
      "llm_test_accuracy": 0.9452500343322754,
      "llm_top_1_test_accuracy": 0.6795,
      "llm_top_2_test_accuracy": 0.74325,
      "llm_top_5_test_accuracy": 0.8432499999999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9495000541210175,
      "sae_top_1_test_accuracy": 0.789,
      "sae_top_2_test_accuracy": 0.85375,
      "sae_top_5_test_accuracy": 0.8812500000000001,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "Helsinki-NLP/europarl_results",
      "llm_test_accuracy": 0.9994000196456909,
      "llm_top_1_test_accuracy": 0.8324,
      "llm_top_2_test_accuracy": 0.9304,
      "llm_top_5_test_accuracy": 0.9914,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9994000434875489,
      "sae_top_1_test_accuracy": 0.9757999999999999,
      "sae_top_2_test_accuracy": 0.9975999999999999,
      "sae_top_5_test_accuracy": 0.9982000000000001,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  ],
  "sae_bench_commit_hash": "Unknown",
  "sae_lens_id": "blocks.18.hook_resid_post",
  "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc",
  "sae_lens_version": "5.9.1",
  "sae_cfg_dict": {
    "architecture": "jumprelu",
    "d_in": 2304,
    "d_sae": 32768,
    "activation_fn_str": "relu",
    "apply_b_dec_to_input": true,
    "finetuning_scaling_factor": false,
    "context_size": 1024,
    "model_name": "gemma-2-2b",
    "hook_name": "blocks.18.hook_resid_post",
    "hook_layer": 18,
    "hook_head_index": null,
    "prepend_bos": true,
    "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B",
    "dataset_trust_remote_code": true,
    "normalize_activations": "none",
    "dtype": "torch.bfloat16",
    "device": "cuda",
    "sae_lens_training_version": "5.5.1",
    "activation_fn_kwargs": {
      "k": 40
    },
    "neuronpedia_id": null,
    "model_from_pretrained_kwargs": {
      "center_writing_weights": false
    },
    "seqpos_slice": [
      null
    ]
  },
  "eval_result_unstructured": {
    "LabHC/bias_in_bios_class_set1_results": {
      "sae_test_accuracy": {
        "0": 0.9520000219345093,
        "1": 0.9610000252723694,
        "2": 0.9570000171661377,
        "6": 0.9920000433921814,
        "9": 0.9700000286102295
      },
      "llm_test_accuracy": {
        "0": 0.9550000429153442,
        "1": 0.9610000252723694,
        "2": 0.9590000510215759,
        "6": 0.9910000562667847,
        "9": 0.9800000190734863
      },
      "llm_top_1_test_accuracy": {
        "0": 0.573,
        "1": 0.644,
        "2": 0.638,
        "6": 0.775,
        "9": 0.687
      },
      "llm_top_2_test_accuracy": {
        "0": 0.576,
        "1": 0.644,
        "2": 0.762,
        "6": 0.872,
        "9": 0.727
      },
      "llm_top_5_test_accuracy": {
        "0": 0.714,
        "1": 0.693,
        "2": 0.818,
        "6": 0.892,
        "9": 0.884
      },
      "sae_top_1_test_accuracy": {
        "0": 0.835,
        "1": 0.651,
        "2": 0.892,
        "6": 0.781,
        "9": 0.946
      },
      "sae_top_2_test_accuracy": {
        "0": 0.836,
        "1": 0.833,
        "2": 0.916,
        "6": 0.977,
        "9": 0.951
      },
      "sae_top_5_test_accuracy": {
        "0": 0.839,
        "1": 0.937,
        "2": 0.917,
        "6": 0.99,
        "9": 0.959
      }
    },
    "LabHC/bias_in_bios_class_set2_results": {
      "sae_test_accuracy": {
        "11": 0.9640000462532043,
        "13": 0.9490000605583191,
        "14": 0.956000030040741,
        "18": 0.9270000457763672,
        "19": 0.9530000686645508
      },
      "llm_test_accuracy": {
        "11": 0.9570000171661377,
        "13": 0.9420000314712524,
        "14": 0.9660000205039978,
        "18": 0.9310000538825989,
        "19": 0.9610000252723694
      },
      "llm_top_1_test_accuracy": {
        "11": 0.547,
        "13": 0.668,
        "14": 0.639,
        "18": 0.692,
        "19": 0.805
      },
      "llm_top_2_test_accuracy": {
        "11": 0.685,
        "13": 0.713,
        "14": 0.648,
        "18": 0.748,
        "19": 0.766
      },
      "llm_top_5_test_accuracy": {
        "11": 0.826,
        "13": 0.761,
        "14": 0.705,
        "18": 0.726,
        "19": 0.851
      },
      "sae_top_1_test_accuracy": {
        "11": 0.557,
        "13": 0.67,
        "14": 0.886,
        "18": 0.681,
        "19": 0.8
      },
      "sae_top_2_test_accuracy": {
        "11": 0.768,
        "13": 0.802,
        "14": 0.885,
        "18": 0.735,
        "19": 0.859
      },
      "sae_top_5_test_accuracy": {
        "11": 0.935,
        "13": 0.879,
        "14": 0.89,
        "18": 0.911,
        "19": 0.89
      }
    },
    "LabHC/bias_in_bios_class_set3_results": {
      "sae_test_accuracy": {
        "20": 0.9650000333786011,
        "21": 0.9150000214576721,
        "22": 0.906000018119812,
        "25": 0.9590000510215759,
        "26": 0.8910000324249268
      },
      "llm_test_accuracy": {
        "20": 0.9540000557899475,
        "21": 0.9190000295639038,
        "22": 0.9220000505447388,
        "25": 0.9610000252723694,
        "26": 0.893000066280365
      },
      "llm_top_1_test_accuracy": {
        "20": 0.72,
        "21": 0.777,
        "22": 0.646,
        "25": 0.71,
        "26": 0.628
      },
      "llm_top_2_test_accuracy": {
        "20": 0.815,
        "21": 0.758,
        "22": 0.706,
        "25": 0.764,
        "26": 0.631
      },
      "llm_top_5_test_accuracy": {
        "20": 0.83,
        "21": 0.824,
        "22": 0.704,
        "25": 0.819,
        "26": 0.746
      },
      "sae_top_1_test_accuracy": {
        "20": 0.913,
        "21": 0.782,
        "22": 0.878,
        "25": 0.706,
        "26": 0.625
      },
      "sae_top_2_test_accuracy": {
        "20": 0.932,
        "21": 0.795,
        "22": 0.871,
        "25": 0.834,
        "26": 0.773
      },
      "sae_top_5_test_accuracy": {
        "20": 0.923,
        "21": 0.844,
        "22": 0.892,
        "25": 0.917,
        "26": 0.808
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_results": {
      "sae_test_accuracy": {
        "1": 0.9570000171661377,
        "2": 0.9310000538825989,
        "3": 0.9380000233650208,
        "5": 0.937000036239624,
        "6": 0.8960000276565552
      },
      "llm_test_accuracy": {
        "1": 0.9530000686645508,
        "2": 0.9460000395774841,
        "3": 0.9310000538825989,
        "5": 0.9320000410079956,
        "6": 0.8990000486373901
      },
      "llm_top_1_test_accuracy": {
        "1": 0.681,
        "2": 0.6,
        "3": 0.63,
        "5": 0.574,
        "6": 0.678
      },
      "llm_top_2_test_accuracy": {
        "1": 0.732,
        "2": 0.8,
        "3": 0.662,
        "5": 0.679,
        "6": 0.686
      },
      "llm_top_5_test_accuracy": {
        "1": 0.799,
        "2": 0.839,
        "3": 0.651,
        "5": 0.76,
        "6": 0.72
      },
      "sae_top_1_test_accuracy": {
        "1": 0.845,
        "2": 0.862,
        "3": 0.712,
        "5": 0.802,
        "6": 0.737
      },
      "sae_top_2_test_accuracy": {
        "1": 0.858,
        "2": 0.877,
        "3": 0.692,
        "5": 0.875,
        "6": 0.751
      },
      "sae_top_5_test_accuracy": {
        "1": 0.918,
        "2": 0.893,
        "3": 0.77,
        "5": 0.902,
        "6": 0.735
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
      "sae_test_accuracy": {
        "1.0": 0.9690000414848328,
        "5.0": 0.9700000286102295
      },
      "llm_test_accuracy": {
        "1.0": 0.9730000495910645,
        "5.0": 0.9720000624656677
      },
      "llm_top_1_test_accuracy": {
        "1.0": 0.693,
        "5.0": 0.693
      },
      "llm_top_2_test_accuracy": {
        "1.0": 0.743,
        "5.0": 0.743
      },
      "llm_top_5_test_accuracy": {
        "1.0": 0.788,
        "5.0": 0.788
      },
      "sae_top_1_test_accuracy": {
        "1.0": 0.816,
        "5.0": 0.816
      },
      "sae_top_2_test_accuracy": {
        "1.0": 0.842,
        "5.0": 0.842
      },
      "sae_top_5_test_accuracy": {
        "1.0": 0.945,
        "5.0": 0.945
      }
    },
    "codeparrot/github-code_results": {
      "sae_test_accuracy": {
        "C": 0.9570000171661377,
        "Python": 0.9850000739097595,
        "HTML": 0.9830000400543213,
        "Java": 0.9700000286102295,
        "PHP": 0.9660000205039978
      },
      "llm_test_accuracy": {
        "C": 0.9550000429153442,
        "Python": 0.9960000514984131,
        "HTML": 0.9880000352859497,
        "Java": 0.9660000205039978,
        "PHP": 0.9600000381469727
      },
      "llm_top_1_test_accuracy": {
        "C": 0.564,
        "Python": 0.61,
        "HTML": 0.795,
        "Java": 0.59,
        "PHP": 0.601
      },
      "llm_top_2_test_accuracy": {
        "C": 0.622,
        "Python": 0.675,
        "HTML": 0.861,
        "Java": 0.647,
        "PHP": 0.652
      },
      "llm_top_5_test_accuracy": {
        "C": 0.841,
        "Python": 0.714,
        "HTML": 0.907,
        "Java": 0.754,
        "PHP": 0.687
      },
      "sae_top_1_test_accuracy": {
        "C": 0.63,
        "Python": 0.63,
        "HTML": 0.695,
        "Java": 0.594,
        "PHP": 0.919
      },
      "sae_top_2_test_accuracy": {
        "C": 0.642,
        "Python": 0.94,
        "HTML": 0.933,
        "Java": 0.646,
        "PHP": 0.932
      },
      "sae_top_5_test_accuracy": {
        "C": 0.882,
        "Python": 0.961,
        "HTML": 0.954,
        "Java": 0.91,
        "PHP": 0.943
      }
    },
    "fancyzhx/ag_news_results": {
      "sae_test_accuracy": {
        "0": 0.9320000410079956,
        "1": 0.9860000610351562,
        "2": 0.9320000410079956,
        "3": 0.9480000734329224
      },
      "llm_test_accuracy": {
        "0": 0.9280000329017639,
        "1": 0.9880000352859497,
        "2": 0.9220000505447388,
        "3": 0.9430000185966492
      },
      "llm_top_1_test_accuracy": {
        "0": 0.692,
        "1": 0.635,
        "2": 0.677,
        "3": 0.714
      },
      "llm_top_2_test_accuracy": {
        "0": 0.693,
        "1": 0.795,
        "2": 0.701,
        "3": 0.784
      },
      "llm_top_5_test_accuracy": {
        "0": 0.843,
        "1": 0.888,
        "2": 0.803,
        "3": 0.839
      },
      "sae_top_1_test_accuracy": {
        "0": 0.861,
        "1": 0.961,
        "2": 0.597,
        "3": 0.737
      },
      "sae_top_2_test_accuracy": {
        "0": 0.866,
        "1": 0.967,
        "2": 0.75,
        "3": 0.832
      },
      "sae_top_5_test_accuracy": {
        "0": 0.88,
        "1": 0.966,
        "2": 0.817,
        "3": 0.862
      }
    },
    "Helsinki-NLP/europarl_results": {
      "sae_test_accuracy": {
        "en": 0.999000072479248,
        "fr": 1.0,
        "de": 1.0,
        "es": 0.999000072479248,
        "nl": 0.999000072479248
      },
      "llm_test_accuracy": {
        "en": 0.9980000257492065,
        "fr": 1.0,
        "de": 1.0,
        "es": 0.999000072479248,
        "nl": 1.0
      },
      "llm_top_1_test_accuracy": {
        "en": 0.89,
        "fr": 0.865,
        "de": 0.661,
        "es": 0.984,
        "nl": 0.762
      },
      "llm_top_2_test_accuracy": {
        "en": 0.903,
        "fr": 0.989,
        "de": 0.997,
        "es": 0.983,
        "nl": 0.78
      },
      "llm_top_5_test_accuracy": {
        "en": 0.969,
        "fr": 0.998,
        "de": 0.999,
        "es": 0.992,
        "nl": 0.999
      },
      "sae_top_1_test_accuracy": {
        "en": 0.889,
        "fr": 0.995,
        "de": 0.999,
        "es": 0.997,
        "nl": 0.999
      },
      "sae_top_2_test_accuracy": {
        "en": 0.999,
        "fr": 0.994,
        "de": 0.998,
        "es": 0.997,
        "nl": 1.0
      },
      "sae_top_5_test_accuracy": {
        "en": 0.999,
        "fr": 0.996,
        "de": 0.998,
        "es": 0.998,
        "nl": 1.0
      }
    }
  }
}