| { |
| "eval_type_id": "sparse_probing", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "LabHC/bias_in_bios_class_set2", |
| "LabHC/bias_in_bios_class_set3", |
| "canrager/amazon_reviews_mcauley_1and5", |
| "canrager/amazon_reviews_mcauley_1and5_sentiment", |
| "codeparrot/github-code", |
| "fancyzhx/ag_news", |
| "Helsinki-NLP/europarl" |
| ], |
| "probe_train_set_size": 4000, |
| "probe_test_set_size": 1000, |
| "context_length": 128, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "model_name": "gemma-2-2b", |
| "k_values": [ |
| 1, |
| 2, |
| 5 |
| ], |
| "lower_vram_usage": false |
| }, |
| "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0", |
| "datetime_epoch_millis": 1745754022657, |
| "eval_result_metrics": { |
| "llm": { |
| "llm_test_accuracy": 0.9575375493615865, |
| "llm_top_1_test_accuracy": 0.6629437499999999, |
| "llm_top_2_test_accuracy": 0.73073125, |
| "llm_top_5_test_accuracy": 0.78996875, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null |
| }, |
| "sae": { |
| "sae_test_accuracy": 0.9556375440210103, |
| "sae_top_1_test_accuracy": 0.77980625, |
| "sae_top_2_test_accuracy": 0.8275187499999999, |
| "sae_top_5_test_accuracy": 0.8989, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_results", |
| "llm_test_accuracy": 0.9702000498771668, |
| "llm_top_1_test_accuracy": 0.7175999999999999, |
| "llm_top_2_test_accuracy": 0.72, |
| "llm_top_5_test_accuracy": 0.7806, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.964400053024292, |
| "sae_top_1_test_accuracy": 0.8340000000000002, |
| "sae_top_2_test_accuracy": 0.8937999999999999, |
| "sae_top_5_test_accuracy": 0.9102, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set2_results", |
| "llm_test_accuracy": 0.9552000522613525, |
| "llm_top_1_test_accuracy": 0.6586000000000001, |
| "llm_top_2_test_accuracy": 0.7243999999999999, |
| "llm_top_5_test_accuracy": 0.7744000000000001, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9530000448226928, |
| "sae_top_1_test_accuracy": 0.7666000000000001, |
| "sae_top_2_test_accuracy": 0.8149999999999998, |
| "sae_top_5_test_accuracy": 0.9, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set3_results", |
| "llm_test_accuracy": 0.9330000519752503, |
| "llm_top_1_test_accuracy": 0.6738000000000001, |
| "llm_top_2_test_accuracy": 0.7247999999999999, |
| "llm_top_5_test_accuracy": 0.7607999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9296000361442566, |
| "sae_top_1_test_accuracy": 0.7478, |
| "sae_top_2_test_accuracy": 0.7793999999999999, |
| "sae_top_5_test_accuracy": 0.8475999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results", |
| "llm_test_accuracy": 0.9218000411987305, |
| "llm_top_1_test_accuracy": 0.6165999999999998, |
| "llm_top_2_test_accuracy": 0.6402, |
| "llm_top_5_test_accuracy": 0.7112, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9268000364303589, |
| "sae_top_1_test_accuracy": 0.7532, |
| "sae_top_2_test_accuracy": 0.798, |
| "sae_top_5_test_accuracy": 0.8465999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results", |
| "llm_test_accuracy": 0.9630000591278076, |
| "llm_top_1_test_accuracy": 0.643, |
| "llm_top_2_test_accuracy": 0.698, |
| "llm_top_5_test_accuracy": 0.741, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9490000605583191, |
| "sae_top_1_test_accuracy": 0.777, |
| "sae_top_2_test_accuracy": 0.843, |
| "sae_top_5_test_accuracy": 0.938, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "codeparrot/github-code_results", |
| "llm_test_accuracy": 0.970400047302246, |
| "llm_top_1_test_accuracy": 0.6422000000000001, |
| "llm_top_2_test_accuracy": 0.7318, |
| "llm_top_5_test_accuracy": 0.7853999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9712000489234924, |
| "sae_top_1_test_accuracy": 0.6386000000000001, |
| "sae_top_2_test_accuracy": 0.6888, |
| "sae_top_5_test_accuracy": 0.8767999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "fancyzhx/ag_news_results", |
| "llm_test_accuracy": 0.9475000351667404, |
| "llm_top_1_test_accuracy": 0.62575, |
| "llm_top_2_test_accuracy": 0.75925, |
| "llm_top_5_test_accuracy": 0.83675, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9515000432729721, |
| "sae_top_1_test_accuracy": 0.73525, |
| "sae_top_2_test_accuracy": 0.8157500000000001, |
| "sae_top_5_test_accuracy": 0.882, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "Helsinki-NLP/europarl_results", |
| "llm_test_accuracy": 0.9992000579833984, |
| "llm_top_1_test_accuracy": 0.726, |
| "llm_top_2_test_accuracy": 0.8474, |
| "llm_top_5_test_accuracy": 0.9296, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9996000289916992, |
| "sae_top_1_test_accuracy": 0.986, |
| "sae_top_2_test_accuracy": 0.9863999999999999, |
| "sae_top_5_test_accuracy": 0.99, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.8.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.8.hook_resid_post", |
| "hook_layer": 8, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1_results": { |
| "sae_test_accuracy": { |
| "0": 0.9490000605583191, |
| "1": 0.9610000252723694, |
| "2": 0.9490000605583191, |
| "6": 0.9900000691413879, |
| "9": 0.9730000495910645 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9540000557899475, |
| "1": 0.9720000624656677, |
| "2": 0.9550000429153442, |
| "6": 0.9920000433921814, |
| "9": 0.9780000448226929 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.584, |
| "1": 0.663, |
| "2": 0.666, |
| "6": 0.823, |
| "9": 0.852 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.585, |
| "1": 0.641, |
| "2": 0.686, |
| "6": 0.835, |
| "9": 0.853 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.698, |
| "1": 0.713, |
| "2": 0.742, |
| "6": 0.889, |
| "9": 0.861 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.875, |
| "1": 0.672, |
| "2": 0.861, |
| "6": 0.817, |
| "9": 0.945 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.873, |
| "1": 0.803, |
| "2": 0.855, |
| "6": 0.982, |
| "9": 0.956 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.878, |
| "1": 0.821, |
| "2": 0.906, |
| "6": 0.988, |
| "9": 0.958 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set2_results": { |
| "sae_test_accuracy": { |
| "11": 0.9640000462532043, |
| "13": 0.9470000267028809, |
| "14": 0.9620000720024109, |
| "18": 0.9330000281333923, |
| "19": 0.9590000510215759 |
| }, |
| "llm_test_accuracy": { |
| "11": 0.9650000333786011, |
| "13": 0.9500000476837158, |
| "14": 0.9590000510215759, |
| "18": 0.940000057220459, |
| "19": 0.9620000720024109 |
| }, |
| "llm_top_1_test_accuracy": { |
| "11": 0.575, |
| "13": 0.685, |
| "14": 0.641, |
| "18": 0.662, |
| "19": 0.73 |
| }, |
| "llm_top_2_test_accuracy": { |
| "11": 0.748, |
| "13": 0.688, |
| "14": 0.664, |
| "18": 0.719, |
| "19": 0.803 |
| }, |
| "llm_top_5_test_accuracy": { |
| "11": 0.786, |
| "13": 0.752, |
| "14": 0.758, |
| "18": 0.726, |
| "19": 0.85 |
| }, |
| "sae_top_1_test_accuracy": { |
| "11": 0.786, |
| "13": 0.669, |
| "14": 0.866, |
| "18": 0.671, |
| "19": 0.841 |
| }, |
| "sae_top_2_test_accuracy": { |
| "11": 0.877, |
| "13": 0.766, |
| "14": 0.867, |
| "18": 0.723, |
| "19": 0.842 |
| }, |
| "sae_top_5_test_accuracy": { |
| "11": 0.948, |
| "13": 0.883, |
| "14": 0.872, |
| "18": 0.914, |
| "19": 0.883 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set3_results": { |
| "sae_test_accuracy": { |
| "20": 0.9490000605583191, |
| "21": 0.9280000329017639, |
| "22": 0.9180000424385071, |
| "25": 0.9610000252723694, |
| "26": 0.8920000195503235 |
| }, |
| "llm_test_accuracy": { |
| "20": 0.9540000557899475, |
| "21": 0.9260000586509705, |
| "22": 0.9270000457763672, |
| "25": 0.9580000638961792, |
| "26": 0.9000000357627869 |
| }, |
| "llm_top_1_test_accuracy": { |
| "20": 0.738, |
| "21": 0.717, |
| "22": 0.62, |
| "25": 0.693, |
| "26": 0.601 |
| }, |
| "llm_top_2_test_accuracy": { |
| "20": 0.811, |
| "21": 0.749, |
| "22": 0.682, |
| "25": 0.715, |
| "26": 0.667 |
| }, |
| "llm_top_5_test_accuracy": { |
| "20": 0.823, |
| "21": 0.772, |
| "22": 0.719, |
| "25": 0.82, |
| "26": 0.67 |
| }, |
| "sae_top_1_test_accuracy": { |
| "20": 0.902, |
| "21": 0.601, |
| "22": 0.773, |
| "25": 0.857, |
| "26": 0.606 |
| }, |
| "sae_top_2_test_accuracy": { |
| "20": 0.909, |
| "21": 0.656, |
| "22": 0.775, |
| "25": 0.853, |
| "26": 0.704 |
| }, |
| "sae_top_5_test_accuracy": { |
| "20": 0.92, |
| "21": 0.849, |
| "22": 0.82, |
| "25": 0.863, |
| "26": 0.786 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_results": { |
| "sae_test_accuracy": { |
| "1": 0.9520000219345093, |
| "2": 0.9430000185966492, |
| "3": 0.9260000586509705, |
| "5": 0.9230000376701355, |
| "6": 0.89000004529953 |
| }, |
| "llm_test_accuracy": { |
| "1": 0.9500000476837158, |
| "2": 0.9380000233650208, |
| "3": 0.9320000410079956, |
| "5": 0.9190000295639038, |
| "6": 0.8700000643730164 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1": 0.718, |
| "2": 0.586, |
| "3": 0.61, |
| "5": 0.578, |
| "6": 0.591 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1": 0.742, |
| "2": 0.606, |
| "3": 0.605, |
| "5": 0.604, |
| "6": 0.644 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1": 0.755, |
| "2": 0.773, |
| "3": 0.63, |
| "5": 0.71, |
| "6": 0.688 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1": 0.872, |
| "2": 0.854, |
| "3": 0.554, |
| "5": 0.837, |
| "6": 0.649 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1": 0.9, |
| "2": 0.863, |
| "3": 0.653, |
| "5": 0.893, |
| "6": 0.681 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1": 0.926, |
| "2": 0.893, |
| "3": 0.762, |
| "5": 0.888, |
| "6": 0.764 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_sentiment_results": { |
| "sae_test_accuracy": { |
| "1.0": 0.9490000605583191, |
| "5.0": 0.9490000605583191 |
| }, |
| "llm_test_accuracy": { |
| "1.0": 0.9620000720024109, |
| "5.0": 0.9640000462532043 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1.0": 0.643, |
| "5.0": 0.643 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1.0": 0.698, |
| "5.0": 0.698 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1.0": 0.741, |
| "5.0": 0.741 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1.0": 0.777, |
| "5.0": 0.777 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1.0": 0.843, |
| "5.0": 0.843 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1.0": 0.938, |
| "5.0": 0.938 |
| } |
| }, |
| "codeparrot/github-code_results": { |
| "sae_test_accuracy": { |
| "C": 0.9610000252723694, |
| "Python": 0.9860000610351562, |
| "HTML": 0.987000048160553, |
| "Java": 0.9630000591278076, |
| "PHP": 0.9590000510215759 |
| }, |
| "llm_test_accuracy": { |
| "C": 0.9650000333786011, |
| "Python": 0.9820000529289246, |
| "HTML": 0.987000048160553, |
| "Java": 0.9630000591278076, |
| "PHP": 0.9550000429153442 |
| }, |
| "llm_top_1_test_accuracy": { |
| "C": 0.558, |
| "Python": 0.625, |
| "HTML": 0.803, |
| "Java": 0.619, |
| "PHP": 0.606 |
| }, |
| "llm_top_2_test_accuracy": { |
| "C": 0.675, |
| "Python": 0.699, |
| "HTML": 0.92, |
| "Java": 0.718, |
| "PHP": 0.647 |
| }, |
| "llm_top_5_test_accuracy": { |
| "C": 0.777, |
| "Python": 0.751, |
| "HTML": 0.937, |
| "Java": 0.784, |
| "PHP": 0.678 |
| }, |
| "sae_top_1_test_accuracy": { |
| "C": 0.638, |
| "Python": 0.626, |
| "HTML": 0.721, |
| "Java": 0.608, |
| "PHP": 0.6 |
| }, |
| "sae_top_2_test_accuracy": { |
| "C": 0.628, |
| "Python": 0.634, |
| "HTML": 0.891, |
| "Java": 0.647, |
| "PHP": 0.644 |
| }, |
| "sae_top_5_test_accuracy": { |
| "C": 0.803, |
| "Python": 0.968, |
| "HTML": 0.961, |
| "Java": 0.759, |
| "PHP": 0.893 |
| } |
| }, |
| "fancyzhx/ag_news_results": { |
| "sae_test_accuracy": { |
| "0": 0.9360000491142273, |
| "1": 0.9860000610351562, |
| "2": 0.9340000152587891, |
| "3": 0.9500000476837158 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9420000314712524, |
| "1": 0.9910000562667847, |
| "2": 0.9150000214576721, |
| "3": 0.9420000314712524 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.637, |
| "1": 0.693, |
| "2": 0.567, |
| "3": 0.606 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.805, |
| "1": 0.805, |
| "2": 0.684, |
| "3": 0.743 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.825, |
| "1": 0.883, |
| "2": 0.777, |
| "3": 0.862 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.701, |
| "1": 0.675, |
| "2": 0.736, |
| "3": 0.829 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.75, |
| "1": 0.875, |
| "2": 0.78, |
| "3": 0.858 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.871, |
| "1": 0.937, |
| "2": 0.844, |
| "3": 0.876 |
| } |
| }, |
| "Helsinki-NLP/europarl_results": { |
| "sae_test_accuracy": { |
| "en": 1.0, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 0.999000072479248, |
| "nl": 0.999000072479248 |
| }, |
| "llm_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 1.0, |
| "de": 0.999000072479248, |
| "es": 0.999000072479248, |
| "nl": 0.999000072479248 |
| }, |
| "llm_top_1_test_accuracy": { |
| "en": 0.726, |
| "fr": 0.581, |
| "de": 0.794, |
| "es": 0.901, |
| "nl": 0.628 |
| }, |
| "llm_top_2_test_accuracy": { |
| "en": 0.849, |
| "fr": 0.888, |
| "de": 0.818, |
| "es": 0.959, |
| "nl": 0.723 |
| }, |
| "llm_top_5_test_accuracy": { |
| "en": 0.932, |
| "fr": 0.959, |
| "de": 0.916, |
| "es": 0.977, |
| "nl": 0.864 |
| }, |
| "sae_top_1_test_accuracy": { |
| "en": 0.998, |
| "fr": 0.988, |
| "de": 0.959, |
| "es": 0.989, |
| "nl": 0.996 |
| }, |
| "sae_top_2_test_accuracy": { |
| "en": 0.998, |
| "fr": 0.989, |
| "de": 0.954, |
| "es": 0.992, |
| "nl": 0.999 |
| }, |
| "sae_top_5_test_accuracy": { |
| "en": 0.996, |
| "fr": 0.997, |
| "de": 0.962, |
| "es": 0.997, |
| "nl": 0.998 |
| } |
| } |
| } |
| } |