| { |
| "eval_type_id": "sparse_probing", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "LabHC/bias_in_bios_class_set2", |
| "LabHC/bias_in_bios_class_set3", |
| "canrager/amazon_reviews_mcauley_1and5", |
| "canrager/amazon_reviews_mcauley_1and5_sentiment", |
| "codeparrot/github-code", |
| "fancyzhx/ag_news", |
| "Helsinki-NLP/europarl" |
| ], |
| "probe_train_set_size": 4000, |
| "probe_test_set_size": 1000, |
| "context_length": 128, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "model_name": "gemma-2-2b", |
| "k_values": [ |
| 1, |
| 2, |
| 5 |
| ], |
| "lower_vram_usage": false |
| }, |
| "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0", |
| "datetime_epoch_millis": 1745755144983, |
| "eval_result_metrics": { |
| "llm": { |
| "llm_test_accuracy": 0.9582312863320113, |
| "llm_top_1_test_accuracy": 0.71601875, |
| "llm_top_2_test_accuracy": 0.75726875, |
| "llm_top_5_test_accuracy": 0.81870625, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null |
| }, |
| "sae": { |
| "sae_test_accuracy": 0.9569812905043363, |
| "sae_top_1_test_accuracy": 0.791975, |
| "sae_top_2_test_accuracy": 0.8449249999999999, |
| "sae_top_5_test_accuracy": 0.9019999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_results", |
| "llm_test_accuracy": 0.9660000443458557, |
| "llm_top_1_test_accuracy": 0.7192000000000001, |
| "llm_top_2_test_accuracy": 0.7424000000000001, |
| "llm_top_5_test_accuracy": 0.7922, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9658000469207764, |
| "sae_top_1_test_accuracy": 0.8321999999999999, |
| "sae_top_2_test_accuracy": 0.8987999999999999, |
| "sae_top_5_test_accuracy": 0.9262, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set2_results", |
| "llm_test_accuracy": 0.9564000487327575, |
| "llm_top_1_test_accuracy": 0.6732, |
| "llm_top_2_test_accuracy": 0.6802, |
| "llm_top_5_test_accuracy": 0.7538, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9488000512123108, |
| "sae_top_1_test_accuracy": 0.8008000000000001, |
| "sae_top_2_test_accuracy": 0.8564, |
| "sae_top_5_test_accuracy": 0.9008, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set3_results", |
| "llm_test_accuracy": 0.9314000368118286, |
| "llm_top_1_test_accuracy": 0.6942, |
| "llm_top_2_test_accuracy": 0.7312000000000001, |
| "llm_top_5_test_accuracy": 0.7821999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9254000425338745, |
| "sae_top_1_test_accuracy": 0.7976, |
| "sae_top_2_test_accuracy": 0.8295999999999999, |
| "sae_top_5_test_accuracy": 0.8754, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results", |
| "llm_test_accuracy": 0.9312000274658203, |
| "llm_top_1_test_accuracy": 0.6779999999999999, |
| "llm_top_2_test_accuracy": 0.7544, |
| "llm_top_5_test_accuracy": 0.8202, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9334000587463379, |
| "sae_top_1_test_accuracy": 0.7672, |
| "sae_top_2_test_accuracy": 0.8002, |
| "sae_top_5_test_accuracy": 0.8608, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results", |
| "llm_test_accuracy": 0.9645000398159027, |
| "llm_top_1_test_accuracy": 0.687, |
| "llm_top_2_test_accuracy": 0.75, |
| "llm_top_5_test_accuracy": 0.804, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9625000357627869, |
| "sae_top_1_test_accuracy": 0.597, |
| "sae_top_2_test_accuracy": 0.656, |
| "sae_top_5_test_accuracy": 0.865, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "codeparrot/github-code_results", |
| "llm_test_accuracy": 0.968600046634674, |
| "llm_top_1_test_accuracy": 0.6344, |
| "llm_top_2_test_accuracy": 0.688, |
| "llm_top_5_test_accuracy": 0.772, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9706000328063965, |
| "sae_top_1_test_accuracy": 0.759, |
| "sae_top_2_test_accuracy": 0.8652, |
| "sae_top_5_test_accuracy": 0.9014, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "fancyzhx/ag_news_results", |
| "llm_test_accuracy": 0.9477500468492508, |
| "llm_top_1_test_accuracy": 0.67775, |
| "llm_top_2_test_accuracy": 0.7187500000000001, |
| "llm_top_5_test_accuracy": 0.8262499999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9497500509023666, |
| "sae_top_1_test_accuracy": 0.7879999999999999, |
| "sae_top_2_test_accuracy": 0.859, |
| "sae_top_5_test_accuracy": 0.887, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "Helsinki-NLP/europarl_results", |
| "llm_test_accuracy": 1.0, |
| "llm_top_1_test_accuracy": 0.9644, |
| "llm_top_2_test_accuracy": 0.9932000000000001, |
| "llm_top_5_test_accuracy": 0.999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9996000051498413, |
| "sae_top_1_test_accuracy": 0.994, |
| "sae_top_2_test_accuracy": 0.9942, |
| "sae_top_5_test_accuracy": 0.9994, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.22.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.22.hook_resid_post", |
| "hook_layer": 22, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1_results": { |
| "sae_test_accuracy": { |
| "0": 0.9430000185966492, |
| "1": 0.9670000672340393, |
| "2": 0.9550000429153442, |
| "6": 0.987000048160553, |
| "9": 0.9770000576972961 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9500000476837158, |
| "1": 0.968000054359436, |
| "2": 0.9450000524520874, |
| "6": 0.9880000352859497, |
| "9": 0.9790000319480896 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.588, |
| "1": 0.662, |
| "2": 0.687, |
| "6": 0.809, |
| "9": 0.85 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.591, |
| "1": 0.655, |
| "2": 0.778, |
| "6": 0.846, |
| "9": 0.842 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.614, |
| "1": 0.692, |
| "2": 0.831, |
| "6": 0.901, |
| "9": 0.923 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.705, |
| "1": 0.655, |
| "2": 0.867, |
| "6": 0.984, |
| "9": 0.95 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.876, |
| "1": 0.825, |
| "2": 0.867, |
| "6": 0.98, |
| "9": 0.946 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.887, |
| "1": 0.882, |
| "2": 0.912, |
| "6": 0.993, |
| "9": 0.957 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set2_results": { |
| "sae_test_accuracy": { |
| "11": 0.9540000557899475, |
| "13": 0.9430000185966492, |
| "14": 0.9530000686645508, |
| "18": 0.9360000491142273, |
| "19": 0.9580000638961792 |
| }, |
| "llm_test_accuracy": { |
| "11": 0.9660000205039978, |
| "13": 0.9510000348091125, |
| "14": 0.9580000638961792, |
| "18": 0.9440000653266907, |
| "19": 0.9630000591278076 |
| }, |
| "llm_top_1_test_accuracy": { |
| "11": 0.558, |
| "13": 0.676, |
| "14": 0.649, |
| "18": 0.686, |
| "19": 0.797 |
| }, |
| "llm_top_2_test_accuracy": { |
| "11": 0.619, |
| "13": 0.68, |
| "14": 0.648, |
| "18": 0.672, |
| "19": 0.782 |
| }, |
| "llm_top_5_test_accuracy": { |
| "11": 0.832, |
| "13": 0.754, |
| "14": 0.661, |
| "18": 0.733, |
| "19": 0.789 |
| }, |
| "sae_top_1_test_accuracy": { |
| "11": 0.878, |
| "13": 0.683, |
| "14": 0.898, |
| "18": 0.688, |
| "19": 0.857 |
| }, |
| "sae_top_2_test_accuracy": { |
| "11": 0.885, |
| "13": 0.777, |
| "14": 0.904, |
| "18": 0.847, |
| "19": 0.869 |
| }, |
| "sae_top_5_test_accuracy": { |
| "11": 0.924, |
| "13": 0.825, |
| "14": 0.925, |
| "18": 0.919, |
| "19": 0.911 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set3_results": { |
| "sae_test_accuracy": { |
| "20": 0.9550000429153442, |
| "21": 0.9130000472068787, |
| "22": 0.9150000214576721, |
| "25": 0.9530000686645508, |
| "26": 0.8910000324249268 |
| }, |
| "llm_test_accuracy": { |
| "20": 0.9640000462532043, |
| "21": 0.9070000648498535, |
| "22": 0.9290000200271606, |
| "25": 0.9610000252723694, |
| "26": 0.8960000276565552 |
| }, |
| "llm_top_1_test_accuracy": { |
| "20": 0.72, |
| "21": 0.739, |
| "22": 0.654, |
| "25": 0.711, |
| "26": 0.647 |
| }, |
| "llm_top_2_test_accuracy": { |
| "20": 0.804, |
| "21": 0.767, |
| "22": 0.697, |
| "25": 0.754, |
| "26": 0.634 |
| }, |
| "llm_top_5_test_accuracy": { |
| "20": 0.828, |
| "21": 0.811, |
| "22": 0.755, |
| "25": 0.813, |
| "26": 0.704 |
| }, |
| "sae_top_1_test_accuracy": { |
| "20": 0.866, |
| "21": 0.793, |
| "22": 0.824, |
| "25": 0.859, |
| "26": 0.646 |
| }, |
| "sae_top_2_test_accuracy": { |
| "20": 0.891, |
| "21": 0.831, |
| "22": 0.829, |
| "25": 0.871, |
| "26": 0.726 |
| }, |
| "sae_top_5_test_accuracy": { |
| "20": 0.908, |
| "21": 0.853, |
| "22": 0.852, |
| "25": 0.907, |
| "26": 0.857 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_results": { |
| "sae_test_accuracy": { |
| "1": 0.9530000686645508, |
| "2": 0.9390000700950623, |
| "3": 0.9480000734329224, |
| "5": 0.9340000152587891, |
| "6": 0.893000066280365 |
| }, |
| "llm_test_accuracy": { |
| "1": 0.9610000252723694, |
| "2": 0.9330000281333923, |
| "3": 0.9320000410079956, |
| "5": 0.9340000152587891, |
| "6": 0.8960000276565552 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1": 0.781, |
| "2": 0.784, |
| "3": 0.576, |
| "5": 0.578, |
| "6": 0.671 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1": 0.83, |
| "2": 0.825, |
| "3": 0.66, |
| "5": 0.772, |
| "6": 0.685 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1": 0.864, |
| "2": 0.874, |
| "3": 0.764, |
| "5": 0.836, |
| "6": 0.763 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1": 0.858, |
| "2": 0.873, |
| "3": 0.625, |
| "5": 0.802, |
| "6": 0.678 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1": 0.873, |
| "2": 0.889, |
| "3": 0.63, |
| "5": 0.886, |
| "6": 0.723 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1": 0.93, |
| "2": 0.903, |
| "3": 0.824, |
| "5": 0.896, |
| "6": 0.751 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_sentiment_results": { |
| "sae_test_accuracy": { |
| "1.0": 0.9640000462532043, |
| "5.0": 0.9610000252723694 |
| }, |
| "llm_test_accuracy": { |
| "1.0": 0.9660000205039978, |
| "5.0": 0.9630000591278076 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1.0": 0.687, |
| "5.0": 0.687 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1.0": 0.75, |
| "5.0": 0.75 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1.0": 0.804, |
| "5.0": 0.804 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1.0": 0.597, |
| "5.0": 0.597 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1.0": 0.656, |
| "5.0": 0.656 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1.0": 0.865, |
| "5.0": 0.865 |
| } |
| }, |
| "codeparrot/github-code_results": { |
| "sae_test_accuracy": { |
| "C": 0.9500000476837158, |
| "Python": 0.984000027179718, |
| "HTML": 0.9890000224113464, |
| "Java": 0.9660000205039978, |
| "PHP": 0.9640000462532043 |
| }, |
| "llm_test_accuracy": { |
| "C": 0.956000030040741, |
| "Python": 0.9860000610351562, |
| "HTML": 0.984000027179718, |
| "Java": 0.9580000638961792, |
| "PHP": 0.9590000510215759 |
| }, |
| "llm_top_1_test_accuracy": { |
| "C": 0.558, |
| "Python": 0.599, |
| "HTML": 0.802, |
| "Java": 0.61, |
| "PHP": 0.603 |
| }, |
| "llm_top_2_test_accuracy": { |
| "C": 0.675, |
| "Python": 0.619, |
| "HTML": 0.881, |
| "Java": 0.608, |
| "PHP": 0.657 |
| }, |
| "llm_top_5_test_accuracy": { |
| "C": 0.812, |
| "Python": 0.726, |
| "HTML": 0.881, |
| "Java": 0.75, |
| "PHP": 0.691 |
| }, |
| "sae_top_1_test_accuracy": { |
| "C": 0.622, |
| "Python": 0.936, |
| "HTML": 0.703, |
| "Java": 0.603, |
| "PHP": 0.931 |
| }, |
| "sae_top_2_test_accuracy": { |
| "C": 0.878, |
| "Python": 0.945, |
| "HTML": 0.913, |
| "Java": 0.656, |
| "PHP": 0.934 |
| }, |
| "sae_top_5_test_accuracy": { |
| "C": 0.888, |
| "Python": 0.965, |
| "HTML": 0.942, |
| "Java": 0.774, |
| "PHP": 0.938 |
| } |
| }, |
| "fancyzhx/ag_news_results": { |
| "sae_test_accuracy": { |
| "0": 0.9350000619888306, |
| "1": 0.9860000610351562, |
| "2": 0.9280000329017639, |
| "3": 0.9500000476837158 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9350000619888306, |
| "1": 0.9890000224113464, |
| "2": 0.9270000457763672, |
| "3": 0.940000057220459 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.602, |
| "1": 0.665, |
| "2": 0.691, |
| "3": 0.753 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.68, |
| "1": 0.673, |
| "2": 0.753, |
| "3": 0.769 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.823, |
| "1": 0.852, |
| "2": 0.811, |
| "3": 0.819 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.855, |
| "1": 0.83, |
| "2": 0.716, |
| "3": 0.751 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.89, |
| "1": 0.92, |
| "2": 0.835, |
| "3": 0.791 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.872, |
| "1": 0.929, |
| "2": 0.878, |
| "3": 0.869 |
| } |
| }, |
| "Helsinki-NLP/europarl_results": { |
| "sae_test_accuracy": { |
| "en": 0.9980000257492065, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "llm_test_accuracy": { |
| "en": 1.0, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "llm_top_1_test_accuracy": { |
| "en": 0.963, |
| "fr": 0.997, |
| "de": 1.0, |
| "es": 0.862, |
| "nl": 1.0 |
| }, |
| "llm_top_2_test_accuracy": { |
| "en": 0.971, |
| "fr": 0.998, |
| "de": 0.999, |
| "es": 0.998, |
| "nl": 1.0 |
| }, |
| "llm_top_5_test_accuracy": { |
| "en": 1.0, |
| "fr": 0.998, |
| "de": 0.997, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "sae_top_1_test_accuracy": { |
| "en": 0.972, |
| "fr": 0.998, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "sae_top_2_test_accuracy": { |
| "en": 0.973, |
| "fr": 0.998, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "sae_top_5_test_accuracy": { |
| "en": 0.998, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 0.999 |
| } |
| } |
| } |
| } |