{
  "eval_type_id": "sparse_probing",
  "eval_config": {
    "random_seed": 42,
    "dataset_names": [
      "LabHC/bias_in_bios_class_set1",
      "LabHC/bias_in_bios_class_set2",
      "LabHC/bias_in_bios_class_set3",
      "canrager/amazon_reviews_mcauley_1and5",
      "canrager/amazon_reviews_mcauley_1and5_sentiment",
      "codeparrot/github-code",
      "fancyzhx/ag_news",
      "Helsinki-NLP/europarl"
    ],
    "probe_train_set_size": 4000,
    "probe_test_set_size": 1000,
    "context_length": 128,
    "sae_batch_size": 125,
    "llm_batch_size": 32,
    "llm_dtype": "bfloat16",
    "model_name": "gemma-2-2b",
    "k_values": [
      1,
      2,
      5
    ],
    "lower_vram_usage": false
  },
  "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0",
  "datetime_epoch_millis": 1745754355071,
  "eval_result_metrics": {
    "llm": {
      "llm_test_accuracy": 0.9595187954604626,
      "llm_top_1_test_accuracy": 0.6592937499999999,
      "llm_top_2_test_accuracy": 0.7205437500000001,
      "llm_top_5_test_accuracy": 0.77929375,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null
    },
    "sae": {
      "sae_test_accuracy": 0.9542938001453876,
      "sae_top_1_test_accuracy": 0.70965,
      "sae_top_2_test_accuracy": 0.7845,
      "sae_top_5_test_accuracy": 0.8708249999999998,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  },
  "eval_result_details": [
    {
      "dataset_name": "LabHC/bias_in_bios_class_set1_results",
      "llm_test_accuracy": 0.970400047302246,
      "llm_top_1_test_accuracy": 0.648,
      "llm_top_2_test_accuracy": 0.6872,
      "llm_top_5_test_accuracy": 0.7782,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9606000542640686,
      "sae_top_1_test_accuracy": 0.7636000000000001,
      "sae_top_2_test_accuracy": 0.8318,
      "sae_top_5_test_accuracy": 0.8785999999999998,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set2_results",
      "llm_test_accuracy": 0.9522000551223755,
      "llm_top_1_test_accuracy": 0.6716,
      "llm_top_2_test_accuracy": 0.7188,
      "llm_top_5_test_accuracy": 0.7686,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9474000453948974,
      "sae_top_1_test_accuracy": 0.6728,
      "sae_top_2_test_accuracy": 0.7702,
      "sae_top_5_test_accuracy": 0.8393999999999998,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set3_results",
      "llm_test_accuracy": 0.9324000477790833,
      "llm_top_1_test_accuracy": 0.6824,
      "llm_top_2_test_accuracy": 0.7440000000000001,
      "llm_top_5_test_accuracy": 0.7575999999999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9250000596046448,
      "sae_top_1_test_accuracy": 0.7343999999999999,
      "sae_top_2_test_accuracy": 0.7826000000000001,
      "sae_top_5_test_accuracy": 0.8581999999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
      "llm_test_accuracy": 0.915600037574768,
      "llm_top_1_test_accuracy": 0.603,
      "llm_top_2_test_accuracy": 0.6405999999999998,
      "llm_top_5_test_accuracy": 0.6686,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9124000430107116,
      "sae_top_1_test_accuracy": 0.613,
      "sae_top_2_test_accuracy": 0.6612,
      "sae_top_5_test_accuracy": 0.8116,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
      "llm_test_accuracy": 0.9830000400543213,
      "llm_top_1_test_accuracy": 0.671,
      "llm_top_2_test_accuracy": 0.724,
      "llm_top_5_test_accuracy": 0.766,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9730000495910645,
      "sae_top_1_test_accuracy": 0.753,
      "sae_top_2_test_accuracy": 0.816,
      "sae_top_5_test_accuracy": 0.9,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "codeparrot/github-code_results",
      "llm_test_accuracy": 0.9702000498771668,
      "llm_top_1_test_accuracy": 0.6594,
      "llm_top_2_test_accuracy": 0.6930000000000001,
      "llm_top_5_test_accuracy": 0.7622,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9688000559806824,
      "sae_top_1_test_accuracy": 0.6086,
      "sae_top_2_test_accuracy": 0.6724,
      "sae_top_5_test_accuracy": 0.7954000000000001,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "fancyzhx/ag_news_results",
      "llm_test_accuracy": 0.9527500569820404,
      "llm_top_1_test_accuracy": 0.69375,
      "llm_top_2_test_accuracy": 0.7797499999999999,
      "llm_top_5_test_accuracy": 0.82775,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9487500488758087,
      "sae_top_1_test_accuracy": 0.7999999999999999,
      "sae_top_2_test_accuracy": 0.8170000000000001,
      "sae_top_5_test_accuracy": 0.8859999999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "Helsinki-NLP/europarl_results",
      "llm_test_accuracy": 0.9996000289916992,
      "llm_top_1_test_accuracy": 0.6451999999999999,
      "llm_top_2_test_accuracy": 0.7769999999999999,
      "llm_top_5_test_accuracy": 0.9054,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9984000444412231,
      "sae_top_1_test_accuracy": 0.7317999999999999,
      "sae_top_2_test_accuracy": 0.9248,
      "sae_top_5_test_accuracy": 0.9974000000000001,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  ],
  "sae_bench_commit_hash": "Unknown",
  "sae_lens_id": "blocks.12.hook_resid_post",
  "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc",
  "sae_lens_version": "5.9.1",
  "sae_cfg_dict": {
    "architecture": "jumprelu",
    "d_in": 2304,
    "d_sae": 32768,
    "activation_fn_str": "relu",
    "apply_b_dec_to_input": true,
    "finetuning_scaling_factor": false,
    "context_size": 1024,
    "model_name": "gemma-2-2b",
    "hook_name": "blocks.12.hook_resid_post",
    "hook_layer": 12,
    "hook_head_index": null,
    "prepend_bos": true,
    "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B",
    "dataset_trust_remote_code": true,
    "normalize_activations": "none",
    "dtype": "torch.bfloat16",
    "device": "cuda",
    "sae_lens_training_version": "5.5.1",
    "activation_fn_kwargs": {
      "k": 40
    },
    "neuronpedia_id": "gemma-2-2b/12-res-matryoshka-dc",
    "model_from_pretrained_kwargs": {
      "center_writing_weights": false
    },
    "seqpos_slice": [
      null
    ]
  },
  "eval_result_unstructured": {
    "LabHC/bias_in_bios_class_set1_results": {
      "sae_test_accuracy": {
        "0": 0.9440000653266907,
        "1": 0.9600000381469727,
        "2": 0.9480000734329224,
        "6": 0.9800000190734863,
        "9": 0.971000075340271
      },
      "llm_test_accuracy": {
        "0": 0.9510000348091125,
        "1": 0.971000075340271,
        "2": 0.9580000638961792,
        "6": 0.9930000305175781,
        "9": 0.9790000319480896
      },
      "llm_top_1_test_accuracy": {
        "0": 0.574,
        "1": 0.646,
        "2": 0.685,
        "6": 0.784,
        "9": 0.551
      },
      "llm_top_2_test_accuracy": {
        "0": 0.577,
        "1": 0.628,
        "2": 0.713,
        "6": 0.806,
        "9": 0.712
      },
      "llm_top_5_test_accuracy": {
        "0": 0.708,
        "1": 0.673,
        "2": 0.755,
        "6": 0.895,
        "9": 0.86
      },
      "sae_top_1_test_accuracy": {
        "0": 0.562,
        "1": 0.642,
        "2": 0.882,
        "6": 0.791,
        "9": 0.941
      },
      "sae_top_2_test_accuracy": {
        "0": 0.666,
        "1": 0.689,
        "2": 0.889,
        "6": 0.98,
        "9": 0.935
      },
      "sae_top_5_test_accuracy": {
        "0": 0.755,
        "1": 0.815,
        "2": 0.897,
        "6": 0.978,
        "9": 0.948
      }
    },
    "LabHC/bias_in_bios_class_set2_results": {
      "sae_test_accuracy": {
        "11": 0.9650000333786011,
        "13": 0.9500000476837158,
        "14": 0.9480000734329224,
        "18": 0.9150000214576721,
        "19": 0.9590000510215759
      },
      "llm_test_accuracy": {
        "11": 0.9670000672340393,
        "13": 0.9490000605583191,
        "14": 0.9540000557899475,
        "18": 0.9280000329017639,
        "19": 0.9630000591278076
      },
      "llm_top_1_test_accuracy": {
        "11": 0.553,
        "13": 0.666,
        "14": 0.655,
        "18": 0.692,
        "19": 0.792
      },
      "llm_top_2_test_accuracy": {
        "11": 0.678,
        "13": 0.713,
        "14": 0.678,
        "18": 0.739,
        "19": 0.786
      },
      "llm_top_5_test_accuracy": {
        "11": 0.782,
        "13": 0.733,
        "14": 0.753,
        "18": 0.732,
        "19": 0.843
      },
      "sae_top_1_test_accuracy": {
        "11": 0.558,
        "13": 0.682,
        "14": 0.643,
        "18": 0.701,
        "19": 0.78
      },
      "sae_top_2_test_accuracy": {
        "11": 0.75,
        "13": 0.673,
        "14": 0.844,
        "18": 0.731,
        "19": 0.853
      },
      "sae_top_5_test_accuracy": {
        "11": 0.847,
        "13": 0.845,
        "14": 0.879,
        "18": 0.775,
        "19": 0.851
      }
    },
    "LabHC/bias_in_bios_class_set3_results": {
      "sae_test_accuracy": {
        "20": 0.9540000557899475,
        "21": 0.921000063419342,
        "22": 0.9040000438690186,
        "25": 0.9620000720024109,
        "26": 0.8840000629425049
      },
      "llm_test_accuracy": {
        "20": 0.9540000557899475,
        "21": 0.9300000667572021,
        "22": 0.9140000343322754,
        "25": 0.9720000624656677,
        "26": 0.8920000195503235
      },
      "llm_top_1_test_accuracy": {
        "20": 0.703,
        "21": 0.764,
        "22": 0.639,
        "25": 0.691,
        "26": 0.615
      },
      "llm_top_2_test_accuracy": {
        "20": 0.817,
        "21": 0.767,
        "22": 0.683,
        "25": 0.761,
        "26": 0.692
      },
      "llm_top_5_test_accuracy": {
        "20": 0.868,
        "21": 0.783,
        "22": 0.673,
        "25": 0.8,
        "26": 0.664
      },
      "sae_top_1_test_accuracy": {
        "20": 0.899,
        "21": 0.697,
        "22": 0.722,
        "25": 0.715,
        "26": 0.639
      },
      "sae_top_2_test_accuracy": {
        "20": 0.901,
        "21": 0.732,
        "22": 0.705,
        "25": 0.875,
        "26": 0.7
      },
      "sae_top_5_test_accuracy": {
        "20": 0.927,
        "21": 0.835,
        "22": 0.855,
        "25": 0.909,
        "26": 0.765
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_results": {
      "sae_test_accuracy": {
        "1": 0.9380000233650208,
        "2": 0.9330000281333923,
        "3": 0.9170000553131104,
        "5": 0.9110000729560852,
        "6": 0.8630000352859497
      },
      "llm_test_accuracy": {
        "1": 0.9500000476837158,
        "2": 0.9280000329017639,
        "3": 0.9100000262260437,
        "5": 0.9170000553131104,
        "6": 0.8730000257492065
      },
      "llm_top_1_test_accuracy": {
        "1": 0.677,
        "2": 0.585,
        "3": 0.585,
        "5": 0.569,
        "6": 0.599
      },
      "llm_top_2_test_accuracy": {
        "1": 0.756,
        "2": 0.636,
        "3": 0.613,
        "5": 0.563,
        "6": 0.635
      },
      "llm_top_5_test_accuracy": {
        "1": 0.763,
        "2": 0.625,
        "3": 0.635,
        "5": 0.647,
        "6": 0.673
      },
      "sae_top_1_test_accuracy": {
        "1": 0.652,
        "2": 0.62,
        "3": 0.55,
        "5": 0.641,
        "6": 0.602
      },
      "sae_top_2_test_accuracy": {
        "1": 0.717,
        "2": 0.655,
        "3": 0.658,
        "5": 0.64,
        "6": 0.636
      },
      "sae_top_5_test_accuracy": {
        "1": 0.887,
        "2": 0.858,
        "3": 0.704,
        "5": 0.845,
        "6": 0.764
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
      "sae_test_accuracy": {
        "1.0": 0.9730000495910645,
        "5.0": 0.9730000495910645
      },
      "llm_test_accuracy": {
        "1.0": 0.984000027179718,
        "5.0": 0.9820000529289246
      },
      "llm_top_1_test_accuracy": {
        "1.0": 0.671,
        "5.0": 0.671
      },
      "llm_top_2_test_accuracy": {
        "1.0": 0.724,
        "5.0": 0.724
      },
      "llm_top_5_test_accuracy": {
        "1.0": 0.766,
        "5.0": 0.766
      },
      "sae_top_1_test_accuracy": {
        "1.0": 0.753,
        "5.0": 0.753
      },
      "sae_top_2_test_accuracy": {
        "1.0": 0.816,
        "5.0": 0.816
      },
      "sae_top_5_test_accuracy": {
        "1.0": 0.9,
        "5.0": 0.9
      }
    },
    "codeparrot/github-code_results": {
      "sae_test_accuracy": {
        "C": 0.9500000476837158,
        "Python": 0.9910000562667847,
        "HTML": 0.9850000739097595,
        "Java": 0.9630000591278076,
        "PHP": 0.9550000429153442
      },
      "llm_test_accuracy": {
        "C": 0.9550000429153442,
        "Python": 0.9850000739097595,
        "HTML": 0.9930000305175781,
        "Java": 0.9640000462532043,
        "PHP": 0.9540000557899475
      },
      "llm_top_1_test_accuracy": {
        "C": 0.668,
        "Python": 0.634,
        "HTML": 0.78,
        "Java": 0.633,
        "PHP": 0.582
      },
      "llm_top_2_test_accuracy": {
        "C": 0.664,
        "Python": 0.67,
        "HTML": 0.793,
        "Java": 0.69,
        "PHP": 0.648
      },
      "llm_top_5_test_accuracy": {
        "C": 0.765,
        "Python": 0.718,
        "HTML": 0.898,
        "Java": 0.735,
        "PHP": 0.695
      },
      "sae_top_1_test_accuracy": {
        "C": 0.613,
        "Python": 0.643,
        "HTML": 0.557,
        "Java": 0.625,
        "PHP": 0.605
      },
      "sae_top_2_test_accuracy": {
        "C": 0.702,
        "Python": 0.642,
        "HTML": 0.792,
        "Java": 0.618,
        "PHP": 0.608
      },
      "sae_top_5_test_accuracy": {
        "C": 0.714,
        "Python": 0.654,
        "HTML": 0.919,
        "Java": 0.764,
        "PHP": 0.926
      }
    },
    "fancyzhx/ag_news_results": {
      "sae_test_accuracy": {
        "0": 0.9390000700950623,
        "1": 0.9830000400543213,
        "2": 0.9290000200271606,
        "3": 0.9440000653266907
      },
      "llm_test_accuracy": {
        "0": 0.9440000653266907,
        "1": 0.9910000562667847,
        "2": 0.9250000715255737,
        "3": 0.9510000348091125
      },
      "llm_top_1_test_accuracy": {
        "0": 0.807,
        "1": 0.664,
        "2": 0.666,
        "3": 0.638
      },
      "llm_top_2_test_accuracy": {
        "0": 0.811,
        "1": 0.804,
        "2": 0.688,
        "3": 0.816
      },
      "llm_top_5_test_accuracy": {
        "0": 0.824,
        "1": 0.881,
        "2": 0.758,
        "3": 0.848
      },
      "sae_top_1_test_accuracy": {
        "0": 0.829,
        "1": 0.926,
        "2": 0.795,
        "3": 0.65
      },
      "sae_top_2_test_accuracy": {
        "0": 0.755,
        "1": 0.925,
        "2": 0.818,
        "3": 0.77
      },
      "sae_top_5_test_accuracy": {
        "0": 0.853,
        "1": 0.956,
        "2": 0.846,
        "3": 0.889
      }
    },
    "Helsinki-NLP/europarl_results": {
      "sae_test_accuracy": {
        "en": 0.999000072479248,
        "fr": 1.0,
        "de": 0.9980000257492065,
        "es": 0.9960000514984131,
        "nl": 0.999000072479248
      },
      "llm_test_accuracy": {
        "en": 1.0,
        "fr": 0.999000072479248,
        "de": 1.0,
        "es": 0.999000072479248,
        "nl": 1.0
      },
      "llm_top_1_test_accuracy": {
        "en": 0.733,
        "fr": 0.592,
        "de": 0.749,
        "es": 0.494,
        "nl": 0.658
      },
      "llm_top_2_test_accuracy": {
        "en": 0.834,
        "fr": 0.586,
        "de": 0.811,
        "es": 0.911,
        "nl": 0.743
      },
      "llm_top_5_test_accuracy": {
        "en": 0.886,
        "fr": 0.922,
        "de": 0.87,
        "es": 0.981,
        "nl": 0.868
      },
      "sae_top_1_test_accuracy": {
        "en": 0.751,
        "fr": 0.585,
        "de": 0.671,
        "es": 0.994,
        "nl": 0.658
      },
      "sae_top_2_test_accuracy": {
        "en": 0.998,
        "fr": 0.99,
        "de": 0.874,
        "es": 0.993,
        "nl": 0.769
      },
      "sae_top_5_test_accuracy": {
        "en": 0.999,
        "fr": 0.998,
        "de": 0.997,
        "es": 0.998,
        "nl": 0.995
      }
    }
  }
}