| { |
| "eval_type_id": "sparse_probing", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "LabHC/bias_in_bios_class_set2", |
| "LabHC/bias_in_bios_class_set3", |
| "canrager/amazon_reviews_mcauley_1and5", |
| "canrager/amazon_reviews_mcauley_1and5_sentiment", |
| "codeparrot/github-code", |
| "fancyzhx/ag_news", |
| "Helsinki-NLP/europarl" |
| ], |
| "probe_train_set_size": 4000, |
| "probe_test_set_size": 1000, |
| "context_length": 128, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "model_name": "gemma-2-2b", |
| "k_values": [ |
| 1, |
| 2, |
| 5 |
| ], |
| "lower_vram_usage": false |
| }, |
| "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0", |
| "datetime_epoch_millis": 1745754595623, |
| "eval_result_metrics": { |
| "llm": { |
| "llm_test_accuracy": 0.956756292283535, |
| "llm_top_1_test_accuracy": 0.66578125, |
| "llm_top_2_test_accuracy": 0.739075, |
| "llm_top_5_test_accuracy": 0.7944062500000001, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null |
| }, |
| "sae": { |
| "sae_test_accuracy": 0.9552437942475079, |
| "sae_top_1_test_accuracy": 0.743025, |
| "sae_top_2_test_accuracy": 0.8271375, |
| "sae_top_5_test_accuracy": 0.8893625, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_results", |
| "llm_test_accuracy": 0.9646000385284423, |
| "llm_top_1_test_accuracy": 0.6682, |
| "llm_top_2_test_accuracy": 0.7158, |
| "llm_top_5_test_accuracy": 0.7906000000000001, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9594000458717347, |
| "sae_top_1_test_accuracy": 0.7414, |
| "sae_top_2_test_accuracy": 0.7778, |
| "sae_top_5_test_accuracy": 0.8738000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set2_results", |
| "llm_test_accuracy": 0.9532000660896301, |
| "llm_top_1_test_accuracy": 0.6594, |
| "llm_top_2_test_accuracy": 0.7108, |
| "llm_top_5_test_accuracy": 0.7686, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9526000380516052, |
| "sae_top_1_test_accuracy": 0.7202, |
| "sae_top_2_test_accuracy": 0.7701999999999999, |
| "sae_top_5_test_accuracy": 0.8412, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set3_results", |
| "llm_test_accuracy": 0.9242000460624695, |
| "llm_top_1_test_accuracy": 0.6918000000000001, |
| "llm_top_2_test_accuracy": 0.752, |
| "llm_top_5_test_accuracy": 0.7882, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9302000522613525, |
| "sae_top_1_test_accuracy": 0.7016, |
| "sae_top_2_test_accuracy": 0.8009999999999999, |
| "sae_top_5_test_accuracy": 0.8618, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results", |
| "llm_test_accuracy": 0.9196000576019288, |
| "llm_top_1_test_accuracy": 0.5908, |
| "llm_top_2_test_accuracy": 0.6436, |
| "llm_top_5_test_accuracy": 0.6726000000000001, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.910800039768219, |
| "sae_top_1_test_accuracy": 0.6401999999999999, |
| "sae_top_2_test_accuracy": 0.7140000000000001, |
| "sae_top_5_test_accuracy": 0.7864, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results", |
| "llm_test_accuracy": 0.9750000238418579, |
| "llm_top_1_test_accuracy": 0.692, |
| "llm_top_2_test_accuracy": 0.744, |
| "llm_top_5_test_accuracy": 0.798, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9715000689029694, |
| "sae_top_1_test_accuracy": 0.68, |
| "sae_top_2_test_accuracy": 0.925, |
| "sae_top_5_test_accuracy": 0.956, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "codeparrot/github-code_results", |
| "llm_test_accuracy": 0.9732000589370727, |
| "llm_top_1_test_accuracy": 0.6355999999999999, |
| "llm_top_2_test_accuracy": 0.7094000000000001, |
| "llm_top_5_test_accuracy": 0.7702, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9720000505447388, |
| "sae_top_1_test_accuracy": 0.6914, |
| "sae_top_2_test_accuracy": 0.8183999999999999, |
| "sae_top_5_test_accuracy": 0.923, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "fancyzhx/ag_news_results", |
| "llm_test_accuracy": 0.9442500472068787, |
| "llm_top_1_test_accuracy": 0.65025, |
| "llm_top_2_test_accuracy": 0.749, |
| "llm_top_5_test_accuracy": 0.8342499999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9462500363588333, |
| "sae_top_1_test_accuracy": 0.8009999999999999, |
| "sae_top_2_test_accuracy": 0.8174999999999999, |
| "sae_top_5_test_accuracy": 0.8775, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "Helsinki-NLP/europarl_results", |
| "llm_test_accuracy": 1.0, |
| "llm_top_1_test_accuracy": 0.7382000000000001, |
| "llm_top_2_test_accuracy": 0.8879999999999999, |
| "llm_top_5_test_accuracy": 0.9328, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9992000222206116, |
| "sae_top_1_test_accuracy": 0.9683999999999999, |
| "sae_top_2_test_accuracy": 0.9931999999999999, |
| "sae_top_5_test_accuracy": 0.9952, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.15.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.15.hook_resid_post", |
| "hook_layer": 15, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1_results": { |
| "sae_test_accuracy": { |
| "0": 0.9520000219345093, |
| "1": 0.9550000429153442, |
| "2": 0.9410000443458557, |
| "6": 0.9850000739097595, |
| "9": 0.9640000462532043 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9520000219345093, |
| "1": 0.9640000462532043, |
| "2": 0.9510000348091125, |
| "6": 0.987000048160553, |
| "9": 0.9690000414848328 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.592, |
| "1": 0.617, |
| "2": 0.661, |
| "6": 0.761, |
| "9": 0.71 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.582, |
| "1": 0.666, |
| "2": 0.755, |
| "6": 0.859, |
| "9": 0.717 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.674, |
| "1": 0.723, |
| "2": 0.781, |
| "6": 0.898, |
| "9": 0.877 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.641, |
| "1": 0.631, |
| "2": 0.875, |
| "6": 0.768, |
| "9": 0.792 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.65, |
| "1": 0.622, |
| "2": 0.877, |
| "6": 0.812, |
| "9": 0.928 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.739, |
| "1": 0.849, |
| "2": 0.889, |
| "6": 0.972, |
| "9": 0.92 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set2_results": { |
| "sae_test_accuracy": { |
| "11": 0.9700000286102295, |
| "13": 0.9600000381469727, |
| "14": 0.9490000605583191, |
| "18": 0.9200000166893005, |
| "19": 0.9640000462532043 |
| }, |
| "llm_test_accuracy": { |
| "11": 0.9580000638961792, |
| "13": 0.9490000605583191, |
| "14": 0.9620000720024109, |
| "18": 0.9350000619888306, |
| "19": 0.9620000720024109 |
| }, |
| "llm_top_1_test_accuracy": { |
| "11": 0.531, |
| "13": 0.659, |
| "14": 0.649, |
| "18": 0.699, |
| "19": 0.759 |
| }, |
| "llm_top_2_test_accuracy": { |
| "11": 0.693, |
| "13": 0.687, |
| "14": 0.665, |
| "18": 0.73, |
| "19": 0.779 |
| }, |
| "llm_top_5_test_accuracy": { |
| "11": 0.82, |
| "13": 0.748, |
| "14": 0.702, |
| "18": 0.733, |
| "19": 0.84 |
| }, |
| "sae_top_1_test_accuracy": { |
| "11": 0.85, |
| "13": 0.668, |
| "14": 0.654, |
| "18": 0.687, |
| "19": 0.742 |
| }, |
| "sae_top_2_test_accuracy": { |
| "11": 0.855, |
| "13": 0.696, |
| "14": 0.809, |
| "18": 0.692, |
| "19": 0.799 |
| }, |
| "sae_top_5_test_accuracy": { |
| "11": 0.86, |
| "13": 0.843, |
| "14": 0.871, |
| "18": 0.762, |
| "19": 0.87 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set3_results": { |
| "sae_test_accuracy": { |
| "20": 0.9620000720024109, |
| "21": 0.9220000505447388, |
| "22": 0.9120000600814819, |
| "25": 0.9640000462532043, |
| "26": 0.8910000324249268 |
| }, |
| "llm_test_accuracy": { |
| "20": 0.9540000557899475, |
| "21": 0.9120000600814819, |
| "22": 0.9140000343322754, |
| "25": 0.9600000381469727, |
| "26": 0.8810000419616699 |
| }, |
| "llm_top_1_test_accuracy": { |
| "20": 0.711, |
| "21": 0.776, |
| "22": 0.669, |
| "25": 0.684, |
| "26": 0.619 |
| }, |
| "llm_top_2_test_accuracy": { |
| "20": 0.815, |
| "21": 0.774, |
| "22": 0.69, |
| "25": 0.774, |
| "26": 0.707 |
| }, |
| "llm_top_5_test_accuracy": { |
| "20": 0.854, |
| "21": 0.841, |
| "22": 0.672, |
| "25": 0.848, |
| "26": 0.726 |
| }, |
| "sae_top_1_test_accuracy": { |
| "20": 0.834, |
| "21": 0.695, |
| "22": 0.676, |
| "25": 0.686, |
| "26": 0.617 |
| }, |
| "sae_top_2_test_accuracy": { |
| "20": 0.899, |
| "21": 0.827, |
| "22": 0.72, |
| "25": 0.881, |
| "26": 0.678 |
| }, |
| "sae_top_5_test_accuracy": { |
| "20": 0.907, |
| "21": 0.849, |
| "22": 0.89, |
| "25": 0.871, |
| "26": 0.792 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_results": { |
| "sae_test_accuracy": { |
| "1": 0.9430000185966492, |
| "2": 0.9270000457763672, |
| "3": 0.9110000729560852, |
| "5": 0.9100000262260437, |
| "6": 0.8630000352859497 |
| }, |
| "llm_test_accuracy": { |
| "1": 0.9530000686645508, |
| "2": 0.9300000667572021, |
| "3": 0.921000063419342, |
| "5": 0.9290000200271606, |
| "6": 0.8650000691413879 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1": 0.641, |
| "2": 0.603, |
| "3": 0.566, |
| "5": 0.563, |
| "6": 0.581 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1": 0.643, |
| "2": 0.637, |
| "3": 0.585, |
| "5": 0.646, |
| "6": 0.707 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1": 0.745, |
| "2": 0.649, |
| "3": 0.597, |
| "5": 0.636, |
| "6": 0.736 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1": 0.629, |
| "2": 0.61, |
| "3": 0.551, |
| "5": 0.808, |
| "6": 0.603 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1": 0.791, |
| "2": 0.664, |
| "3": 0.69, |
| "5": 0.797, |
| "6": 0.628 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1": 0.868, |
| "2": 0.826, |
| "3": 0.754, |
| "5": 0.799, |
| "6": 0.685 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_sentiment_results": { |
| "sae_test_accuracy": { |
| "1.0": 0.9720000624656677, |
| "5.0": 0.971000075340271 |
| }, |
| "llm_test_accuracy": { |
| "1.0": 0.9750000238418579, |
| "5.0": 0.9750000238418579 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1.0": 0.692, |
| "5.0": 0.692 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1.0": 0.744, |
| "5.0": 0.744 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1.0": 0.798, |
| "5.0": 0.798 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1.0": 0.68, |
| "5.0": 0.68 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1.0": 0.925, |
| "5.0": 0.925 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1.0": 0.956, |
| "5.0": 0.956 |
| } |
| }, |
| "codeparrot/github-code_results": { |
| "sae_test_accuracy": { |
| "C": 0.9540000557899475, |
| "Python": 0.9820000529289246, |
| "HTML": 0.9910000562667847, |
| "Java": 0.9700000286102295, |
| "PHP": 0.9630000591278076 |
| }, |
| "llm_test_accuracy": { |
| "C": 0.9620000720024109, |
| "Python": 0.9910000562667847, |
| "HTML": 0.9820000529289246, |
| "Java": 0.9670000672340393, |
| "PHP": 0.9640000462532043 |
| }, |
| "llm_top_1_test_accuracy": { |
| "C": 0.583, |
| "Python": 0.637, |
| "HTML": 0.789, |
| "Java": 0.591, |
| "PHP": 0.578 |
| }, |
| "llm_top_2_test_accuracy": { |
| "C": 0.662, |
| "Python": 0.672, |
| "HTML": 0.809, |
| "Java": 0.704, |
| "PHP": 0.7 |
| }, |
| "llm_top_5_test_accuracy": { |
| "C": 0.782, |
| "Python": 0.705, |
| "HTML": 0.915, |
| "Java": 0.743, |
| "PHP": 0.706 |
| }, |
| "sae_top_1_test_accuracy": { |
| "C": 0.632, |
| "Python": 0.608, |
| "HTML": 0.702, |
| "Java": 0.594, |
| "PHP": 0.921 |
| }, |
| "sae_top_2_test_accuracy": { |
| "C": 0.64, |
| "Python": 0.933, |
| "HTML": 0.945, |
| "Java": 0.66, |
| "PHP": 0.914 |
| }, |
| "sae_top_5_test_accuracy": { |
| "C": 0.876, |
| "Python": 0.948, |
| "HTML": 0.952, |
| "Java": 0.904, |
| "PHP": 0.935 |
| } |
| }, |
| "fancyzhx/ag_news_results": { |
| "sae_test_accuracy": { |
| "0": 0.9360000491142273, |
| "1": 0.9800000190734863, |
| "2": 0.9180000424385071, |
| "3": 0.9510000348091125 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9310000538825989, |
| "1": 0.9860000610351562, |
| "2": 0.9270000457763672, |
| "3": 0.9330000281333923 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.692, |
| "1": 0.659, |
| "2": 0.585, |
| "3": 0.665 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.737, |
| "1": 0.799, |
| "2": 0.69, |
| "3": 0.77 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.846, |
| "1": 0.888, |
| "2": 0.775, |
| "3": 0.828 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.844, |
| "1": 0.947, |
| "2": 0.767, |
| "3": 0.646 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.847, |
| "1": 0.944, |
| "2": 0.775, |
| "3": 0.704 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.875, |
| "1": 0.948, |
| "2": 0.839, |
| "3": 0.848 |
| } |
| }, |
| "Helsinki-NLP/europarl_results": { |
| "sae_test_accuracy": { |
| "en": 1.0, |
| "fr": 0.999000072479248, |
| "de": 1.0, |
| "es": 0.9970000386238098, |
| "nl": 1.0 |
| }, |
| "llm_test_accuracy": { |
| "en": 1.0, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "llm_top_1_test_accuracy": { |
| "en": 0.852, |
| "fr": 0.632, |
| "de": 0.564, |
| "es": 0.971, |
| "nl": 0.672 |
| }, |
| "llm_top_2_test_accuracy": { |
| "en": 0.889, |
| "fr": 0.861, |
| "de": 0.956, |
| "es": 0.989, |
| "nl": 0.745 |
| }, |
| "llm_top_5_test_accuracy": { |
| "en": 0.957, |
| "fr": 0.986, |
| "de": 0.974, |
| "es": 0.995, |
| "nl": 0.752 |
| }, |
| "sae_top_1_test_accuracy": { |
| "en": 0.86, |
| "fr": 0.996, |
| "de": 0.989, |
| "es": 0.997, |
| "nl": 1.0 |
| }, |
| "sae_top_2_test_accuracy": { |
| "en": 0.992, |
| "fr": 0.996, |
| "de": 0.982, |
| "es": 0.997, |
| "nl": 0.999 |
| }, |
| "sae_top_5_test_accuracy": { |
| "en": 0.994, |
| "fr": 0.998, |
| "de": 0.987, |
| "es": 0.997, |
| "nl": 1.0 |
| } |
| } |
| } |
| } |