saebench/sparse_probing/gemma-2-2b-res-snap-matryoshka-dc_blocks.15.hook_resid_post_eval_results.json
{
"eval_type_id": "sparse_probing",
"eval_config": {
"random_seed": 42,
"dataset_names": [
"LabHC/bias_in_bios_class_set1",
"LabHC/bias_in_bios_class_set2",
"LabHC/bias_in_bios_class_set3",
"canrager/amazon_reviews_mcauley_1and5",
"canrager/amazon_reviews_mcauley_1and5_sentiment",
"codeparrot/github-code",
"fancyzhx/ag_news",
"Helsinki-NLP/europarl"
],
"probe_train_set_size": 4000,
"probe_test_set_size": 1000,
"context_length": 128,
"sae_batch_size": 125,
"llm_batch_size": 32,
"llm_dtype": "bfloat16",
"model_name": "gemma-2-2b",
"k_values": [
1,
2,
5
],
"lower_vram_usage": false
},
"eval_id": "acf88c24-09bd-4561-9b58-1bd96edb30c6",
"datetime_epoch_millis": 1745627243425,
"eval_result_metrics": {
"llm": {
"llm_test_accuracy": 0.956756292283535,
"llm_top_1_test_accuracy": 0.66578125,
"llm_top_2_test_accuracy": 0.739075,
"llm_top_5_test_accuracy": 0.7944062500000001,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null
},
"sae": {
"sae_test_accuracy": 0.9545562915503979,
"sae_top_1_test_accuracy": 0.7364812500000001,
"sae_top_2_test_accuracy": 0.80128125,
"sae_top_5_test_accuracy": 0.8687750000000002,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
}
},
"eval_result_details": [
{
"dataset_name": "LabHC/bias_in_bios_class_set1_results",
"llm_test_accuracy": 0.9646000385284423,
"llm_top_1_test_accuracy": 0.6682,
"llm_top_2_test_accuracy": 0.7158,
"llm_top_5_test_accuracy": 0.7906000000000001,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9640000343322754,
"sae_top_1_test_accuracy": 0.7022,
"sae_top_2_test_accuracy": 0.7998000000000001,
"sae_top_5_test_accuracy": 0.885,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "LabHC/bias_in_bios_class_set2_results",
"llm_test_accuracy": 0.9532000660896301,
"llm_top_1_test_accuracy": 0.6594,
"llm_top_2_test_accuracy": 0.7108,
"llm_top_5_test_accuracy": 0.7686,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9434000372886657,
"sae_top_1_test_accuracy": 0.7260000000000001,
"sae_top_2_test_accuracy": 0.7466000000000002,
"sae_top_5_test_accuracy": 0.8334000000000001,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "LabHC/bias_in_bios_class_set3_results",
"llm_test_accuracy": 0.9242000460624695,
"llm_top_1_test_accuracy": 0.6918000000000001,
"llm_top_2_test_accuracy": 0.752,
"llm_top_5_test_accuracy": 0.7882,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9266000509262085,
"sae_top_1_test_accuracy": 0.6592,
"sae_top_2_test_accuracy": 0.7504,
"sae_top_5_test_accuracy": 0.7984000000000001,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
"llm_test_accuracy": 0.9196000576019288,
"llm_top_1_test_accuracy": 0.5908,
"llm_top_2_test_accuracy": 0.6436,
"llm_top_5_test_accuracy": 0.6726000000000001,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9088000535964966,
"sae_top_1_test_accuracy": 0.6891999999999999,
"sae_top_2_test_accuracy": 0.7234,
"sae_top_5_test_accuracy": 0.7516,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
"llm_test_accuracy": 0.9750000238418579,
"llm_top_1_test_accuracy": 0.692,
"llm_top_2_test_accuracy": 0.744,
"llm_top_5_test_accuracy": 0.798,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9725000262260437,
"sae_top_1_test_accuracy": 0.777,
"sae_top_2_test_accuracy": 0.813,
"sae_top_5_test_accuracy": 0.884,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "codeparrot/github-code_results",
"llm_test_accuracy": 0.9732000589370727,
"llm_top_1_test_accuracy": 0.6355999999999999,
"llm_top_2_test_accuracy": 0.7094000000000001,
"llm_top_5_test_accuracy": 0.7702,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9732000470161438,
"sae_top_1_test_accuracy": 0.6908000000000001,
"sae_top_2_test_accuracy": 0.8030000000000002,
"sae_top_5_test_accuracy": 0.9282,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "fancyzhx/ag_news_results",
"llm_test_accuracy": 0.9442500472068787,
"llm_top_1_test_accuracy": 0.65025,
"llm_top_2_test_accuracy": 0.749,
"llm_top_5_test_accuracy": 0.8342499999999999,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9497500360012054,
"sae_top_1_test_accuracy": 0.68925,
"sae_top_2_test_accuracy": 0.79725,
"sae_top_5_test_accuracy": 0.879,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "Helsinki-NLP/europarl_results",
"llm_test_accuracy": 1.0,
"llm_top_1_test_accuracy": 0.7382000000000001,
"llm_top_2_test_accuracy": 0.8879999999999999,
"llm_top_5_test_accuracy": 0.9328,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9982000470161438,
"sae_top_1_test_accuracy": 0.9581999999999999,
"sae_top_2_test_accuracy": 0.9768000000000001,
"sae_top_5_test_accuracy": 0.9905999999999999,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
}
],
"sae_bench_commit_hash": "Unknown",
"sae_lens_id": "blocks.15.hook_resid_post",
"sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc",
"sae_lens_version": "5.9.1",
"sae_cfg_dict": {
"architecture": "jumprelu",
"d_in": 2304,
"d_sae": 32768,
"activation_fn_str": "relu",
"apply_b_dec_to_input": true,
"finetuning_scaling_factor": false,
"context_size": 1024,
"model_name": "gemma-2-2b",
"hook_name": "blocks.15.hook_resid_post",
"hook_layer": 15,
"hook_head_index": null,
"prepend_bos": true,
"dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B",
"dataset_trust_remote_code": true,
"normalize_activations": "none",
"dtype": "torch.bfloat16",
"device": "cuda",
"sae_lens_training_version": "5.5.1",
"activation_fn_kwargs": {
"k": 40
},
"neuronpedia_id": null,
"model_from_pretrained_kwargs": {
"center_writing_weights": false
},
"seqpos_slice": [
null
]
},
"eval_result_unstructured": {
"LabHC/bias_in_bios_class_set1_results": {
"sae_test_accuracy": {
"0": 0.9520000219345093,
"1": 0.9660000205039978,
"2": 0.9500000476837158,
"6": 0.9820000529289246,
"9": 0.9700000286102295
},
"llm_test_accuracy": {
"0": 0.9520000219345093,
"1": 0.9640000462532043,
"2": 0.9510000348091125,
"6": 0.987000048160553,
"9": 0.9690000414848328
},
"llm_top_1_test_accuracy": {
"0": 0.592,
"1": 0.617,
"2": 0.661,
"6": 0.761,
"9": 0.71
},
"llm_top_2_test_accuracy": {
"0": 0.582,
"1": 0.666,
"2": 0.755,
"6": 0.859,
"9": 0.717
},
"llm_top_5_test_accuracy": {
"0": 0.674,
"1": 0.723,
"2": 0.781,
"6": 0.898,
"9": 0.877
},
"sae_top_1_test_accuracy": {
"0": 0.547,
"1": 0.633,
"2": 0.872,
"6": 0.759,
"9": 0.7
},
"sae_top_2_test_accuracy": {
"0": 0.636,
"1": 0.738,
"2": 0.878,
"6": 0.808,
"9": 0.939
},
"sae_top_5_test_accuracy": {
"0": 0.701,
"1": 0.92,
"2": 0.888,
"6": 0.971,
"9": 0.945
}
},
"LabHC/bias_in_bios_class_set2_results": {
"sae_test_accuracy": {
"11": 0.9500000476837158,
"13": 0.9550000429153442,
"14": 0.9450000524520874,
"18": 0.9100000262260437,
"19": 0.9570000171661377
},
"llm_test_accuracy": {
"11": 0.9580000638961792,
"13": 0.9490000605583191,
"14": 0.9620000720024109,
"18": 0.9350000619888306,
"19": 0.9620000720024109
},
"llm_top_1_test_accuracy": {
"11": 0.531,
"13": 0.659,
"14": 0.649,
"18": 0.699,
"19": 0.759
},
"llm_top_2_test_accuracy": {
"11": 0.693,
"13": 0.687,
"14": 0.665,
"18": 0.73,
"19": 0.779
},
"llm_top_5_test_accuracy": {
"11": 0.82,
"13": 0.748,
"14": 0.702,
"18": 0.733,
"19": 0.84
},
"sae_top_1_test_accuracy": {
"11": 0.857,
"13": 0.672,
"14": 0.663,
"18": 0.68,
"19": 0.758
},
"sae_top_2_test_accuracy": {
"11": 0.867,
"13": 0.684,
"14": 0.689,
"18": 0.697,
"19": 0.796
},
"sae_top_5_test_accuracy": {
"11": 0.886,
"13": 0.806,
"14": 0.865,
"18": 0.756,
"19": 0.854
}
},
"LabHC/bias_in_bios_class_set3_results": {
"sae_test_accuracy": {
"20": 0.9610000252723694,
"21": 0.9190000295639038,
"22": 0.9170000553131104,
"25": 0.9480000734329224,
"26": 0.8880000710487366
},
"llm_test_accuracy": {
"20": 0.9540000557899475,
"21": 0.9120000600814819,
"22": 0.9140000343322754,
"25": 0.9600000381469727,
"26": 0.8810000419616699
},
"llm_top_1_test_accuracy": {
"20": 0.711,
"21": 0.776,
"22": 0.669,
"25": 0.684,
"26": 0.619
},
"llm_top_2_test_accuracy": {
"20": 0.815,
"21": 0.774,
"22": 0.69,
"25": 0.774,
"26": 0.707
},
"llm_top_5_test_accuracy": {
"20": 0.854,
"21": 0.841,
"22": 0.672,
"25": 0.848,
"26": 0.726
},
"sae_top_1_test_accuracy": {
"20": 0.658,
"21": 0.725,
"22": 0.595,
"25": 0.696,
"26": 0.622
},
"sae_top_2_test_accuracy": {
"20": 0.815,
"21": 0.764,
"22": 0.603,
"25": 0.871,
"26": 0.699
},
"sae_top_5_test_accuracy": {
"20": 0.874,
"21": 0.79,
"22": 0.668,
"25": 0.869,
"26": 0.791
}
},
"canrager/amazon_reviews_mcauley_1and5_results": {
"sae_test_accuracy": {
"1": 0.9420000314712524,
"2": 0.9260000586509705,
"3": 0.9070000648498535,
"5": 0.9110000729560852,
"6": 0.8580000400543213
},
"llm_test_accuracy": {
"1": 0.9530000686645508,
"2": 0.9300000667572021,
"3": 0.921000063419342,
"5": 0.9290000200271606,
"6": 0.8650000691413879
},
"llm_top_1_test_accuracy": {
"1": 0.641,
"2": 0.603,
"3": 0.566,
"5": 0.563,
"6": 0.581
},
"llm_top_2_test_accuracy": {
"1": 0.643,
"2": 0.637,
"3": 0.585,
"5": 0.646,
"6": 0.707
},
"llm_top_5_test_accuracy": {
"1": 0.745,
"2": 0.649,
"3": 0.597,
"5": 0.636,
"6": 0.736
},
"sae_top_1_test_accuracy": {
"1": 0.814,
"2": 0.687,
"3": 0.65,
"5": 0.588,
"6": 0.707
},
"sae_top_2_test_accuracy": {
"1": 0.857,
"2": 0.77,
"3": 0.65,
"5": 0.629,
"6": 0.711
},
"sae_top_5_test_accuracy": {
"1": 0.856,
"2": 0.767,
"3": 0.761,
"5": 0.63,
"6": 0.744
}
},
"canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
"sae_test_accuracy": {
"1.0": 0.9700000286102295,
"5.0": 0.9750000238418579
},
"llm_test_accuracy": {
"1.0": 0.9750000238418579,
"5.0": 0.9750000238418579
},
"llm_top_1_test_accuracy": {
"1.0": 0.692,
"5.0": 0.692
},
"llm_top_2_test_accuracy": {
"1.0": 0.744,
"5.0": 0.744
},
"llm_top_5_test_accuracy": {
"1.0": 0.798,
"5.0": 0.798
},
"sae_top_1_test_accuracy": {
"1.0": 0.777,
"5.0": 0.777
},
"sae_top_2_test_accuracy": {
"1.0": 0.813,
"5.0": 0.813
},
"sae_top_5_test_accuracy": {
"1.0": 0.884,
"5.0": 0.884
}
},
"codeparrot/github-code_results": {
"sae_test_accuracy": {
"C": 0.9640000462532043,
"Python": 0.9850000739097595,
"HTML": 0.9890000224113464,
"Java": 0.9650000333786011,
"PHP": 0.9630000591278076
},
"llm_test_accuracy": {
"C": 0.9620000720024109,
"Python": 0.9910000562667847,
"HTML": 0.9820000529289246,
"Java": 0.9670000672340393,
"PHP": 0.9640000462532043
},
"llm_top_1_test_accuracy": {
"C": 0.583,
"Python": 0.637,
"HTML": 0.789,
"Java": 0.591,
"PHP": 0.578
},
"llm_top_2_test_accuracy": {
"C": 0.662,
"Python": 0.672,
"HTML": 0.809,
"Java": 0.704,
"PHP": 0.7
},
"llm_top_5_test_accuracy": {
"C": 0.782,
"Python": 0.705,
"HTML": 0.915,
"Java": 0.743,
"PHP": 0.706
},
"sae_top_1_test_accuracy": {
"C": 0.621,
"Python": 0.609,
"HTML": 0.705,
"Java": 0.597,
"PHP": 0.922
},
"sae_top_2_test_accuracy": {
"C": 0.625,
"Python": 0.932,
"HTML": 0.899,
"Java": 0.644,
"PHP": 0.915
},
"sae_top_5_test_accuracy": {
"C": 0.899,
"Python": 0.947,
"HTML": 0.951,
"Java": 0.915,
"PHP": 0.929
}
},
"fancyzhx/ag_news_results": {
"sae_test_accuracy": {
"0": 0.9360000491142273,
"1": 0.9830000400543213,
"2": 0.9430000185966492,
"3": 0.937000036239624
},
"llm_test_accuracy": {
"0": 0.9310000538825989,
"1": 0.9860000610351562,
"2": 0.9270000457763672,
"3": 0.9330000281333923
},
"llm_top_1_test_accuracy": {
"0": 0.692,
"1": 0.659,
"2": 0.585,
"3": 0.665
},
"llm_top_2_test_accuracy": {
"0": 0.737,
"1": 0.799,
"2": 0.69,
"3": 0.77
},
"llm_top_5_test_accuracy": {
"0": 0.846,
"1": 0.888,
"2": 0.775,
"3": 0.828
},
"sae_top_1_test_accuracy": {
"0": 0.847,
"1": 0.664,
"2": 0.589,
"3": 0.657
},
"sae_top_2_test_accuracy": {
"0": 0.851,
"1": 0.887,
"2": 0.696,
"3": 0.755
},
"sae_top_5_test_accuracy": {
"0": 0.861,
"1": 0.964,
"2": 0.819,
"3": 0.872
}
},
"Helsinki-NLP/europarl_results": {
"sae_test_accuracy": {
"en": 0.9980000257492065,
"fr": 0.999000072479248,
"de": 0.999000072479248,
"es": 0.9980000257492065,
"nl": 0.9970000386238098
},
"llm_test_accuracy": {
"en": 1.0,
"fr": 1.0,
"de": 1.0,
"es": 1.0,
"nl": 1.0
},
"llm_top_1_test_accuracy": {
"en": 0.852,
"fr": 0.632,
"de": 0.564,
"es": 0.971,
"nl": 0.672
},
"llm_top_2_test_accuracy": {
"en": 0.889,
"fr": 0.861,
"de": 0.956,
"es": 0.989,
"nl": 0.745
},
"llm_top_5_test_accuracy": {
"en": 0.957,
"fr": 0.986,
"de": 0.974,
"es": 0.995,
"nl": 0.752
},
"sae_top_1_test_accuracy": {
"en": 0.858,
"fr": 0.993,
"de": 0.946,
"es": 0.996,
"nl": 0.998
},
"sae_top_2_test_accuracy": {
"en": 0.947,
"fr": 0.995,
"de": 0.944,
"es": 0.998,
"nl": 1.0
},
"sae_top_5_test_accuracy": {
"en": 0.995,
"fr": 0.998,
"de": 0.964,
"es": 0.997,
"nl": 0.999
}
}
}
}
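
A minimal sketch for inspecting a results file like this one, using only the Python standard library. The local filename is an assumption (use whatever path the JSON was downloaded to); the key names are taken directly from the structure above. It prints the aggregate probe accuracies on raw LLM activations versus SAE latents, the top-k restricted variants for each k in eval_config["k_values"], and the per-dataset top-1 gap between SAE and LLM probes.

import json
from pathlib import Path

# Assumed local path to the JSON shown above.
path = Path("gemma-2-2b-res-snap-matryoshka-dc_blocks.15.hook_resid_post_eval_results.json")
results = json.loads(path.read_text())

# Top-level aggregates: probes trained on all raw activations vs. all SAE latents.
llm = results["eval_result_metrics"]["llm"]
sae = results["eval_result_metrics"]["sae"]
print(f"LLM full-probe accuracy: {llm['llm_test_accuracy']:.4f}")
print(f"SAE full-probe accuracy: {sae['sae_test_accuracy']:.4f}")

# Top-k restricted probes; only the k values listed in eval_config are non-null.
for k in results["eval_config"]["k_values"]:
    print(f"top-{k}: LLM {llm[f'llm_top_{k}_test_accuracy']:.4f}  "
          f"SAE {sae[f'sae_top_{k}_test_accuracy']:.4f}")

# Per-dataset breakdown: SAE top-1 minus LLM top-1, i.e. how much better a
# single best SAE latent separates the classes than a single best raw dimension.
for row in results["eval_result_details"]:
    delta = row["sae_top_1_test_accuracy"] - row["llm_top_1_test_accuracy"]
    print(f"{row['dataset_name']:<60} top-1 delta: {delta:+.3f}")

Note that the top-level eval_result_metrics values appear to be unweighted means of the eval_result_details rows, so the per-dataset loop above recovers where an aggregate gap (e.g. SAE top-1 at 0.736 vs. LLM top-1 at 0.666) actually comes from.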