saebench / sparse_probing /gemma-2-2b-res-snap-matryoshka-dc_blocks.0.hook_resid_post_eval_results.json
chanind's picture
Upload sparse_probing/gemma-2-2b-res-snap-matryoshka-dc_blocks.0.hook_resid_post_eval_results.json with huggingface_hub
9433583 verified
{
"eval_type_id": "sparse_probing",
"eval_config": {
"random_seed": 42,
"dataset_names": [
"LabHC/bias_in_bios_class_set1",
"LabHC/bias_in_bios_class_set2",
"LabHC/bias_in_bios_class_set3",
"canrager/amazon_reviews_mcauley_1and5",
"canrager/amazon_reviews_mcauley_1and5_sentiment",
"codeparrot/github-code",
"fancyzhx/ag_news",
"Helsinki-NLP/europarl"
],
"probe_train_set_size": 4000,
"probe_test_set_size": 1000,
"context_length": 128,
"sae_batch_size": 125,
"llm_batch_size": 32,
"llm_dtype": "bfloat16",
"model_name": "gemma-2-2b",
"k_values": [
1,
2,
5
],
"lower_vram_usage": false
},
"eval_id": "acf88c24-09bd-4561-9b58-1bd96edb30c6",
"datetime_epoch_millis": 1745622098059,
"eval_result_metrics": {
"llm": {
"llm_test_accuracy": 0.9399250376969576,
"llm_top_1_test_accuracy": 0.68490625,
"llm_top_2_test_accuracy": 0.72038125,
"llm_top_5_test_accuracy": 0.7786125,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null
},
"sae": {
"sae_test_accuracy": 0.9355187952518462,
"sae_top_1_test_accuracy": 0.71199375,
"sae_top_2_test_accuracy": 0.76948125,
"sae_top_5_test_accuracy": 0.83690625,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
}
},
"eval_result_details": [
{
"dataset_name": "LabHC/bias_in_bios_class_set1_results",
"llm_test_accuracy": 0.9592000365257263,
"llm_top_1_test_accuracy": 0.683,
"llm_top_2_test_accuracy": 0.724,
"llm_top_5_test_accuracy": 0.7544000000000001,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9540000557899475,
"sae_top_1_test_accuracy": 0.7824,
"sae_top_2_test_accuracy": 0.8586,
"sae_top_5_test_accuracy": 0.8804000000000001,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "LabHC/bias_in_bios_class_set2_results",
"llm_test_accuracy": 0.9484000444412232,
"llm_top_1_test_accuracy": 0.6866,
"llm_top_2_test_accuracy": 0.7142,
"llm_top_5_test_accuracy": 0.755,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9398000478744507,
"sae_top_1_test_accuracy": 0.6315999999999999,
"sae_top_2_test_accuracy": 0.7030000000000001,
"sae_top_5_test_accuracy": 0.8118000000000001,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "LabHC/bias_in_bios_class_set3_results",
"llm_test_accuracy": 0.9186000347137451,
"llm_top_1_test_accuracy": 0.6357999999999999,
"llm_top_2_test_accuracy": 0.6576000000000001,
"llm_top_5_test_accuracy": 0.7432000000000001,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9072000503540039,
"sae_top_1_test_accuracy": 0.6272,
"sae_top_2_test_accuracy": 0.7380000000000001,
"sae_top_5_test_accuracy": 0.825,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
"llm_test_accuracy": 0.8960000276565552,
"llm_top_1_test_accuracy": 0.6119999999999999,
"llm_top_2_test_accuracy": 0.6338,
"llm_top_5_test_accuracy": 0.6592,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.8964000463485717,
"sae_top_1_test_accuracy": 0.6232,
"sae_top_2_test_accuracy": 0.6592,
"sae_top_5_test_accuracy": 0.76,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
"llm_test_accuracy": 0.8935000598430634,
"llm_top_1_test_accuracy": 0.623,
"llm_top_2_test_accuracy": 0.638,
"llm_top_5_test_accuracy": 0.73,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.8770000338554382,
"sae_top_1_test_accuracy": 0.65,
"sae_top_2_test_accuracy": 0.665,
"sae_top_5_test_accuracy": 0.725,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "codeparrot/github-code_results",
"llm_test_accuracy": 0.9654000520706176,
"llm_top_1_test_accuracy": 0.6916,
"llm_top_2_test_accuracy": 0.753,
"llm_top_5_test_accuracy": 0.8103999999999999,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9684000492095948,
"sae_top_1_test_accuracy": 0.7318,
"sae_top_2_test_accuracy": 0.7958000000000001,
"sae_top_5_test_accuracy": 0.8648,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "fancyzhx/ag_news_results",
"llm_test_accuracy": 0.9385000318288803,
"llm_top_1_test_accuracy": 0.73525,
"llm_top_2_test_accuracy": 0.76625,
"llm_top_5_test_accuracy": 0.8285,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9417500495910645,
"sae_top_1_test_accuracy": 0.7817500000000001,
"sae_top_2_test_accuracy": 0.8232499999999999,
"sae_top_5_test_accuracy": 0.87025,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
},
{
"dataset_name": "Helsinki-NLP/europarl_results",
"llm_test_accuracy": 0.9998000144958497,
"llm_top_1_test_accuracy": 0.8119999999999999,
"llm_top_2_test_accuracy": 0.8762000000000001,
"llm_top_5_test_accuracy": 0.9481999999999999,
"llm_top_10_test_accuracy": null,
"llm_top_20_test_accuracy": null,
"llm_top_50_test_accuracy": null,
"llm_top_100_test_accuracy": null,
"sae_test_accuracy": 0.9996000289916992,
"sae_top_1_test_accuracy": 0.868,
"sae_top_2_test_accuracy": 0.9129999999999999,
"sae_top_5_test_accuracy": 0.958,
"sae_top_10_test_accuracy": null,
"sae_top_20_test_accuracy": null,
"sae_top_50_test_accuracy": null,
"sae_top_100_test_accuracy": null
}
],
"sae_bench_commit_hash": "Unknown",
"sae_lens_id": "blocks.0.hook_resid_post",
"sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc",
"sae_lens_version": "5.9.1",
"sae_cfg_dict": {
"architecture": "jumprelu",
"d_in": 2304,
"d_sae": 32768,
"activation_fn_str": "relu",
"apply_b_dec_to_input": true,
"finetuning_scaling_factor": false,
"context_size": 1024,
"model_name": "gemma-2-2b",
"hook_name": "blocks.0.hook_resid_post",
"hook_layer": 0,
"hook_head_index": null,
"prepend_bos": true,
"dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B",
"dataset_trust_remote_code": true,
"normalize_activations": "none",
"dtype": "torch.bfloat16",
"device": "cuda",
"sae_lens_training_version": "5.5.1",
"activation_fn_kwargs": {
"k": 40
},
"neuronpedia_id": null,
"model_from_pretrained_kwargs": {
"center_writing_weights": false
},
"seqpos_slice": [
null
]
},
"eval_result_unstructured": {
"LabHC/bias_in_bios_class_set1_results": {
"sae_test_accuracy": {
"0": 0.9330000281333923,
"1": 0.9480000734329224,
"2": 0.9390000700950623,
"6": 0.9860000610351562,
"9": 0.9640000462532043
},
"llm_test_accuracy": {
"0": 0.9260000586509705,
"1": 0.9550000429153442,
"2": 0.9510000348091125,
"6": 0.9890000224113464,
"9": 0.9750000238418579
},
"llm_top_1_test_accuracy": {
"0": 0.604,
"1": 0.665,
"2": 0.554,
"6": 0.797,
"9": 0.795
},
"llm_top_2_test_accuracy": {
"0": 0.687,
"1": 0.669,
"2": 0.636,
"6": 0.81,
"9": 0.818
},
"llm_top_5_test_accuracy": {
"0": 0.727,
"1": 0.643,
"2": 0.749,
"6": 0.812,
"9": 0.841
},
"sae_top_1_test_accuracy": {
"0": 0.701,
"1": 0.578,
"2": 0.81,
"6": 0.907,
"9": 0.916
},
"sae_top_2_test_accuracy": {
"0": 0.774,
"1": 0.799,
"2": 0.835,
"6": 0.961,
"9": 0.924
},
"sae_top_5_test_accuracy": {
"0": 0.818,
"1": 0.8,
"2": 0.893,
"6": 0.964,
"9": 0.927
}
},
"LabHC/bias_in_bios_class_set2_results": {
"sae_test_accuracy": {
"11": 0.9470000267028809,
"13": 0.9350000619888306,
"14": 0.9460000395774841,
"18": 0.9120000600814819,
"19": 0.9590000510215759
},
"llm_test_accuracy": {
"11": 0.9520000219345093,
"13": 0.9510000348091125,
"14": 0.9510000348091125,
"18": 0.9250000715255737,
"19": 0.9630000591278076
},
"llm_top_1_test_accuracy": {
"11": 0.619,
"13": 0.658,
"14": 0.71,
"18": 0.642,
"19": 0.804
},
"llm_top_2_test_accuracy": {
"11": 0.684,
"13": 0.66,
"14": 0.738,
"18": 0.69,
"19": 0.799
},
"llm_top_5_test_accuracy": {
"11": 0.8,
"13": 0.732,
"14": 0.727,
"18": 0.71,
"19": 0.806
},
"sae_top_1_test_accuracy": {
"11": 0.701,
"13": 0.659,
"14": 0.589,
"18": 0.628,
"19": 0.581
},
"sae_top_2_test_accuracy": {
"11": 0.7,
"13": 0.73,
"14": 0.62,
"18": 0.664,
"19": 0.801
},
"sae_top_5_test_accuracy": {
"11": 0.786,
"13": 0.716,
"14": 0.865,
"18": 0.862,
"19": 0.83
}
},
"LabHC/bias_in_bios_class_set3_results": {
"sae_test_accuracy": {
"20": 0.9270000457763672,
"21": 0.8950000405311584,
"22": 0.8960000276565552,
"25": 0.9480000734329224,
"26": 0.8700000643730164
},
"llm_test_accuracy": {
"20": 0.9460000395774841,
"21": 0.8960000276565552,
"22": 0.9100000262260437,
"25": 0.9520000219345093,
"26": 0.8890000581741333
},
"llm_top_1_test_accuracy": {
"20": 0.656,
"21": 0.632,
"22": 0.62,
"25": 0.658,
"26": 0.613
},
"llm_top_2_test_accuracy": {
"20": 0.639,
"21": 0.634,
"22": 0.619,
"25": 0.756,
"26": 0.64
},
"llm_top_5_test_accuracy": {
"20": 0.811,
"21": 0.755,
"22": 0.693,
"25": 0.782,
"26": 0.675
},
"sae_top_1_test_accuracy": {
"20": 0.653,
"21": 0.62,
"22": 0.646,
"25": 0.618,
"26": 0.599
},
"sae_top_2_test_accuracy": {
"20": 0.717,
"21": 0.648,
"22": 0.835,
"25": 0.825,
"26": 0.665
},
"sae_top_5_test_accuracy": {
"20": 0.895,
"21": 0.786,
"22": 0.833,
"25": 0.908,
"26": 0.703
}
},
"canrager/amazon_reviews_mcauley_1and5_results": {
"sae_test_accuracy": {
"1": 0.9200000166893005,
"2": 0.9120000600814819,
"3": 0.8990000486373901,
"5": 0.9020000696182251,
"6": 0.8490000367164612
},
"llm_test_accuracy": {
"1": 0.9240000247955322,
"2": 0.9150000214576721,
"3": 0.8860000371932983,
"5": 0.9010000228881836,
"6": 0.8540000319480896
},
"llm_top_1_test_accuracy": {
"1": 0.693,
"2": 0.614,
"3": 0.592,
"5": 0.55,
"6": 0.611
},
"llm_top_2_test_accuracy": {
"1": 0.749,
"2": 0.64,
"3": 0.599,
"5": 0.527,
"6": 0.654
},
"llm_top_5_test_accuracy": {
"1": 0.736,
"2": 0.651,
"3": 0.669,
"5": 0.587,
"6": 0.653
},
"sae_top_1_test_accuracy": {
"1": 0.574,
"2": 0.805,
"3": 0.572,
"5": 0.566,
"6": 0.599
},
"sae_top_2_test_accuracy": {
"1": 0.588,
"2": 0.831,
"3": 0.593,
"5": 0.666,
"6": 0.618
},
"sae_top_5_test_accuracy": {
"1": 0.797,
"2": 0.847,
"3": 0.642,
"5": 0.808,
"6": 0.706
}
},
"canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
"sae_test_accuracy": {
"1.0": 0.8760000467300415,
"5.0": 0.878000020980835
},
"llm_test_accuracy": {
"1.0": 0.8940000534057617,
"5.0": 0.893000066280365
},
"llm_top_1_test_accuracy": {
"1.0": 0.623,
"5.0": 0.623
},
"llm_top_2_test_accuracy": {
"1.0": 0.638,
"5.0": 0.638
},
"llm_top_5_test_accuracy": {
"1.0": 0.73,
"5.0": 0.73
},
"sae_top_1_test_accuracy": {
"1.0": 0.65,
"5.0": 0.65
},
"sae_top_2_test_accuracy": {
"1.0": 0.665,
"5.0": 0.665
},
"sae_top_5_test_accuracy": {
"1.0": 0.725,
"5.0": 0.725
}
},
"codeparrot/github-code_results": {
"sae_test_accuracy": {
"C": 0.9510000348091125,
"Python": 0.9850000739097595,
"HTML": 0.9890000224113464,
"Java": 0.9590000510215759,
"PHP": 0.9580000638961792
},
"llm_test_accuracy": {
"C": 0.9490000605583191,
"Python": 0.9810000658035278,
"HTML": 0.9900000691413879,
"Java": 0.956000030040741,
"PHP": 0.9510000348091125
},
"llm_top_1_test_accuracy": {
"C": 0.634,
"Python": 0.624,
"HTML": 0.911,
"Java": 0.663,
"PHP": 0.626
},
"llm_top_2_test_accuracy": {
"C": 0.773,
"Python": 0.68,
"HTML": 0.929,
"Java": 0.747,
"PHP": 0.636
},
"llm_top_5_test_accuracy": {
"C": 0.832,
"Python": 0.819,
"HTML": 0.952,
"Java": 0.77,
"PHP": 0.679
},
"sae_top_1_test_accuracy": {
"C": 0.705,
"Python": 0.715,
"HTML": 0.904,
"Java": 0.697,
"PHP": 0.638
},
"sae_top_2_test_accuracy": {
"C": 0.722,
"Python": 0.735,
"HTML": 0.942,
"Java": 0.763,
"PHP": 0.817
},
"sae_top_5_test_accuracy": {
"C": 0.783,
"Python": 0.953,
"HTML": 0.942,
"Java": 0.797,
"PHP": 0.849
}
},
"fancyzhx/ag_news_results": {
"sae_test_accuracy": {
"0": 0.9390000700950623,
"1": 0.9740000367164612,
"2": 0.9190000295639038,
"3": 0.9350000619888306
},
"llm_test_accuracy": {
"0": 0.9360000491142273,
"1": 0.9740000367164612,
"2": 0.9100000262260437,
"3": 0.9340000152587891
},
"llm_top_1_test_accuracy": {
"0": 0.787,
"1": 0.771,
"2": 0.675,
"3": 0.708
},
"llm_top_2_test_accuracy": {
"0": 0.801,
"1": 0.844,
"2": 0.702,
"3": 0.718
},
"llm_top_5_test_accuracy": {
"0": 0.831,
"1": 0.907,
"2": 0.774,
"3": 0.802
},
"sae_top_1_test_accuracy": {
"0": 0.821,
"1": 0.936,
"2": 0.752,
"3": 0.618
},
"sae_top_2_test_accuracy": {
"0": 0.83,
"1": 0.942,
"2": 0.767,
"3": 0.754
},
"sae_top_5_test_accuracy": {
"0": 0.836,
"1": 0.967,
"2": 0.821,
"3": 0.857
}
},
"Helsinki-NLP/europarl_results": {
"sae_test_accuracy": {
"en": 0.999000072479248,
"fr": 1.0,
"de": 1.0,
"es": 1.0,
"nl": 0.999000072479248
},
"llm_test_accuracy": {
"en": 1.0,
"fr": 1.0,
"de": 1.0,
"es": 0.999000072479248,
"nl": 1.0
},
"llm_top_1_test_accuracy": {
"en": 0.983,
"fr": 0.665,
"de": 0.889,
"es": 0.743,
"nl": 0.78
},
"llm_top_2_test_accuracy": {
"en": 0.991,
"fr": 0.763,
"de": 0.888,
"es": 0.951,
"nl": 0.788
},
"llm_top_5_test_accuracy": {
"en": 0.997,
"fr": 0.944,
"de": 0.939,
"es": 0.979,
"nl": 0.882
},
"sae_top_1_test_accuracy": {
"en": 0.999,
"fr": 0.816,
"de": 0.933,
"es": 0.69,
"nl": 0.902
},
"sae_top_2_test_accuracy": {
"en": 1.0,
"fr": 0.844,
"de": 1.0,
"es": 0.826,
"nl": 0.895
},
"sae_top_5_test_accuracy": {
"en": 0.999,
"fr": 0.888,
"de": 1.0,
"es": 0.995,
"nl": 0.908
}
}
}
}