| { |
| "eval_type_id": "sparse_probing", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "LabHC/bias_in_bios_class_set2", |
| "LabHC/bias_in_bios_class_set3", |
| "canrager/amazon_reviews_mcauley_1and5", |
| "canrager/amazon_reviews_mcauley_1and5_sentiment", |
| "codeparrot/github-code", |
| "fancyzhx/ag_news", |
| "Helsinki-NLP/europarl" |
| ], |
| "probe_train_set_size": 4000, |
| "probe_test_set_size": 1000, |
| "context_length": 128, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "model_name": "gemma-2-2b", |
| "k_values": [ |
| 1, |
| 2, |
| 5 |
| ], |
| "lower_vram_usage": false |
| }, |
| "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0", |
| "datetime_epoch_millis": 1745755226531, |
| "eval_result_metrics": { |
| "llm": { |
| "llm_test_accuracy": 0.9574000433087348, |
| "llm_top_1_test_accuracy": 0.7171, |
| "llm_top_2_test_accuracy": 0.75324375, |
| "llm_top_5_test_accuracy": 0.8162874999999998, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null |
| }, |
| "sae": { |
| "sae_test_accuracy": 0.9553813003003596, |
| "sae_top_1_test_accuracy": 0.82249375, |
| "sae_top_2_test_accuracy": 0.8694000000000001, |
| "sae_top_5_test_accuracy": 0.9082749999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_results", |
| "llm_test_accuracy": 0.9658000469207764, |
| "llm_top_1_test_accuracy": 0.7106, |
| "llm_top_2_test_accuracy": 0.7302, |
| "llm_top_5_test_accuracy": 0.8082, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.96500004529953, |
| "sae_top_1_test_accuracy": 0.798, |
| "sae_top_2_test_accuracy": 0.9002000000000001, |
| "sae_top_5_test_accuracy": 0.9254, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set2_results", |
| "llm_test_accuracy": 0.9532000422477722, |
| "llm_top_1_test_accuracy": 0.675, |
| "llm_top_2_test_accuracy": 0.6782, |
| "llm_top_5_test_accuracy": 0.7470000000000001, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9516000509262085, |
| "sae_top_1_test_accuracy": 0.751, |
| "sae_top_2_test_accuracy": 0.8256, |
| "sae_top_5_test_accuracy": 0.9122, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set3_results", |
| "llm_test_accuracy": 0.9282000541687012, |
| "llm_top_1_test_accuracy": 0.696, |
| "llm_top_2_test_accuracy": 0.729, |
| "llm_top_5_test_accuracy": 0.7752000000000001, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9208000421524047, |
| "sae_top_1_test_accuracy": 0.7906000000000001, |
| "sae_top_2_test_accuracy": 0.8228, |
| "sae_top_5_test_accuracy": 0.8602000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results", |
| "llm_test_accuracy": 0.9282000541687012, |
| "llm_top_1_test_accuracy": 0.7150000000000001, |
| "llm_top_2_test_accuracy": 0.7494, |
| "llm_top_5_test_accuracy": 0.829, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9332000374794006, |
| "sae_top_1_test_accuracy": 0.7884000000000001, |
| "sae_top_2_test_accuracy": 0.8152000000000001, |
| "sae_top_5_test_accuracy": 0.8513999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results", |
| "llm_test_accuracy": 0.9660000503063202, |
| "llm_top_1_test_accuracy": 0.68, |
| "llm_top_2_test_accuracy": 0.743, |
| "llm_top_5_test_accuracy": 0.774, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9540000557899475, |
| "sae_top_1_test_accuracy": 0.888, |
| "sae_top_2_test_accuracy": 0.884, |
| "sae_top_5_test_accuracy": 0.911, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "codeparrot/github-code_results", |
| "llm_test_accuracy": 0.9690000534057617, |
| "llm_top_1_test_accuracy": 0.631, |
| "llm_top_2_test_accuracy": 0.6548, |
| "llm_top_5_test_accuracy": 0.778, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9696000576019287, |
| "sae_top_1_test_accuracy": 0.7588, |
| "sae_top_2_test_accuracy": 0.8602000000000001, |
| "sae_top_5_test_accuracy": 0.9148, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "fancyzhx/ag_news_results", |
| "llm_test_accuracy": 0.9490000307559967, |
| "llm_top_1_test_accuracy": 0.661, |
| "llm_top_2_test_accuracy": 0.74675, |
| "llm_top_5_test_accuracy": 0.8194999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9502500593662262, |
| "sae_top_1_test_accuracy": 0.8087500000000001, |
| "sae_top_2_test_accuracy": 0.848, |
| "sae_top_5_test_accuracy": 0.8919999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "Helsinki-NLP/europarl_results", |
| "llm_test_accuracy": 0.9998000144958497, |
| "llm_top_1_test_accuracy": 0.9682000000000001, |
| "llm_top_2_test_accuracy": 0.9945999999999999, |
| "llm_top_5_test_accuracy": 0.9994, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9986000537872315, |
| "sae_top_1_test_accuracy": 0.9964000000000001, |
| "sae_top_2_test_accuracy": 0.9992000000000001, |
| "sae_top_5_test_accuracy": 0.9992000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.23.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.23.hook_resid_post", |
| "hook_layer": 23, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1_results": { |
| "sae_test_accuracy": { |
| "0": 0.9510000348091125, |
| "1": 0.9690000414848328, |
| "2": 0.9410000443458557, |
| "6": 0.9860000610351562, |
| "9": 0.9780000448226929 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9470000267028809, |
| "1": 0.9620000720024109, |
| "2": 0.9490000605583191, |
| "6": 0.9930000305175781, |
| "9": 0.9780000448226929 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.599, |
| "1": 0.653, |
| "2": 0.688, |
| "6": 0.799, |
| "9": 0.814 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.601, |
| "1": 0.663, |
| "2": 0.824, |
| "6": 0.831, |
| "9": 0.732 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.719, |
| "1": 0.669, |
| "2": 0.846, |
| "6": 0.894, |
| "9": 0.913 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.596, |
| "1": 0.81, |
| "2": 0.846, |
| "6": 0.79, |
| "9": 0.948 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.882, |
| "1": 0.809, |
| "2": 0.881, |
| "6": 0.982, |
| "9": 0.947 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.875, |
| "1": 0.878, |
| "2": 0.918, |
| "6": 0.989, |
| "9": 0.967 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set2_results": { |
| "sae_test_accuracy": { |
| "11": 0.956000030040741, |
| "13": 0.9490000605583191, |
| "14": 0.9530000686645508, |
| "18": 0.9390000700950623, |
| "19": 0.9610000252723694 |
| }, |
| "llm_test_accuracy": { |
| "11": 0.9570000171661377, |
| "13": 0.9490000605583191, |
| "14": 0.9630000591278076, |
| "18": 0.9420000314712524, |
| "19": 0.9550000429153442 |
| }, |
| "llm_top_1_test_accuracy": { |
| "11": 0.592, |
| "13": 0.657, |
| "14": 0.632, |
| "18": 0.688, |
| "19": 0.806 |
| }, |
| "llm_top_2_test_accuracy": { |
| "11": 0.582, |
| "13": 0.682, |
| "14": 0.623, |
| "18": 0.7, |
| "19": 0.804 |
| }, |
| "llm_top_5_test_accuracy": { |
| "11": 0.823, |
| "13": 0.759, |
| "14": 0.644, |
| "18": 0.733, |
| "19": 0.776 |
| }, |
| "sae_top_1_test_accuracy": { |
| "11": 0.7, |
| "13": 0.674, |
| "14": 0.881, |
| "18": 0.698, |
| "19": 0.802 |
| }, |
| "sae_top_2_test_accuracy": { |
| "11": 0.891, |
| "13": 0.771, |
| "14": 0.876, |
| "18": 0.744, |
| "19": 0.846 |
| }, |
| "sae_top_5_test_accuracy": { |
| "11": 0.955, |
| "13": 0.893, |
| "14": 0.891, |
| "18": 0.917, |
| "19": 0.905 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set3_results": { |
| "sae_test_accuracy": { |
| "20": 0.9550000429153442, |
| "21": 0.9160000681877136, |
| "22": 0.9100000262260437, |
| "25": 0.9460000395774841, |
| "26": 0.8770000338554382 |
| }, |
| "llm_test_accuracy": { |
| "20": 0.9530000686645508, |
| "21": 0.9120000600814819, |
| "22": 0.9160000681877136, |
| "25": 0.9570000171661377, |
| "26": 0.9030000567436218 |
| }, |
| "llm_top_1_test_accuracy": { |
| "20": 0.727, |
| "21": 0.737, |
| "22": 0.642, |
| "25": 0.71, |
| "26": 0.664 |
| }, |
| "llm_top_2_test_accuracy": { |
| "20": 0.817, |
| "21": 0.751, |
| "22": 0.694, |
| "25": 0.721, |
| "26": 0.662 |
| }, |
| "llm_top_5_test_accuracy": { |
| "20": 0.836, |
| "21": 0.813, |
| "22": 0.745, |
| "25": 0.782, |
| "26": 0.7 |
| }, |
| "sae_top_1_test_accuracy": { |
| "20": 0.843, |
| "21": 0.738, |
| "22": 0.851, |
| "25": 0.862, |
| "26": 0.659 |
| }, |
| "sae_top_2_test_accuracy": { |
| "20": 0.863, |
| "21": 0.784, |
| "22": 0.858, |
| "25": 0.865, |
| "26": 0.744 |
| }, |
| "sae_top_5_test_accuracy": { |
| "20": 0.913, |
| "21": 0.842, |
| "22": 0.863, |
| "25": 0.91, |
| "26": 0.773 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_results": { |
| "sae_test_accuracy": { |
| "1": 0.9650000333786011, |
| "2": 0.937000036239624, |
| "3": 0.9320000410079956, |
| "5": 0.940000057220459, |
| "6": 0.8920000195503235 |
| }, |
| "llm_test_accuracy": { |
| "1": 0.9590000510215759, |
| "2": 0.9360000491142273, |
| "3": 0.9250000715255737, |
| "5": 0.9280000329017639, |
| "6": 0.893000066280365 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1": 0.82, |
| "2": 0.783, |
| "3": 0.695, |
| "5": 0.6, |
| "6": 0.677 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1": 0.82, |
| "2": 0.818, |
| "3": 0.707, |
| "5": 0.716, |
| "6": 0.686 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1": 0.87, |
| "2": 0.872, |
| "3": 0.792, |
| "5": 0.847, |
| "6": 0.764 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1": 0.783, |
| "2": 0.861, |
| "3": 0.671, |
| "5": 0.858, |
| "6": 0.769 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1": 0.872, |
| "2": 0.861, |
| "3": 0.686, |
| "5": 0.89, |
| "6": 0.767 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1": 0.94, |
| "2": 0.882, |
| "3": 0.786, |
| "5": 0.888, |
| "6": 0.761 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_sentiment_results": { |
| "sae_test_accuracy": { |
| "1.0": 0.9530000686645508, |
| "5.0": 0.9550000429153442 |
| }, |
| "llm_test_accuracy": { |
| "1.0": 0.9650000333786011, |
| "5.0": 0.9670000672340393 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1.0": 0.68, |
| "5.0": 0.68 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1.0": 0.743, |
| "5.0": 0.743 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1.0": 0.774, |
| "5.0": 0.774 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1.0": 0.888, |
| "5.0": 0.888 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1.0": 0.884, |
| "5.0": 0.884 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1.0": 0.911, |
| "5.0": 0.911 |
| } |
| }, |
| "codeparrot/github-code_results": { |
| "sae_test_accuracy": { |
| "C": 0.9530000686645508, |
| "Python": 0.9790000319480896, |
| "HTML": 0.987000048160553, |
| "Java": 0.9670000672340393, |
| "PHP": 0.9620000720024109 |
| }, |
| "llm_test_accuracy": { |
| "C": 0.9580000638961792, |
| "Python": 0.9860000610351562, |
| "HTML": 0.9830000400543213, |
| "Java": 0.9650000333786011, |
| "PHP": 0.9530000686645508 |
| }, |
| "llm_top_1_test_accuracy": { |
| "C": 0.566, |
| "Python": 0.602, |
| "HTML": 0.793, |
| "Java": 0.618, |
| "PHP": 0.576 |
| }, |
| "llm_top_2_test_accuracy": { |
| "C": 0.554, |
| "Python": 0.625, |
| "HTML": 0.867, |
| "Java": 0.596, |
| "PHP": 0.632 |
| }, |
| "llm_top_5_test_accuracy": { |
| "C": 0.792, |
| "Python": 0.777, |
| "HTML": 0.887, |
| "Java": 0.752, |
| "PHP": 0.682 |
| }, |
| "sae_top_1_test_accuracy": { |
| "C": 0.624, |
| "Python": 0.899, |
| "HTML": 0.702, |
| "Java": 0.642, |
| "PHP": 0.927 |
| }, |
| "sae_top_2_test_accuracy": { |
| "C": 0.851, |
| "Python": 0.909, |
| "HTML": 0.953, |
| "Java": 0.656, |
| "PHP": 0.932 |
| }, |
| "sae_top_5_test_accuracy": { |
| "C": 0.858, |
| "Python": 0.972, |
| "HTML": 0.951, |
| "Java": 0.863, |
| "PHP": 0.93 |
| } |
| }, |
| "fancyzhx/ag_news_results": { |
| "sae_test_accuracy": { |
| "0": 0.9310000538825989, |
| "1": 0.9900000691413879, |
| "2": 0.9310000538825989, |
| "3": 0.9490000605583191 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9340000152587891, |
| "1": 0.9880000352859497, |
| "2": 0.9280000329017639, |
| "3": 0.9460000395774841 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.684, |
| "1": 0.657, |
| "2": 0.572, |
| "3": 0.731 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.79, |
| "1": 0.766, |
| "2": 0.686, |
| "3": 0.745 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.824, |
| "1": 0.836, |
| "2": 0.805, |
| "3": 0.813 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.851, |
| "1": 0.862, |
| "2": 0.802, |
| "3": 0.72 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.883, |
| "1": 0.877, |
| "2": 0.854, |
| "3": 0.778 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.893, |
| "1": 0.976, |
| "2": 0.85, |
| "3": 0.849 |
| } |
| }, |
| "Helsinki-NLP/europarl_results": { |
| "sae_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 0.999000072479248, |
| "de": 0.999000072479248, |
| "es": 0.9980000257492065, |
| "nl": 0.9980000257492065 |
| }, |
| "llm_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "llm_top_1_test_accuracy": { |
| "en": 0.985, |
| "fr": 0.997, |
| "de": 0.992, |
| "es": 0.867, |
| "nl": 1.0 |
| }, |
| "llm_top_2_test_accuracy": { |
| "en": 0.98, |
| "fr": 0.997, |
| "de": 1.0, |
| "es": 0.996, |
| "nl": 1.0 |
| }, |
| "llm_top_5_test_accuracy": { |
| "en": 0.999, |
| "fr": 0.998, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "sae_top_1_test_accuracy": { |
| "en": 0.984, |
| "fr": 0.998, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "sae_top_2_test_accuracy": { |
| "en": 0.999, |
| "fr": 0.998, |
| "de": 0.999, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "sae_top_5_test_accuracy": { |
| "en": 0.999, |
| "fr": 0.997, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| } |
| } |
| } |
| } |