| { |
| "eval_type_id": "sparse_probing", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "LabHC/bias_in_bios_class_set2", |
| "LabHC/bias_in_bios_class_set3", |
| "canrager/amazon_reviews_mcauley_1and5", |
| "canrager/amazon_reviews_mcauley_1and5_sentiment", |
| "codeparrot/github-code", |
| "fancyzhx/ag_news", |
| "Helsinki-NLP/europarl" |
| ], |
| "probe_train_set_size": 4000, |
| "probe_test_set_size": 1000, |
| "context_length": 128, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "model_name": "gemma-2-2b", |
| "k_values": [ |
| 1, |
| 2, |
| 5 |
| ], |
| "lower_vram_usage": false |
| }, |
| "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0", |
| "datetime_epoch_millis": 1745755077961, |
| "eval_result_metrics": { |
| "llm": { |
| "llm_test_accuracy": 0.9593437947332858, |
| "llm_top_1_test_accuracy": 0.70410625, |
| "llm_top_2_test_accuracy": 0.7557, |
| "llm_top_5_test_accuracy": 0.8173062500000001, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null |
| }, |
| "sae": { |
| "sae_test_accuracy": 0.9570937868207693, |
| "sae_top_1_test_accuracy": 0.7982750000000001, |
| "sae_top_2_test_accuracy": 0.8662875000000001, |
| "sae_top_5_test_accuracy": 0.90701875, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_results", |
| "llm_test_accuracy": 0.9682000517845154, |
| "llm_top_1_test_accuracy": 0.6666000000000001, |
| "llm_top_2_test_accuracy": 0.7152000000000001, |
| "llm_top_5_test_accuracy": 0.7978, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9696000456809998, |
| "sae_top_1_test_accuracy": 0.8346, |
| "sae_top_2_test_accuracy": 0.8986000000000001, |
| "sae_top_5_test_accuracy": 0.9308, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set2_results", |
| "llm_test_accuracy": 0.9588000416755676, |
| "llm_top_1_test_accuracy": 0.6688000000000001, |
| "llm_top_2_test_accuracy": 0.685, |
| "llm_top_5_test_accuracy": 0.7556, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9466000437736511, |
| "sae_top_1_test_accuracy": 0.7636, |
| "sae_top_2_test_accuracy": 0.8622, |
| "sae_top_5_test_accuracy": 0.9010000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set3_results", |
| "llm_test_accuracy": 0.9284000515937805, |
| "llm_top_1_test_accuracy": 0.6984, |
| "llm_top_2_test_accuracy": 0.7352000000000001, |
| "llm_top_5_test_accuracy": 0.7849999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.929200041294098, |
| "sae_top_1_test_accuracy": 0.8008, |
| "sae_top_2_test_accuracy": 0.8362, |
| "sae_top_5_test_accuracy": 0.8764, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results", |
| "llm_test_accuracy": 0.9332000494003296, |
| "llm_top_1_test_accuracy": 0.6744, |
| "llm_top_2_test_accuracy": 0.7394000000000001, |
| "llm_top_5_test_accuracy": 0.8013999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.931000030040741, |
| "sae_top_1_test_accuracy": 0.7612, |
| "sae_top_2_test_accuracy": 0.8148, |
| "sae_top_5_test_accuracy": 0.849, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results", |
| "llm_test_accuracy": 0.9670000374317169, |
| "llm_top_1_test_accuracy": 0.685, |
| "llm_top_2_test_accuracy": 0.749, |
| "llm_top_5_test_accuracy": 0.79, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.960500031709671, |
| "sae_top_1_test_accuracy": 0.765, |
| "sae_top_2_test_accuracy": 0.852, |
| "sae_top_5_test_accuracy": 0.886, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "codeparrot/github-code_results", |
| "llm_test_accuracy": 0.9688000440597534, |
| "llm_top_1_test_accuracy": 0.6298, |
| "llm_top_2_test_accuracy": 0.6841999999999999, |
| "llm_top_5_test_accuracy": 0.7826, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9742000460624695, |
| "sae_top_1_test_accuracy": 0.6944000000000001, |
| "sae_top_2_test_accuracy": 0.8158000000000001, |
| "sae_top_5_test_accuracy": 0.9146000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "fancyzhx/ag_news_results", |
| "llm_test_accuracy": 0.9507500529289246, |
| "llm_top_1_test_accuracy": 0.65025, |
| "llm_top_2_test_accuracy": 0.747, |
| "llm_top_5_test_accuracy": 0.82725, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9462500363588333, |
| "sae_top_1_test_accuracy": 0.814, |
| "sae_top_2_test_accuracy": 0.8614999999999999, |
| "sae_top_5_test_accuracy": 0.89975, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "Helsinki-NLP/europarl_results", |
| "llm_test_accuracy": 0.9996000289916992, |
| "llm_top_1_test_accuracy": 0.9596, |
| "llm_top_2_test_accuracy": 0.9905999999999999, |
| "llm_top_5_test_accuracy": 0.9987999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9994000196456909, |
| "sae_top_1_test_accuracy": 0.9526, |
| "sae_top_2_test_accuracy": 0.9892, |
| "sae_top_5_test_accuracy": 0.9986, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.21.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.21.hook_resid_post", |
| "hook_layer": 21, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1_results": { |
| "sae_test_accuracy": { |
| "0": 0.9450000524520874, |
| "1": 0.9690000414848328, |
| "2": 0.9600000381469727, |
| "6": 0.9920000433921814, |
| "9": 0.9820000529289246 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9510000348091125, |
| "1": 0.9630000591278076, |
| "2": 0.9580000638961792, |
| "6": 0.9880000352859497, |
| "9": 0.9810000658035278 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.578, |
| "1": 0.658, |
| "2": 0.691, |
| "6": 0.744, |
| "9": 0.662 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.567, |
| "1": 0.67, |
| "2": 0.786, |
| "6": 0.807, |
| "9": 0.746 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.619, |
| "1": 0.708, |
| "2": 0.828, |
| "6": 0.906, |
| "9": 0.928 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.575, |
| "1": 0.806, |
| "2": 0.869, |
| "6": 0.981, |
| "9": 0.942 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.873, |
| "1": 0.82, |
| "2": 0.869, |
| "6": 0.982, |
| "9": 0.949 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.87, |
| "1": 0.934, |
| "2": 0.9, |
| "6": 0.989, |
| "9": 0.961 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set2_results": { |
| "sae_test_accuracy": { |
| "11": 0.9590000510215759, |
| "13": 0.937000036239624, |
| "14": 0.9510000348091125, |
| "18": 0.9230000376701355, |
| "19": 0.9630000591278076 |
| }, |
| "llm_test_accuracy": { |
| "11": 0.9660000205039978, |
| "13": 0.956000030040741, |
| "14": 0.9690000414848328, |
| "18": 0.9410000443458557, |
| "19": 0.9620000720024109 |
| }, |
| "llm_top_1_test_accuracy": { |
| "11": 0.558, |
| "13": 0.666, |
| "14": 0.647, |
| "18": 0.695, |
| "19": 0.778 |
| }, |
| "llm_top_2_test_accuracy": { |
| "11": 0.598, |
| "13": 0.675, |
| "14": 0.656, |
| "18": 0.693, |
| "19": 0.803 |
| }, |
| "llm_top_5_test_accuracy": { |
| "11": 0.83, |
| "13": 0.756, |
| "14": 0.658, |
| "18": 0.741, |
| "19": 0.793 |
| }, |
| "sae_top_1_test_accuracy": { |
| "11": 0.733, |
| "13": 0.661, |
| "14": 0.909, |
| "18": 0.715, |
| "19": 0.8 |
| }, |
| "sae_top_2_test_accuracy": { |
| "11": 0.847, |
| "13": 0.779, |
| "14": 0.907, |
| "18": 0.907, |
| "19": 0.871 |
| }, |
| "sae_top_5_test_accuracy": { |
| "11": 0.942, |
| "13": 0.855, |
| "14": 0.903, |
| "18": 0.906, |
| "19": 0.899 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set3_results": { |
| "sae_test_accuracy": { |
| "20": 0.9490000605583191, |
| "21": 0.9180000424385071, |
| "22": 0.9220000505447388, |
| "25": 0.9600000381469727, |
| "26": 0.8970000147819519 |
| }, |
| "llm_test_accuracy": { |
| "20": 0.9610000252723694, |
| "21": 0.921000063419342, |
| "22": 0.9160000681877136, |
| "25": 0.9600000381469727, |
| "26": 0.8840000629425049 |
| }, |
| "llm_top_1_test_accuracy": { |
| "20": 0.725, |
| "21": 0.755, |
| "22": 0.652, |
| "25": 0.714, |
| "26": 0.646 |
| }, |
| "llm_top_2_test_accuracy": { |
| "20": 0.81, |
| "21": 0.775, |
| "22": 0.694, |
| "25": 0.738, |
| "26": 0.659 |
| }, |
| "llm_top_5_test_accuracy": { |
| "20": 0.824, |
| "21": 0.807, |
| "22": 0.801, |
| "25": 0.787, |
| "26": 0.706 |
| }, |
| "sae_top_1_test_accuracy": { |
| "20": 0.845, |
| "21": 0.755, |
| "22": 0.871, |
| "25": 0.872, |
| "26": 0.661 |
| }, |
| "sae_top_2_test_accuracy": { |
| "20": 0.875, |
| "21": 0.826, |
| "22": 0.863, |
| "25": 0.876, |
| "26": 0.741 |
| }, |
| "sae_top_5_test_accuracy": { |
| "20": 0.932, |
| "21": 0.852, |
| "22": 0.874, |
| "25": 0.916, |
| "26": 0.808 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_results": { |
| "sae_test_accuracy": { |
| "1": 0.9590000510215759, |
| "2": 0.9430000185966492, |
| "3": 0.9360000491142273, |
| "5": 0.9340000152587891, |
| "6": 0.8830000162124634 |
| }, |
| "llm_test_accuracy": { |
| "1": 0.9630000591278076, |
| "2": 0.940000057220459, |
| "3": 0.937000036239624, |
| "5": 0.9330000281333923, |
| "6": 0.893000066280365 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1": 0.688, |
| "2": 0.795, |
| "3": 0.63, |
| "5": 0.566, |
| "6": 0.693 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1": 0.804, |
| "2": 0.803, |
| "3": 0.642, |
| "5": 0.776, |
| "6": 0.672 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1": 0.824, |
| "2": 0.886, |
| "3": 0.762, |
| "5": 0.822, |
| "6": 0.713 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1": 0.772, |
| "2": 0.754, |
| "3": 0.671, |
| "5": 0.87, |
| "6": 0.739 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1": 0.862, |
| "2": 0.863, |
| "3": 0.701, |
| "5": 0.893, |
| "6": 0.755 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1": 0.903, |
| "2": 0.889, |
| "3": 0.763, |
| "5": 0.904, |
| "6": 0.786 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_sentiment_results": { |
| "sae_test_accuracy": { |
| "1.0": 0.9610000252723694, |
| "5.0": 0.9600000381469727 |
| }, |
| "llm_test_accuracy": { |
| "1.0": 0.9650000333786011, |
| "5.0": 0.9690000414848328 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1.0": 0.685, |
| "5.0": 0.685 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1.0": 0.749, |
| "5.0": 0.749 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1.0": 0.79, |
| "5.0": 0.79 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1.0": 0.765, |
| "5.0": 0.765 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1.0": 0.852, |
| "5.0": 0.852 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1.0": 0.886, |
| "5.0": 0.886 |
| } |
| }, |
| "codeparrot/github-code_results": { |
| "sae_test_accuracy": { |
| "C": 0.9620000720024109, |
| "Python": 0.9820000529289246, |
| "HTML": 0.9890000224113464, |
| "Java": 0.9730000495910645, |
| "PHP": 0.9650000333786011 |
| }, |
| "llm_test_accuracy": { |
| "C": 0.9600000381469727, |
| "Python": 0.9880000352859497, |
| "HTML": 0.9850000739097595, |
| "Java": 0.9570000171661377, |
| "PHP": 0.9540000557899475 |
| }, |
| "llm_top_1_test_accuracy": { |
| "C": 0.559, |
| "Python": 0.62, |
| "HTML": 0.801, |
| "Java": 0.595, |
| "PHP": 0.574 |
| }, |
| "llm_top_2_test_accuracy": { |
| "C": 0.657, |
| "Python": 0.629, |
| "HTML": 0.872, |
| "Java": 0.617, |
| "PHP": 0.646 |
| }, |
| "llm_top_5_test_accuracy": { |
| "C": 0.852, |
| "Python": 0.726, |
| "HTML": 0.89, |
| "Java": 0.76, |
| "PHP": 0.685 |
| }, |
| "sae_top_1_test_accuracy": { |
| "C": 0.633, |
| "Python": 0.604, |
| "HTML": 0.702, |
| "Java": 0.599, |
| "PHP": 0.934 |
| }, |
| "sae_top_2_test_accuracy": { |
| "C": 0.629, |
| "Python": 0.951, |
| "HTML": 0.897, |
| "Java": 0.665, |
| "PHP": 0.937 |
| }, |
| "sae_top_5_test_accuracy": { |
| "C": 0.858, |
| "Python": 0.963, |
| "HTML": 0.948, |
| "Java": 0.87, |
| "PHP": 0.934 |
| } |
| }, |
| "fancyzhx/ag_news_results": { |
| "sae_test_accuracy": { |
| "0": 0.9360000491142273, |
| "1": 0.9880000352859497, |
| "2": 0.9280000329017639, |
| "3": 0.9330000281333923 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.940000057220459, |
| "1": 0.9880000352859497, |
| "2": 0.9270000457763672, |
| "3": 0.9480000734329224 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.69, |
| "1": 0.636, |
| "2": 0.555, |
| "3": 0.72 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.784, |
| "1": 0.786, |
| "2": 0.679, |
| "3": 0.739 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.833, |
| "1": 0.86, |
| "2": 0.801, |
| "3": 0.815 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.851, |
| "1": 0.92, |
| "2": 0.736, |
| "3": 0.749 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.864, |
| "1": 0.926, |
| "2": 0.853, |
| "3": 0.803 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.875, |
| "1": 0.968, |
| "2": 0.874, |
| "3": 0.882 |
| } |
| }, |
| "Helsinki-NLP/europarl_results": { |
| "sae_test_accuracy": { |
| "en": 0.9980000257492065, |
| "fr": 0.999000072479248, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "llm_test_accuracy": { |
| "en": 1.0, |
| "fr": 1.0, |
| "de": 0.999000072479248, |
| "es": 0.999000072479248, |
| "nl": 1.0 |
| }, |
| "llm_top_1_test_accuracy": { |
| "en": 0.94, |
| "fr": 0.996, |
| "de": 1.0, |
| "es": 0.862, |
| "nl": 1.0 |
| }, |
| "llm_top_2_test_accuracy": { |
| "en": 0.958, |
| "fr": 0.996, |
| "de": 0.999, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "llm_top_5_test_accuracy": { |
| "en": 0.999, |
| "fr": 0.998, |
| "de": 0.997, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "sae_top_1_test_accuracy": { |
| "en": 0.952, |
| "fr": 0.998, |
| "de": 0.869, |
| "es": 0.944, |
| "nl": 1.0 |
| }, |
| "sae_top_2_test_accuracy": { |
| "en": 0.951, |
| "fr": 0.997, |
| "de": 1.0, |
| "es": 0.999, |
| "nl": 0.999 |
| }, |
| "sae_top_5_test_accuracy": { |
| "en": 0.998, |
| "fr": 0.996, |
| "de": 1.0, |
| "es": 0.999, |
| "nl": 1.0 |
| } |
| } |
| } |
| } |