| { |
| "eval_type_id": "sparse_probing", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "LabHC/bias_in_bios_class_set2", |
| "LabHC/bias_in_bios_class_set3", |
| "canrager/amazon_reviews_mcauley_1and5", |
| "canrager/amazon_reviews_mcauley_1and5_sentiment", |
| "codeparrot/github-code", |
| "fancyzhx/ag_news", |
| "Helsinki-NLP/europarl" |
| ], |
| "probe_train_set_size": 4000, |
| "probe_test_set_size": 1000, |
| "context_length": 128, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "model_name": "gemma-2-2b", |
| "k_values": [ |
| 1, |
| 2, |
| 5 |
| ], |
| "lower_vram_usage": false |
| }, |
| "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0", |
| "datetime_epoch_millis": 1745754186890, |
| "eval_result_metrics": { |
| "llm": { |
| "llm_test_accuracy": 0.9599062964320183, |
| "llm_top_1_test_accuracy": 0.6695499999999999, |
| "llm_top_2_test_accuracy": 0.7314999999999999, |
| "llm_top_5_test_accuracy": 0.7825125000000002, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null |
| }, |
| "sae": { |
| "sae_test_accuracy": 0.9542938005179168, |
| "sae_top_1_test_accuracy": 0.73414375, |
| "sae_top_2_test_accuracy": 0.80335, |
| "sae_top_5_test_accuracy": 0.8805437500000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_results", |
| "llm_test_accuracy": 0.9680000424385071, |
| "llm_top_1_test_accuracy": 0.6581999999999999, |
| "llm_top_2_test_accuracy": 0.6994, |
| "llm_top_5_test_accuracy": 0.7882, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9642000555992126, |
| "sae_top_1_test_accuracy": 0.748, |
| "sae_top_2_test_accuracy": 0.8190000000000002, |
| "sae_top_5_test_accuracy": 0.9057999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set2_results", |
| "llm_test_accuracy": 0.9584000468254089, |
| "llm_top_1_test_accuracy": 0.6801999999999999, |
| "llm_top_2_test_accuracy": 0.7325999999999999, |
| "llm_top_5_test_accuracy": 0.761, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9484000563621521, |
| "sae_top_1_test_accuracy": 0.6984000000000001, |
| "sae_top_2_test_accuracy": 0.7691999999999999, |
| "sae_top_5_test_accuracy": 0.8379999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set3_results", |
| "llm_test_accuracy": 0.9332000494003296, |
| "llm_top_1_test_accuracy": 0.6806, |
| "llm_top_2_test_accuracy": 0.7332000000000001, |
| "llm_top_5_test_accuracy": 0.7572, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9270000457763672, |
| "sae_top_1_test_accuracy": 0.7604000000000001, |
| "sae_top_2_test_accuracy": 0.8204, |
| "sae_top_5_test_accuracy": 0.8582000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results", |
| "llm_test_accuracy": 0.9202000379562378, |
| "llm_top_1_test_accuracy": 0.6034, |
| "llm_top_2_test_accuracy": 0.6448, |
| "llm_top_5_test_accuracy": 0.6862, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9164000511169433, |
| "sae_top_1_test_accuracy": 0.607, |
| "sae_top_2_test_accuracy": 0.7491999999999999, |
| "sae_top_5_test_accuracy": 0.8207999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results", |
| "llm_test_accuracy": 0.9760000705718994, |
| "llm_top_1_test_accuracy": 0.643, |
| "llm_top_2_test_accuracy": 0.705, |
| "llm_top_5_test_accuracy": 0.744, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9625000655651093, |
| "sae_top_1_test_accuracy": 0.763, |
| "sae_top_2_test_accuracy": 0.769, |
| "sae_top_5_test_accuracy": 0.863, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "codeparrot/github-code_results", |
| "llm_test_accuracy": 0.9718000411987304, |
| "llm_top_1_test_accuracy": 0.6456000000000001, |
| "llm_top_2_test_accuracy": 0.7041999999999999, |
| "llm_top_5_test_accuracy": 0.7738, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9670000553131104, |
| "sae_top_1_test_accuracy": 0.6804, |
| "sae_top_2_test_accuracy": 0.6843999999999999, |
| "sae_top_5_test_accuracy": 0.875, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "fancyzhx/ag_news_results", |
| "llm_test_accuracy": 0.952250063419342, |
| "llm_top_1_test_accuracy": 0.715, |
| "llm_top_2_test_accuracy": 0.774, |
| "llm_top_5_test_accuracy": 0.8215, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.950250044465065, |
| "sae_top_1_test_accuracy": 0.7767499999999999, |
| "sae_top_2_test_accuracy": 0.827, |
| "sae_top_5_test_accuracy": 0.88975, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "Helsinki-NLP/europarl_results", |
| "llm_test_accuracy": 0.9994000196456909, |
| "llm_top_1_test_accuracy": 0.7303999999999999, |
| "llm_top_2_test_accuracy": 0.8587999999999999, |
| "llm_top_5_test_accuracy": 0.9282, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9986000299453736, |
| "sae_top_1_test_accuracy": 0.8392000000000002, |
| "sae_top_2_test_accuracy": 0.9885999999999999, |
| "sae_top_5_test_accuracy": 0.9937999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.10.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.10.hook_resid_post", |
| "hook_layer": 10, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1_results": { |
| "sae_test_accuracy": { |
| "0": 0.9510000348091125, |
| "1": 0.9580000638961792, |
| "2": 0.9500000476837158, |
| "6": 0.9900000691413879, |
| "9": 0.9720000624656677 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9520000219345093, |
| "1": 0.9660000205039978, |
| "2": 0.9530000686645508, |
| "6": 0.9920000433921814, |
| "9": 0.9770000576972961 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.555, |
| "1": 0.655, |
| "2": 0.675, |
| "6": 0.827, |
| "9": 0.579 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.607, |
| "1": 0.663, |
| "2": 0.674, |
| "6": 0.835, |
| "9": 0.718 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.709, |
| "1": 0.722, |
| "2": 0.746, |
| "6": 0.912, |
| "9": 0.852 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.553, |
| "1": 0.656, |
| "2": 0.779, |
| "6": 0.825, |
| "9": 0.927 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.645, |
| "1": 0.659, |
| "2": 0.886, |
| "6": 0.978, |
| "9": 0.927 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.865, |
| "1": 0.856, |
| "2": 0.899, |
| "6": 0.977, |
| "9": 0.932 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set2_results": { |
| "sae_test_accuracy": { |
| "11": 0.9650000333786011, |
| "13": 0.9480000734329224, |
| "14": 0.9580000638961792, |
| "18": 0.9160000681877136, |
| "19": 0.9550000429153442 |
| }, |
| "llm_test_accuracy": { |
| "11": 0.9640000462532043, |
| "13": 0.9660000205039978, |
| "14": 0.9670000672340393, |
| "18": 0.9330000281333923, |
| "19": 0.9620000720024109 |
| }, |
| "llm_top_1_test_accuracy": { |
| "11": 0.577, |
| "13": 0.673, |
| "14": 0.648, |
| "18": 0.7, |
| "19": 0.803 |
| }, |
| "llm_top_2_test_accuracy": { |
| "11": 0.771, |
| "13": 0.709, |
| "14": 0.663, |
| "18": 0.723, |
| "19": 0.797 |
| }, |
| "llm_top_5_test_accuracy": { |
| "11": 0.797, |
| "13": 0.733, |
| "14": 0.711, |
| "18": 0.729, |
| "19": 0.835 |
| }, |
| "sae_top_1_test_accuracy": { |
| "11": 0.727, |
| "13": 0.687, |
| "14": 0.632, |
| "18": 0.676, |
| "19": 0.77 |
| }, |
| "sae_top_2_test_accuracy": { |
| "11": 0.727, |
| "13": 0.68, |
| "14": 0.877, |
| "18": 0.726, |
| "19": 0.836 |
| }, |
| "sae_top_5_test_accuracy": { |
| "11": 0.849, |
| "13": 0.862, |
| "14": 0.875, |
| "18": 0.756, |
| "19": 0.848 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set3_results": { |
| "sae_test_accuracy": { |
| "20": 0.9530000686645508, |
| "21": 0.9230000376701355, |
| "22": 0.9180000424385071, |
| "25": 0.9600000381469727, |
| "26": 0.8810000419616699 |
| }, |
| "llm_test_accuracy": { |
| "20": 0.9540000557899475, |
| "21": 0.9230000376701355, |
| "22": 0.921000063419342, |
| "25": 0.9670000672340393, |
| "26": 0.9010000228881836 |
| }, |
| "llm_top_1_test_accuracy": { |
| "20": 0.725, |
| "21": 0.738, |
| "22": 0.636, |
| "25": 0.719, |
| "26": 0.585 |
| }, |
| "llm_top_2_test_accuracy": { |
| "20": 0.818, |
| "21": 0.77, |
| "22": 0.663, |
| "25": 0.746, |
| "26": 0.669 |
| }, |
| "llm_top_5_test_accuracy": { |
| "20": 0.838, |
| "21": 0.775, |
| "22": 0.691, |
| "25": 0.798, |
| "26": 0.684 |
| }, |
| "sae_top_1_test_accuracy": { |
| "20": 0.897, |
| "21": 0.723, |
| "22": 0.858, |
| "25": 0.721, |
| "26": 0.603 |
| }, |
| "sae_top_2_test_accuracy": { |
| "20": 0.909, |
| "21": 0.728, |
| "22": 0.898, |
| "25": 0.859, |
| "26": 0.708 |
| }, |
| "sae_top_5_test_accuracy": { |
| "20": 0.9, |
| "21": 0.838, |
| "22": 0.895, |
| "25": 0.889, |
| "26": 0.769 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_results": { |
| "sae_test_accuracy": { |
| "1": 0.9450000524520874, |
| "2": 0.9360000491142273, |
| "3": 0.9170000553131104, |
| "5": 0.9130000472068787, |
| "6": 0.8710000514984131 |
| }, |
| "llm_test_accuracy": { |
| "1": 0.9470000267028809, |
| "2": 0.9240000247955322, |
| "3": 0.9170000553131104, |
| "5": 0.9290000200271606, |
| "6": 0.8840000629425049 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1": 0.696, |
| "2": 0.554, |
| "3": 0.604, |
| "5": 0.555, |
| "6": 0.608 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1": 0.734, |
| "2": 0.649, |
| "3": 0.612, |
| "5": 0.588, |
| "6": 0.641 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1": 0.75, |
| "2": 0.724, |
| "3": 0.635, |
| "5": 0.634, |
| "6": 0.688 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1": 0.625, |
| "2": 0.628, |
| "3": 0.602, |
| "5": 0.556, |
| "6": 0.624 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1": 0.828, |
| "2": 0.847, |
| "3": 0.601, |
| "5": 0.763, |
| "6": 0.707 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1": 0.909, |
| "2": 0.886, |
| "3": 0.71, |
| "5": 0.869, |
| "6": 0.73 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_sentiment_results": { |
| "sae_test_accuracy": { |
| "1.0": 0.9630000591278076, |
| "5.0": 0.9620000720024109 |
| }, |
| "llm_test_accuracy": { |
| "1.0": 0.9760000705718994, |
| "5.0": 0.9760000705718994 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1.0": 0.643, |
| "5.0": 0.643 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1.0": 0.705, |
| "5.0": 0.705 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1.0": 0.744, |
| "5.0": 0.744 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1.0": 0.763, |
| "5.0": 0.763 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1.0": 0.769, |
| "5.0": 0.769 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1.0": 0.863, |
| "5.0": 0.863 |
| } |
| }, |
| "codeparrot/github-code_results": { |
| "sae_test_accuracy": { |
| "C": 0.9530000686645508, |
| "Python": 0.9850000739097595, |
| "HTML": 0.984000027179718, |
| "Java": 0.9550000429153442, |
| "PHP": 0.9580000638961792 |
| }, |
| "llm_test_accuracy": { |
| "C": 0.9600000381469727, |
| "Python": 0.9860000610351562, |
| "HTML": 0.9880000352859497, |
| "Java": 0.9700000286102295, |
| "PHP": 0.9550000429153442 |
| }, |
| "llm_top_1_test_accuracy": { |
| "C": 0.656, |
| "Python": 0.629, |
| "HTML": 0.73, |
| "Java": 0.603, |
| "PHP": 0.61 |
| }, |
| "llm_top_2_test_accuracy": { |
| "C": 0.711, |
| "Python": 0.695, |
| "HTML": 0.796, |
| "Java": 0.666, |
| "PHP": 0.653 |
| }, |
| "llm_top_5_test_accuracy": { |
| "C": 0.77, |
| "Python": 0.73, |
| "HTML": 0.936, |
| "Java": 0.753, |
| "PHP": 0.68 |
| }, |
| "sae_top_1_test_accuracy": { |
| "C": 0.632, |
| "Python": 0.626, |
| "HTML": 0.936, |
| "Java": 0.613, |
| "PHP": 0.595 |
| }, |
| "sae_top_2_test_accuracy": { |
| "C": 0.624, |
| "Python": 0.658, |
| "HTML": 0.931, |
| "Java": 0.618, |
| "PHP": 0.591 |
| }, |
| "sae_top_5_test_accuracy": { |
| "C": 0.792, |
| "Python": 0.963, |
| "HTML": 0.96, |
| "Java": 0.741, |
| "PHP": 0.919 |
| } |
| }, |
| "fancyzhx/ag_news_results": { |
| "sae_test_accuracy": { |
| "0": 0.9350000619888306, |
| "1": 0.9830000400543213, |
| "2": 0.9320000410079956, |
| "3": 0.9510000348091125 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9440000653266907, |
| "1": 0.9860000610351562, |
| "2": 0.9300000667572021, |
| "3": 0.9490000605583191 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.791, |
| "1": 0.656, |
| "2": 0.666, |
| "3": 0.747 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.789, |
| "1": 0.797, |
| "2": 0.699, |
| "3": 0.811 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.808, |
| "1": 0.877, |
| "2": 0.757, |
| "3": 0.844 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.718, |
| "1": 0.904, |
| "2": 0.719, |
| "3": 0.766 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.843, |
| "1": 0.902, |
| "2": 0.738, |
| "3": 0.825 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.876, |
| "1": 0.932, |
| "2": 0.847, |
| "3": 0.904 |
| } |
| }, |
| "Helsinki-NLP/europarl_results": { |
| "sae_test_accuracy": { |
| "en": 1.0, |
| "fr": 0.9960000514984131, |
| "de": 0.999000072479248, |
| "es": 1.0, |
| "nl": 0.9980000257492065 |
| }, |
| "llm_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 0.9980000257492065 |
| }, |
| "llm_top_1_test_accuracy": { |
| "en": 0.717, |
| "fr": 0.599, |
| "de": 0.778, |
| "es": 0.916, |
| "nl": 0.642 |
| }, |
| "llm_top_2_test_accuracy": { |
| "en": 0.837, |
| "fr": 0.889, |
| "de": 0.828, |
| "es": 0.961, |
| "nl": 0.779 |
| }, |
| "llm_top_5_test_accuracy": { |
| "en": 0.917, |
| "fr": 0.959, |
| "de": 0.917, |
| "es": 0.978, |
| "nl": 0.87 |
| }, |
| "sae_top_1_test_accuracy": { |
| "en": 0.996, |
| "fr": 0.6, |
| "de": 0.974, |
| "es": 0.992, |
| "nl": 0.634 |
| }, |
| "sae_top_2_test_accuracy": { |
| "en": 0.996, |
| "fr": 0.989, |
| "de": 0.97, |
| "es": 0.992, |
| "nl": 0.996 |
| }, |
| "sae_top_5_test_accuracy": { |
| "en": 0.996, |
| "fr": 0.995, |
| "de": 0.985, |
| "es": 0.997, |
| "nl": 0.996 |
| } |
| } |
| } |
| } |