{
  "eval_type_id": "sparse_probing",
  "eval_config": {
    "random_seed": 42,
    "dataset_names": [
      "LabHC/bias_in_bios_class_set1",
      "LabHC/bias_in_bios_class_set2",
      "LabHC/bias_in_bios_class_set3",
      "canrager/amazon_reviews_mcauley_1and5",
      "canrager/amazon_reviews_mcauley_1and5_sentiment",
      "codeparrot/github-code",
      "fancyzhx/ag_news",
      "Helsinki-NLP/europarl"
    ],
    "probe_train_set_size": 4000,
    "probe_test_set_size": 1000,
    "context_length": 128,
    "sae_batch_size": 125,
    "llm_batch_size": 32,
    "llm_dtype": "bfloat16",
    "model_name": "gemma-2-2b",
    "k_values": [
      1,
      2,
      5
    ],
    "lower_vram_usage": false
  },
  "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0",
  "datetime_epoch_millis": 1745754268201,
  "eval_result_metrics": {
    "llm": {
      "llm_test_accuracy": 0.9581187922507525,
      "llm_top_1_test_accuracy": 0.6539125,
      "llm_top_2_test_accuracy": 0.72373125,
      "llm_top_5_test_accuracy": 0.7846937500000001,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null
    },
    "sae": {
      "sae_test_accuracy": 0.9563562951982022,
      "sae_top_1_test_accuracy": 0.71570625,
      "sae_top_2_test_accuracy": 0.77225,
      "sae_top_5_test_accuracy": 0.86085,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  },
  "eval_result_details": [
    {
      "dataset_name": "LabHC/bias_in_bios_class_set1_results",
      "llm_test_accuracy": 0.9666000485420227,
      "llm_top_1_test_accuracy": 0.6594,
      "llm_top_2_test_accuracy": 0.7166,
      "llm_top_5_test_accuracy": 0.7886,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9640000343322754,
      "sae_top_1_test_accuracy": 0.7664,
      "sae_top_2_test_accuracy": 0.8196,
      "sae_top_5_test_accuracy": 0.885,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set2_results",
      "llm_test_accuracy": 0.9550000548362731,
      "llm_top_1_test_accuracy": 0.6314000000000001,
      "llm_top_2_test_accuracy": 0.7233999999999999,
      "llm_top_5_test_accuracy": 0.7552,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9498000383377075,
      "sae_top_1_test_accuracy": 0.7314,
      "sae_top_2_test_accuracy": 0.7932,
      "sae_top_5_test_accuracy": 0.851,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set3_results",
      "llm_test_accuracy": 0.9312000393867492,
      "llm_top_1_test_accuracy": 0.677,
      "llm_top_2_test_accuracy": 0.7414,
      "llm_top_5_test_accuracy": 0.7710000000000001,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9294000506401062,
      "sae_top_1_test_accuracy": 0.7273999999999999,
      "sae_top_2_test_accuracy": 0.7746,
      "sae_top_5_test_accuracy": 0.868,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
      "llm_test_accuracy": 0.9190000414848327,
      "llm_top_1_test_accuracy": 0.6062000000000001,
      "llm_top_2_test_accuracy": 0.6491999999999999,
      "llm_top_5_test_accuracy": 0.6888,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9174000501632691,
      "sae_top_1_test_accuracy": 0.6384000000000001,
      "sae_top_2_test_accuracy": 0.6872,
      "sae_top_5_test_accuracy": 0.8248,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
      "llm_test_accuracy": 0.9775000512599945,
      "llm_top_1_test_accuracy": 0.654,
      "llm_top_2_test_accuracy": 0.709,
      "llm_top_5_test_accuracy": 0.752,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9705000519752502,
      "sae_top_1_test_accuracy": 0.585,
      "sae_top_2_test_accuracy": 0.637,
      "sae_top_5_test_accuracy": 0.786,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "codeparrot/github-code_results",
      "llm_test_accuracy": 0.9676000475883484,
      "llm_top_1_test_accuracy": 0.6306,
      "llm_top_2_test_accuracy": 0.7026,
      "llm_top_5_test_accuracy": 0.78,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9716000556945801,
      "sae_top_1_test_accuracy": 0.6788000000000001,
      "sae_top_2_test_accuracy": 0.6848,
      "sae_top_5_test_accuracy": 0.799,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "fancyzhx/ag_news_results",
      "llm_test_accuracy": 0.9482500404119492,
      "llm_top_1_test_accuracy": 0.6525000000000001,
      "llm_top_2_test_accuracy": 0.77225,
      "llm_top_5_test_accuracy": 0.82675,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9497500360012054,
      "sae_top_1_test_accuracy": 0.82425,
      "sae_top_2_test_accuracy": 0.843,
      "sae_top_5_test_accuracy": 0.8789999999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "Helsinki-NLP/europarl_results",
      "llm_test_accuracy": 0.9998000144958497,
      "llm_top_1_test_accuracy": 0.7202000000000001,
      "llm_top_2_test_accuracy": 0.7754,
      "llm_top_5_test_accuracy": 0.9152000000000001,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9984000444412231,
      "sae_top_1_test_accuracy": 0.7739999999999999,
      "sae_top_2_test_accuracy": 0.9385999999999999,
      "sae_top_5_test_accuracy": 0.994,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  ],
  "sae_bench_commit_hash": "Unknown",
  "sae_lens_id": "blocks.11.hook_resid_post",
  "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc",
  "sae_lens_version": "5.9.1",
  "sae_cfg_dict": {
    "architecture": "jumprelu",
    "d_in": 2304,
    "d_sae": 32768,
    "activation_fn_str": "relu",
    "apply_b_dec_to_input": true,
    "finetuning_scaling_factor": false,
    "context_size": 1024,
    "model_name": "gemma-2-2b",
    "hook_name": "blocks.11.hook_resid_post",
    "hook_layer": 11,
    "hook_head_index": null,
    "prepend_bos": true,
    "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B",
    "dataset_trust_remote_code": true,
    "normalize_activations": "none",
    "dtype": "torch.bfloat16",
    "device": "cuda",
    "sae_lens_training_version": "5.5.1",
    "activation_fn_kwargs": {
      "k": 40
    },
    "neuronpedia_id": null,
    "model_from_pretrained_kwargs": {
      "center_writing_weights": false
    },
    "seqpos_slice": [
      null
    ]
  },
  "eval_result_unstructured": {
    "LabHC/bias_in_bios_class_set1_results": {
      "sae_test_accuracy": {
        "0": 0.9430000185966492,
        "1": 0.9610000252723694,
        "2": 0.9530000686645508,
        "6": 0.9890000224113464,
        "9": 0.9740000367164612
      },
      "llm_test_accuracy": {
        "0": 0.9530000686645508,
        "1": 0.9610000252723694,
        "2": 0.9510000348091125,
        "6": 0.9910000562667847,
        "9": 0.9770000576972961
      },
      "llm_top_1_test_accuracy": {
        "0": 0.588,
        "1": 0.616,
        "2": 0.658,
        "6": 0.749,
        "9": 0.686
      },
      "llm_top_2_test_accuracy": {
        "0": 0.576,
        "1": 0.661,
        "2": 0.673,
        "6": 0.817,
        "9": 0.856
      },
      "llm_top_5_test_accuracy": {
        "0": 0.706,
        "1": 0.714,
        "2": 0.758,
        "6": 0.903,
        "9": 0.862
      },
      "sae_top_1_test_accuracy": {
        "0": 0.673,
        "1": 0.599,
        "2": 0.867,
        "6": 0.755,
        "9": 0.938
      },
      "sae_top_2_test_accuracy": {
        "0": 0.692,
        "1": 0.628,
        "2": 0.859,
        "6": 0.983,
        "9": 0.936
      },
      "sae_top_5_test_accuracy": {
        "0": 0.77,
        "1": 0.848,
        "2": 0.889,
        "6": 0.977,
        "9": 0.941
      }
    },
    "LabHC/bias_in_bios_class_set2_results": {
      "sae_test_accuracy": {
        "11": 0.9550000429153442,
        "13": 0.9490000605583191,
        "14": 0.956000030040741,
        "18": 0.9230000376701355,
        "19": 0.9660000205039978
      },
      "llm_test_accuracy": {
        "11": 0.9620000720024109,
        "13": 0.9610000252723694,
        "14": 0.9590000510215759,
        "18": 0.9300000667572021,
        "19": 0.9630000591278076
      },
      "llm_top_1_test_accuracy": {
        "11": 0.525,
        "13": 0.676,
        "14": 0.649,
        "18": 0.643,
        "19": 0.664
      },
      "llm_top_2_test_accuracy": {
        "11": 0.75,
        "13": 0.707,
        "14": 0.666,
        "18": 0.719,
        "19": 0.775
      },
      "llm_top_5_test_accuracy": {
        "11": 0.76,
        "13": 0.73,
        "14": 0.719,
        "18": 0.719,
        "19": 0.848
      },
      "sae_top_1_test_accuracy": {
        "11": 0.844,
        "13": 0.687,
        "14": 0.626,
        "18": 0.658,
        "19": 0.842
      },
      "sae_top_2_test_accuracy": {
        "11": 0.856,
        "13": 0.686,
        "14": 0.863,
        "18": 0.725,
        "19": 0.836
      },
      "sae_top_5_test_accuracy": {
        "11": 0.865,
        "13": 0.799,
        "14": 0.869,
        "18": 0.872,
        "19": 0.85
      }
    },
    "LabHC/bias_in_bios_class_set3_results": {
      "sae_test_accuracy": {
        "20": 0.9610000252723694,
        "21": 0.9160000681877136,
        "22": 0.9160000681877136,
        "25": 0.9650000333786011,
        "26": 0.8890000581741333
      },
      "llm_test_accuracy": {
        "20": 0.9580000638961792,
        "21": 0.9240000247955322,
        "22": 0.9200000166893005,
        "25": 0.9690000414848328,
        "26": 0.8850000500679016
      },
      "llm_top_1_test_accuracy": {
        "20": 0.733,
        "21": 0.759,
        "22": 0.636,
        "25": 0.668,
        "26": 0.589
      },
      "llm_top_2_test_accuracy": {
        "20": 0.832,
        "21": 0.758,
        "22": 0.663,
        "25": 0.771,
        "26": 0.683
      },
      "llm_top_5_test_accuracy": {
        "20": 0.862,
        "21": 0.8,
        "22": 0.671,
        "25": 0.83,
        "26": 0.692
      },
      "sae_top_1_test_accuracy": {
        "20": 0.913,
        "21": 0.776,
        "22": 0.494,
        "25": 0.861,
        "26": 0.593
      },
      "sae_top_2_test_accuracy": {
        "20": 0.911,
        "21": 0.771,
        "22": 0.633,
        "25": 0.864,
        "26": 0.694
      },
      "sae_top_5_test_accuracy": {
        "20": 0.92,
        "21": 0.848,
        "22": 0.89,
        "25": 0.882,
        "26": 0.8
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_results": {
      "sae_test_accuracy": {
        "1": 0.9420000314712524,
        "2": 0.9260000586509705,
        "3": 0.921000063419342,
        "5": 0.9160000681877136,
        "6": 0.8820000290870667
      },
      "llm_test_accuracy": {
        "1": 0.9490000605583191,
        "2": 0.9310000538825989,
        "3": 0.9240000247955322,
        "5": 0.9280000329017639,
        "6": 0.8630000352859497
      },
      "llm_top_1_test_accuracy": {
        "1": 0.695,
        "2": 0.591,
        "3": 0.604,
        "5": 0.568,
        "6": 0.573
      },
      "llm_top_2_test_accuracy": {
        "1": 0.757,
        "2": 0.644,
        "3": 0.609,
        "5": 0.621,
        "6": 0.615
      },
      "llm_top_5_test_accuracy": {
        "1": 0.763,
        "2": 0.731,
        "3": 0.631,
        "5": 0.652,
        "6": 0.667
      },
      "sae_top_1_test_accuracy": {
        "1": 0.651,
        "2": 0.631,
        "3": 0.647,
        "5": 0.633,
        "6": 0.63
      },
      "sae_top_2_test_accuracy": {
        "1": 0.709,
        "2": 0.665,
        "3": 0.651,
        "5": 0.751,
        "6": 0.66
      },
      "sae_top_5_test_accuracy": {
        "1": 0.908,
        "2": 0.864,
        "3": 0.753,
        "5": 0.878,
        "6": 0.721
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
      "sae_test_accuracy": {
        "1.0": 0.971000075340271,
        "5.0": 0.9700000286102295
      },
      "llm_test_accuracy": {
        "1.0": 0.9790000319480896,
        "5.0": 0.9760000705718994
      },
      "llm_top_1_test_accuracy": {
        "1.0": 0.654,
        "5.0": 0.654
      },
      "llm_top_2_test_accuracy": {
        "1.0": 0.709,
        "5.0": 0.709
      },
      "llm_top_5_test_accuracy": {
        "1.0": 0.752,
        "5.0": 0.752
      },
      "sae_top_1_test_accuracy": {
        "1.0": 0.585,
        "5.0": 0.585
      },
      "sae_top_2_test_accuracy": {
        "1.0": 0.637,
        "5.0": 0.637
      },
      "sae_top_5_test_accuracy": {
        "1.0": 0.786,
        "5.0": 0.786
      }
    },
    "codeparrot/github-code_results": {
      "sae_test_accuracy": {
        "C": 0.968000054359436,
        "Python": 0.9820000529289246,
        "HTML": 0.9850000739097595,
        "Java": 0.9580000638961792,
        "PHP": 0.9650000333786011
      },
      "llm_test_accuracy": {
        "C": 0.9540000557899475,
        "Python": 0.984000027179718,
        "HTML": 0.9910000562667847,
        "Java": 0.9510000348091125,
        "PHP": 0.9580000638961792
      },
      "llm_top_1_test_accuracy": {
        "C": 0.557,
        "Python": 0.644,
        "HTML": 0.74,
        "Java": 0.598,
        "PHP": 0.614
      },
      "llm_top_2_test_accuracy": {
        "C": 0.731,
        "Python": 0.677,
        "HTML": 0.807,
        "Java": 0.64,
        "PHP": 0.658
      },
      "llm_top_5_test_accuracy": {
        "C": 0.765,
        "Python": 0.737,
        "HTML": 0.931,
        "Java": 0.766,
        "PHP": 0.701
      },
      "sae_top_1_test_accuracy": {
        "C": 0.627,
        "Python": 0.642,
        "HTML": 0.939,
        "Java": 0.596,
        "PHP": 0.59
      },
      "sae_top_2_test_accuracy": {
        "C": 0.634,
        "Python": 0.664,
        "HTML": 0.931,
        "Java": 0.622,
        "PHP": 0.573
      },
      "sae_top_5_test_accuracy": {
        "C": 0.694,
        "Python": 0.673,
        "HTML": 0.956,
        "Java": 0.746,
        "PHP": 0.926
      }
    },
    "fancyzhx/ag_news_results": {
      "sae_test_accuracy": {
        "0": 0.940000057220459,
        "1": 0.984000027179718,
        "2": 0.9380000233650208,
        "3": 0.937000036239624
      },
      "llm_test_accuracy": {
        "0": 0.9280000329017639,
        "1": 0.9880000352859497,
        "2": 0.937000036239624,
        "3": 0.940000057220459
      },
      "llm_top_1_test_accuracy": {
        "0": 0.617,
        "1": 0.673,
        "2": 0.534,
        "3": 0.786
      },
      "llm_top_2_test_accuracy": {
        "0": 0.784,
        "1": 0.808,
        "2": 0.678,
        "3": 0.819
      },
      "llm_top_5_test_accuracy": {
        "0": 0.814,
        "1": 0.872,
        "2": 0.765,
        "3": 0.856
      },
      "sae_top_1_test_accuracy": {
        "0": 0.831,
        "1": 0.951,
        "2": 0.773,
        "3": 0.742
      },
      "sae_top_2_test_accuracy": {
        "0": 0.82,
        "1": 0.949,
        "2": 0.801,
        "3": 0.802
      },
      "sae_top_5_test_accuracy": {
        "0": 0.865,
        "1": 0.951,
        "2": 0.828,
        "3": 0.872
      }
    },
    "Helsinki-NLP/europarl_results": {
      "sae_test_accuracy": {
        "en": 0.999000072479248,
        "fr": 1.0,
        "de": 0.999000072479248,
        "es": 0.9980000257492065,
        "nl": 0.9960000514984131
      },
      "llm_test_accuracy": {
        "en": 0.999000072479248,
        "fr": 1.0,
        "de": 1.0,
        "es": 1.0,
        "nl": 1.0
      },
      "llm_top_1_test_accuracy": {
        "en": 0.722,
        "fr": 0.585,
        "de": 0.744,
        "es": 0.918,
        "nl": 0.632
      },
      "llm_top_2_test_accuracy": {
        "en": 0.82,
        "fr": 0.573,
        "de": 0.762,
        "es": 0.956,
        "nl": 0.766
      },
      "llm_top_5_test_accuracy": {
        "en": 0.902,
        "fr": 0.921,
        "de": 0.909,
        "es": 0.975,
        "nl": 0.869
      },
      "sae_top_1_test_accuracy": {
        "en": 0.999,
        "fr": 0.592,
        "de": 0.64,
        "es": 0.994,
        "nl": 0.645
      },
      "sae_top_2_test_accuracy": {
        "en": 0.998,
        "fr": 0.988,
        "de": 0.994,
        "es": 0.995,
        "nl": 0.718
      },
      "sae_top_5_test_accuracy": {
        "en": 0.998,
        "fr": 0.989,
        "de": 0.993,
        "es": 0.995,
        "nl": 0.995
      }
    }
  }
}