| { |
| "eval_type_id": "sparse_probing", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "LabHC/bias_in_bios_class_set2", |
| "LabHC/bias_in_bios_class_set3", |
| "canrager/amazon_reviews_mcauley_1and5", |
| "canrager/amazon_reviews_mcauley_1and5_sentiment", |
| "codeparrot/github-code", |
| "fancyzhx/ag_news", |
| "Helsinki-NLP/europarl" |
| ], |
| "probe_train_set_size": 4000, |
| "probe_test_set_size": 1000, |
| "context_length": 128, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "model_name": "gemma-2-2b", |
| "k_values": [ |
| 1, |
| 2, |
| 5 |
| ], |
| "lower_vram_usage": false |
| }, |
| "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0", |
| "datetime_epoch_millis": 1745753778261, |
| "eval_result_metrics": { |
| "llm": { |
| "llm_test_accuracy": 0.94785629324615, |
| "llm_top_1_test_accuracy": 0.679, |
| "llm_top_2_test_accuracy": 0.7241375, |
| "llm_top_5_test_accuracy": 0.7792437499999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null |
| }, |
| "sae": { |
| "sae_test_accuracy": 0.9424250468611718, |
| "sae_top_1_test_accuracy": 0.761725, |
| "sae_top_2_test_accuracy": 0.7938937500000001, |
| "sae_top_5_test_accuracy": 0.8631499999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_results", |
| "llm_test_accuracy": 0.9626000404357911, |
| "llm_top_1_test_accuracy": 0.6714, |
| "llm_top_2_test_accuracy": 0.6866, |
| "llm_top_5_test_accuracy": 0.7459999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9566000461578369, |
| "sae_top_1_test_accuracy": 0.7922, |
| "sae_top_2_test_accuracy": 0.8124, |
| "sae_top_5_test_accuracy": 0.9067999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set2_results", |
| "llm_test_accuracy": 0.9476000547409058, |
| "llm_top_1_test_accuracy": 0.6726, |
| "llm_top_2_test_accuracy": 0.7218, |
| "llm_top_5_test_accuracy": 0.7754, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9378000497817993, |
| "sae_top_1_test_accuracy": 0.7180000000000001, |
| "sae_top_2_test_accuracy": 0.7326, |
| "sae_top_5_test_accuracy": 0.8038000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set3_results", |
| "llm_test_accuracy": 0.9228000402450561, |
| "llm_top_1_test_accuracy": 0.6772, |
| "llm_top_2_test_accuracy": 0.708, |
| "llm_top_5_test_accuracy": 0.7418, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.920400059223175, |
| "sae_top_1_test_accuracy": 0.7306000000000001, |
| "sae_top_2_test_accuracy": 0.7992, |
| "sae_top_5_test_accuracy": 0.835, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results", |
| "llm_test_accuracy": 0.9000000357627869, |
| "llm_top_1_test_accuracy": 0.615, |
| "llm_top_2_test_accuracy": 0.6302000000000001, |
| "llm_top_5_test_accuracy": 0.6941999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.8984000444412231, |
| "sae_top_1_test_accuracy": 0.692, |
| "sae_top_2_test_accuracy": 0.7429999999999999, |
| "sae_top_5_test_accuracy": 0.8023999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results", |
| "llm_test_accuracy": 0.9340000450611115, |
| "llm_top_1_test_accuracy": 0.628, |
| "llm_top_2_test_accuracy": 0.686, |
| "llm_top_5_test_accuracy": 0.738, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9130000472068787, |
| "sae_top_1_test_accuracy": 0.755, |
| "sae_top_2_test_accuracy": 0.776, |
| "sae_top_5_test_accuracy": 0.842, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "codeparrot/github-code_results", |
| "llm_test_accuracy": 0.9710000514984131, |
| "llm_top_1_test_accuracy": 0.6568, |
| "llm_top_2_test_accuracy": 0.708, |
| "llm_top_5_test_accuracy": 0.7871999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9690000414848328, |
| "sae_top_1_test_accuracy": 0.6876, |
| "sae_top_2_test_accuracy": 0.6894000000000001, |
| "sae_top_5_test_accuracy": 0.843, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "fancyzhx/ag_news_results", |
| "llm_test_accuracy": 0.9452500492334366, |
| "llm_top_1_test_accuracy": 0.7120000000000001, |
| "llm_top_2_test_accuracy": 0.7595, |
| "llm_top_5_test_accuracy": 0.80375, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9450000524520874, |
| "sae_top_1_test_accuracy": 0.755, |
| "sae_top_2_test_accuracy": 0.80175, |
| "sae_top_5_test_accuracy": 0.8739999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "Helsinki-NLP/europarl_results", |
| "llm_test_accuracy": 0.9996000289916992, |
| "llm_top_1_test_accuracy": 0.799, |
| "llm_top_2_test_accuracy": 0.893, |
| "llm_top_5_test_accuracy": 0.9475999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9992000341415406, |
| "sae_top_1_test_accuracy": 0.9634, |
| "sae_top_2_test_accuracy": 0.9968, |
| "sae_top_5_test_accuracy": 0.9982, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.5.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.5.hook_resid_post", |
| "hook_layer": 5, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1_results": { |
| "sae_test_accuracy": { |
| "0": 0.937000036239624, |
| "1": 0.9550000429153442, |
| "2": 0.940000057220459, |
| "6": 0.9790000319480896, |
| "9": 0.9720000624656677 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9360000491142273, |
| "1": 0.9650000333786011, |
| "2": 0.9450000524520874, |
| "6": 0.9940000176429749, |
| "9": 0.9730000495910645 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.595, |
| "1": 0.64, |
| "2": 0.67, |
| "6": 0.765, |
| "9": 0.687 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.6, |
| "1": 0.619, |
| "2": 0.674, |
| "6": 0.809, |
| "9": 0.731 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.697, |
| "1": 0.733, |
| "2": 0.72, |
| "6": 0.825, |
| "9": 0.755 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.78, |
| "1": 0.614, |
| "2": 0.862, |
| "6": 0.965, |
| "9": 0.74 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.799, |
| "1": 0.661, |
| "2": 0.862, |
| "6": 0.971, |
| "9": 0.769 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.874, |
| "1": 0.834, |
| "2": 0.907, |
| "6": 0.976, |
| "9": 0.943 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set2_results": { |
| "sae_test_accuracy": { |
| "11": 0.9480000734329224, |
| "13": 0.9450000524520874, |
| "14": 0.9390000700950623, |
| "18": 0.9000000357627869, |
| "19": 0.9570000171661377 |
| }, |
| "llm_test_accuracy": { |
| "11": 0.9580000638961792, |
| "13": 0.9440000653266907, |
| "14": 0.9510000348091125, |
| "18": 0.9220000505447388, |
| "19": 0.9630000591278076 |
| }, |
| "llm_top_1_test_accuracy": { |
| "11": 0.581, |
| "13": 0.693, |
| "14": 0.643, |
| "18": 0.687, |
| "19": 0.759 |
| }, |
| "llm_top_2_test_accuracy": { |
| "11": 0.76, |
| "13": 0.698, |
| "14": 0.664, |
| "18": 0.713, |
| "19": 0.774 |
| }, |
| "llm_top_5_test_accuracy": { |
| "11": 0.862, |
| "13": 0.769, |
| "14": 0.698, |
| "18": 0.727, |
| "19": 0.821 |
| }, |
| "sae_top_1_test_accuracy": { |
| "11": 0.819, |
| "13": 0.697, |
| "14": 0.637, |
| "18": 0.675, |
| "19": 0.762 |
| }, |
| "sae_top_2_test_accuracy": { |
| "11": 0.841, |
| "13": 0.732, |
| "14": 0.644, |
| "18": 0.702, |
| "19": 0.744 |
| }, |
| "sae_top_5_test_accuracy": { |
| "11": 0.861, |
| "13": 0.728, |
| "14": 0.809, |
| "18": 0.74, |
| "19": 0.881 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set3_results": { |
| "sae_test_accuracy": { |
| "20": 0.9490000605583191, |
| "21": 0.9110000729560852, |
| "22": 0.9110000729560852, |
| "25": 0.9540000557899475, |
| "26": 0.8770000338554382 |
| }, |
| "llm_test_accuracy": { |
| "20": 0.9440000653266907, |
| "21": 0.9130000472068787, |
| "22": 0.9180000424385071, |
| "25": 0.956000030040741, |
| "26": 0.8830000162124634 |
| }, |
| "llm_top_1_test_accuracy": { |
| "20": 0.652, |
| "21": 0.718, |
| "22": 0.623, |
| "25": 0.737, |
| "26": 0.656 |
| }, |
| "llm_top_2_test_accuracy": { |
| "20": 0.784, |
| "21": 0.759, |
| "22": 0.606, |
| "25": 0.737, |
| "26": 0.654 |
| }, |
| "llm_top_5_test_accuracy": { |
| "20": 0.821, |
| "21": 0.769, |
| "22": 0.706, |
| "25": 0.752, |
| "26": 0.661 |
| }, |
| "sae_top_1_test_accuracy": { |
| "20": 0.903, |
| "21": 0.592, |
| "22": 0.891, |
| "25": 0.663, |
| "26": 0.604 |
| }, |
| "sae_top_2_test_accuracy": { |
| "20": 0.91, |
| "21": 0.691, |
| "22": 0.884, |
| "25": 0.867, |
| "26": 0.644 |
| }, |
| "sae_top_5_test_accuracy": { |
| "20": 0.915, |
| "21": 0.737, |
| "22": 0.877, |
| "25": 0.891, |
| "26": 0.755 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_results": { |
| "sae_test_accuracy": { |
| "1": 0.9290000200271606, |
| "2": 0.9230000376701355, |
| "3": 0.8840000629425049, |
| "5": 0.9040000438690186, |
| "6": 0.8520000576972961 |
| }, |
| "llm_test_accuracy": { |
| "1": 0.9240000247955322, |
| "2": 0.9200000166893005, |
| "3": 0.9040000438690186, |
| "5": 0.9030000567436218, |
| "6": 0.8490000367164612 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1": 0.711, |
| "2": 0.595, |
| "3": 0.604, |
| "5": 0.558, |
| "6": 0.607 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1": 0.695, |
| "2": 0.684, |
| "3": 0.604, |
| "5": 0.558, |
| "6": 0.61 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1": 0.755, |
| "2": 0.742, |
| "3": 0.625, |
| "5": 0.658, |
| "6": 0.691 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1": 0.616, |
| "2": 0.825, |
| "3": 0.623, |
| "5": 0.808, |
| "6": 0.588 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1": 0.857, |
| "2": 0.838, |
| "3": 0.615, |
| "5": 0.813, |
| "6": 0.592 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1": 0.888, |
| "2": 0.865, |
| "3": 0.728, |
| "5": 0.845, |
| "6": 0.686 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_sentiment_results": { |
| "sae_test_accuracy": { |
| "1.0": 0.9150000214576721, |
| "5.0": 0.9110000729560852 |
| }, |
| "llm_test_accuracy": { |
| "1.0": 0.9350000619888306, |
| "5.0": 0.9330000281333923 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1.0": 0.628, |
| "5.0": 0.628 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1.0": 0.686, |
| "5.0": 0.686 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1.0": 0.738, |
| "5.0": 0.738 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1.0": 0.755, |
| "5.0": 0.755 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1.0": 0.776, |
| "5.0": 0.776 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1.0": 0.842, |
| "5.0": 0.842 |
| } |
| }, |
| "codeparrot/github-code_results": { |
| "sae_test_accuracy": { |
| "C": 0.9500000476837158, |
| "Python": 0.984000027179718, |
| "HTML": 0.9890000224113464, |
| "Java": 0.9640000462532043, |
| "PHP": 0.9580000638961792 |
| }, |
| "llm_test_accuracy": { |
| "C": 0.9600000381469727, |
| "Python": 0.987000048160553, |
| "HTML": 0.9900000691413879, |
| "Java": 0.9620000720024109, |
| "PHP": 0.956000030040741 |
| }, |
| "llm_top_1_test_accuracy": { |
| "C": 0.643, |
| "Python": 0.599, |
| "HTML": 0.796, |
| "Java": 0.641, |
| "PHP": 0.605 |
| }, |
| "llm_top_2_test_accuracy": { |
| "C": 0.699, |
| "Python": 0.615, |
| "HTML": 0.935, |
| "Java": 0.665, |
| "PHP": 0.626 |
| }, |
| "llm_top_5_test_accuracy": { |
| "C": 0.752, |
| "Python": 0.77, |
| "HTML": 0.955, |
| "Java": 0.764, |
| "PHP": 0.695 |
| }, |
| "sae_top_1_test_accuracy": { |
| "C": 0.64, |
| "Python": 0.604, |
| "HTML": 0.931, |
| "Java": 0.634, |
| "PHP": 0.629 |
| }, |
| "sae_top_2_test_accuracy": { |
| "C": 0.638, |
| "Python": 0.635, |
| "HTML": 0.925, |
| "Java": 0.637, |
| "PHP": 0.612 |
| }, |
| "sae_top_5_test_accuracy": { |
| "C": 0.688, |
| "Python": 0.927, |
| "HTML": 0.946, |
| "Java": 0.748, |
| "PHP": 0.906 |
| } |
| }, |
| "fancyzhx/ag_news_results": { |
| "sae_test_accuracy": { |
| "0": 0.940000057220459, |
| "1": 0.9740000367164612, |
| "2": 0.9160000681877136, |
| "3": 0.9500000476837158 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9310000538825989, |
| "1": 0.9810000658035278, |
| "2": 0.9290000200271606, |
| "3": 0.940000057220459 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.744, |
| "1": 0.783, |
| "2": 0.639, |
| "3": 0.682 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.738, |
| "1": 0.806, |
| "2": 0.687, |
| "3": 0.807 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.776, |
| "1": 0.877, |
| "2": 0.742, |
| "3": 0.82 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.735, |
| "1": 0.856, |
| "2": 0.732, |
| "3": 0.697 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.749, |
| "1": 0.863, |
| "2": 0.836, |
| "3": 0.759 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.861, |
| "1": 0.943, |
| "2": 0.848, |
| "3": 0.844 |
| } |
| }, |
| "Helsinki-NLP/europarl_results": { |
| "sae_test_accuracy": { |
| "en": 0.9980000257492065, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 0.999000072479248, |
| "nl": 0.999000072479248 |
| }, |
| "llm_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 0.999000072479248, |
| "nl": 1.0 |
| }, |
| "llm_top_1_test_accuracy": { |
| "en": 0.894, |
| "fr": 0.634, |
| "de": 0.75, |
| "es": 0.88, |
| "nl": 0.837 |
| }, |
| "llm_top_2_test_accuracy": { |
| "en": 0.899, |
| "fr": 0.905, |
| "de": 0.83, |
| "es": 0.958, |
| "nl": 0.873 |
| }, |
| "llm_top_5_test_accuracy": { |
| "en": 0.984, |
| "fr": 0.969, |
| "de": 0.887, |
| "es": 0.978, |
| "nl": 0.92 |
| }, |
| "sae_top_1_test_accuracy": { |
| "en": 0.997, |
| "fr": 0.996, |
| "de": 0.986, |
| "es": 0.998, |
| "nl": 0.84 |
| }, |
| "sae_top_2_test_accuracy": { |
| "en": 0.998, |
| "fr": 0.997, |
| "de": 0.993, |
| "es": 0.998, |
| "nl": 0.998 |
| }, |
| "sae_top_5_test_accuracy": { |
| "en": 0.999, |
| "fr": 0.998, |
| "de": 0.995, |
| "es": 0.999, |
| "nl": 1.0 |
| } |
| } |
| } |
| } |