{
  "eval_type_id": "sparse_probing",
  "eval_config": {
    "random_seed": 42,
    "dataset_names": [
      "LabHC/bias_in_bios_class_set1",
      "LabHC/bias_in_bios_class_set2",
      "LabHC/bias_in_bios_class_set3",
      "canrager/amazon_reviews_mcauley_1and5",
      "canrager/amazon_reviews_mcauley_1and5_sentiment",
      "codeparrot/github-code",
      "fancyzhx/ag_news",
      "Helsinki-NLP/europarl"
    ],
    "probe_train_set_size": 4000,
    "probe_test_set_size": 1000,
    "context_length": 128,
    "sae_batch_size": 125,
    "llm_batch_size": 32,
    "llm_dtype": "bfloat16",
    "model_name": "gemma-2-2b",
    "k_values": [
      1,
      2,
      5
    ],
    "lower_vram_usage": false
  },
  "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0",
  "datetime_epoch_millis": 1745753860450,
  "eval_result_metrics": {
    "llm": {
      "llm_test_accuracy": 0.9538125347346067,
      "llm_top_1_test_accuracy": 0.674875,
      "llm_top_2_test_accuracy": 0.7175687500000001,
      "llm_top_5_test_accuracy": 0.7749187500000001,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null
    },
    "sae": {
      "sae_test_accuracy": 0.9466062963008881,
      "sae_top_1_test_accuracy": 0.74505,
      "sae_top_2_test_accuracy": 0.80085,
      "sae_top_5_test_accuracy": 0.8643749999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  },
  "eval_result_details": [
    {
      "dataset_name": "LabHC/bias_in_bios_class_set1_results",
      "llm_test_accuracy": 0.9658000349998475,
      "llm_top_1_test_accuracy": 0.642,
      "llm_top_2_test_accuracy": 0.6814,
      "llm_top_5_test_accuracy": 0.7658,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9570000529289245,
      "sae_top_1_test_accuracy": 0.8048,
      "sae_top_2_test_accuracy": 0.8566,
      "sae_top_5_test_accuracy": 0.9036,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set2_results",
      "llm_test_accuracy": 0.9516000390052796,
      "llm_top_1_test_accuracy": 0.6649999999999999,
      "llm_top_2_test_accuracy": 0.72,
      "llm_top_5_test_accuracy": 0.7648,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9446000576019287,
      "sae_top_1_test_accuracy": 0.6991999999999999,
      "sae_top_2_test_accuracy": 0.783,
      "sae_top_5_test_accuracy": 0.8084,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set3_results",
      "llm_test_accuracy": 0.9300000309944153,
      "llm_top_1_test_accuracy": 0.6642,
      "llm_top_2_test_accuracy": 0.7068000000000001,
      "llm_top_5_test_accuracy": 0.7394000000000001,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9220000386238099,
      "sae_top_1_test_accuracy": 0.7188,
      "sae_top_2_test_accuracy": 0.7902,
      "sae_top_5_test_accuracy": 0.8464,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
      "llm_test_accuracy": 0.9064000487327576,
      "llm_top_1_test_accuracy": 0.6102000000000001,
      "llm_top_2_test_accuracy": 0.6286,
      "llm_top_5_test_accuracy": 0.6768,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9058000445365906,
      "sae_top_1_test_accuracy": 0.6918,
      "sae_top_2_test_accuracy": 0.7285999999999999,
      "sae_top_5_test_accuracy": 0.8065999999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
      "llm_test_accuracy": 0.95250004529953,
      "llm_top_1_test_accuracy": 0.627,
      "llm_top_2_test_accuracy": 0.661,
      "llm_top_5_test_accuracy": 0.729,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9240000545978546,
      "sae_top_1_test_accuracy": 0.699,
      "sae_top_2_test_accuracy": 0.69,
      "sae_top_5_test_accuracy": 0.826,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "codeparrot/github-code_results",
      "llm_test_accuracy": 0.9742000460624695,
      "llm_top_1_test_accuracy": 0.6656000000000001,
      "llm_top_2_test_accuracy": 0.7094,
      "llm_top_5_test_accuracy": 0.7736000000000001,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9728000402450562,
      "sae_top_1_test_accuracy": 0.6317999999999999,
      "sae_top_2_test_accuracy": 0.7484000000000001,
      "sae_top_5_test_accuracy": 0.8568,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "fancyzhx/ag_news_results",
      "llm_test_accuracy": 0.9500000327825546,
      "llm_top_1_test_accuracy": 0.7270000000000001,
      "llm_top_2_test_accuracy": 0.75075,
      "llm_top_5_test_accuracy": 0.81375,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9472500383853912,
      "sae_top_1_test_accuracy": 0.7569999999999999,
      "sae_top_2_test_accuracy": 0.813,
      "sae_top_5_test_accuracy": 0.869,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "Helsinki-NLP/europarl_results",
      "llm_test_accuracy": 1.0,
      "llm_top_1_test_accuracy": 0.798,
      "llm_top_2_test_accuracy": 0.8826,
      "llm_top_5_test_accuracy": 0.9362,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9994000434875489,
      "sae_top_1_test_accuracy": 0.958,
      "sae_top_2_test_accuracy": 0.9969999999999999,
      "sae_top_5_test_accuracy": 0.9982,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  ],
  "sae_bench_commit_hash": "Unknown",
  "sae_lens_id": "blocks.6.hook_resid_post",
  "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc",
  "sae_lens_version": "5.9.1",
  "sae_cfg_dict": {
    "architecture": "jumprelu",
    "d_in": 2304,
    "d_sae": 32768,
    "activation_fn_str": "relu",
    "apply_b_dec_to_input": true,
    "finetuning_scaling_factor": false,
    "context_size": 1024,
    "model_name": "gemma-2-2b",
    "hook_name": "blocks.6.hook_resid_post",
    "hook_layer": 6,
    "hook_head_index": null,
    "prepend_bos": true,
    "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B",
    "dataset_trust_remote_code": true,
    "normalize_activations": "none",
    "dtype": "torch.bfloat16",
    "device": "cuda",
    "sae_lens_training_version": "5.5.1",
    "activation_fn_kwargs": {
      "k": 40
    },
    "neuronpedia_id": null,
    "model_from_pretrained_kwargs": {
      "center_writing_weights": false
    },
    "seqpos_slice": [
      null
    ]
  },
  "eval_result_unstructured": {
    "LabHC/bias_in_bios_class_set1_results": {
      "sae_test_accuracy": {
        "0": 0.9330000281333923,
        "1": 0.9580000638961792,
        "2": 0.9440000653266907,
        "6": 0.9860000610351562,
        "9": 0.9640000462532043
      },
      "llm_test_accuracy": {
        "0": 0.9430000185966492,
        "1": 0.9650000333786011,
        "2": 0.9440000653266907,
        "6": 0.9940000176429749,
        "9": 0.9830000400543213
      },
      "llm_top_1_test_accuracy": {
        "0": 0.582,
        "1": 0.61,
        "2": 0.561,
        "6": 0.757,
        "9": 0.7
      },
      "llm_top_2_test_accuracy": {
        "0": 0.605,
        "1": 0.638,
        "2": 0.68,
        "6": 0.763,
        "9": 0.721
      },
      "llm_top_5_test_accuracy": {
        "0": 0.695,
        "1": 0.752,
        "2": 0.75,
        "6": 0.797,
        "9": 0.835
      },
      "sae_top_1_test_accuracy": {
        "0": 0.635,
        "1": 0.602,
        "2": 0.877,
        "6": 0.968,
        "9": 0.942
      },
      "sae_top_2_test_accuracy": {
        "0": 0.838,
        "1": 0.628,
        "2": 0.893,
        "6": 0.974,
        "9": 0.95
      },
      "sae_top_5_test_accuracy": {
        "0": 0.827,
        "1": 0.863,
        "2": 0.9,
        "6": 0.981,
        "9": 0.947
      }
    },
    "LabHC/bias_in_bios_class_set2_results": {
      "sae_test_accuracy": {
        "11": 0.9490000605583191,
        "13": 0.9580000638961792,
        "14": 0.9410000443458557,
        "18": 0.9130000472068787,
        "19": 0.9620000720024109
      },
      "llm_test_accuracy": {
        "11": 0.9570000171661377,
        "13": 0.9460000395774841,
        "14": 0.9500000476837158,
        "18": 0.9430000185966492,
        "19": 0.9620000720024109
      },
      "llm_top_1_test_accuracy": {
        "11": 0.563,
        "13": 0.701,
        "14": 0.608,
        "18": 0.662,
        "19": 0.791
      },
      "llm_top_2_test_accuracy": {
        "11": 0.733,
        "13": 0.723,
        "14": 0.645,
        "18": 0.706,
        "19": 0.793
      },
      "llm_top_5_test_accuracy": {
        "11": 0.846,
        "13": 0.737,
        "14": 0.694,
        "18": 0.725,
        "19": 0.822
      },
      "sae_top_1_test_accuracy": {
        "11": 0.719,
        "13": 0.709,
        "14": 0.59,
        "18": 0.653,
        "19": 0.825
      },
      "sae_top_2_test_accuracy": {
        "11": 0.709,
        "13": 0.746,
        "14": 0.891,
        "18": 0.732,
        "19": 0.837
      },
      "sae_top_5_test_accuracy": {
        "11": 0.855,
        "13": 0.715,
        "14": 0.897,
        "18": 0.737,
        "19": 0.838
      }
    },
    "LabHC/bias_in_bios_class_set3_results": {
      "sae_test_accuracy": {
        "20": 0.9460000395774841,
        "21": 0.9150000214576721,
        "22": 0.9120000600814819,
        "25": 0.9570000171661377,
        "26": 0.8800000548362732
      },
      "llm_test_accuracy": {
        "20": 0.9570000171661377,
        "21": 0.9140000343322754,
        "22": 0.9190000295639038,
        "25": 0.956000030040741,
        "26": 0.9040000438690186
      },
      "llm_top_1_test_accuracy": {
        "20": 0.644,
        "21": 0.731,
        "22": 0.622,
        "25": 0.672,
        "26": 0.652
      },
      "llm_top_2_test_accuracy": {
        "20": 0.789,
        "21": 0.75,
        "22": 0.618,
        "25": 0.716,
        "26": 0.661
      },
      "llm_top_5_test_accuracy": {
        "20": 0.81,
        "21": 0.766,
        "22": 0.704,
        "25": 0.747,
        "26": 0.67
      },
      "sae_top_1_test_accuracy": {
        "20": 0.573,
        "21": 0.717,
        "22": 0.883,
        "25": 0.832,
        "26": 0.589
      },
      "sae_top_2_test_accuracy": {
        "20": 0.806,
        "21": 0.741,
        "22": 0.884,
        "25": 0.844,
        "26": 0.676
      },
      "sae_top_5_test_accuracy": {
        "20": 0.882,
        "21": 0.832,
        "22": 0.879,
        "25": 0.904,
        "26": 0.735
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_results": {
      "sae_test_accuracy": {
        "1": 0.9280000329017639,
        "2": 0.9230000376701355,
        "3": 0.9020000696182251,
        "5": 0.9150000214576721,
        "6": 0.8610000610351562
      },
      "llm_test_accuracy": {
        "1": 0.9440000653266907,
        "2": 0.9250000715255737,
        "3": 0.8870000243186951,
        "5": 0.9260000586509705,
        "6": 0.8500000238418579
      },
      "llm_top_1_test_accuracy": {
        "1": 0.695,
        "2": 0.606,
        "3": 0.626,
        "5": 0.559,
        "6": 0.565
      },
      "llm_top_2_test_accuracy": {
        "1": 0.685,
        "2": 0.668,
        "3": 0.607,
        "5": 0.562,
        "6": 0.621
      },
      "llm_top_5_test_accuracy": {
        "1": 0.743,
        "2": 0.723,
        "3": 0.6,
        "5": 0.654,
        "6": 0.664
      },
      "sae_top_1_test_accuracy": {
        "1": 0.624,
        "2": 0.773,
        "3": 0.573,
        "5": 0.885,
        "6": 0.604
      },
      "sae_top_2_test_accuracy": {
        "1": 0.686,
        "2": 0.852,
        "3": 0.594,
        "5": 0.881,
        "6": 0.63
      },
      "sae_top_5_test_accuracy": {
        "1": 0.817,
        "2": 0.877,
        "3": 0.715,
        "5": 0.884,
        "6": 0.74
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
      "sae_test_accuracy": {
        "1.0": 0.9230000376701355,
        "5.0": 0.9250000715255737
      },
      "llm_test_accuracy": {
        "1.0": 0.9520000219345093,
        "5.0": 0.9530000686645508
      },
      "llm_top_1_test_accuracy": {
        "1.0": 0.627,
        "5.0": 0.627
      },
      "llm_top_2_test_accuracy": {
        "1.0": 0.661,
        "5.0": 0.661
      },
      "llm_top_5_test_accuracy": {
        "1.0": 0.729,
        "5.0": 0.729
      },
      "sae_top_1_test_accuracy": {
        "1.0": 0.699,
        "5.0": 0.699
      },
      "sae_top_2_test_accuracy": {
        "1.0": 0.69,
        "5.0": 0.69
      },
      "sae_top_5_test_accuracy": {
        "1.0": 0.826,
        "5.0": 0.826
      }
    },
    "codeparrot/github-code_results": {
      "sae_test_accuracy": {
        "C": 0.9610000252723694,
        "Python": 0.987000048160553,
        "HTML": 0.9890000224113464,
        "Java": 0.9650000333786011,
        "PHP": 0.9620000720024109
      },
      "llm_test_accuracy": {
        "C": 0.9590000510215759,
        "Python": 0.9900000691413879,
        "HTML": 0.9930000305175781,
        "Java": 0.9750000238418579,
        "PHP": 0.9540000557899475
      },
      "llm_top_1_test_accuracy": {
        "C": 0.659,
        "Python": 0.611,
        "HTML": 0.818,
        "Java": 0.605,
        "PHP": 0.635
      },
      "llm_top_2_test_accuracy": {
        "C": 0.682,
        "Python": 0.677,
        "HTML": 0.927,
        "Java": 0.637,
        "PHP": 0.624
      },
      "llm_top_5_test_accuracy": {
        "C": 0.737,
        "Python": 0.751,
        "HTML": 0.948,
        "Java": 0.761,
        "PHP": 0.671
      },
      "sae_top_1_test_accuracy": {
        "C": 0.634,
        "Python": 0.603,
        "HTML": 0.675,
        "Java": 0.616,
        "PHP": 0.631
      },
      "sae_top_2_test_accuracy": {
        "C": 0.645,
        "Python": 0.918,
        "HTML": 0.93,
        "Java": 0.644,
        "PHP": 0.605
      },
      "sae_top_5_test_accuracy": {
        "C": 0.725,
        "Python": 0.945,
        "HTML": 0.958,
        "Java": 0.751,
        "PHP": 0.905
      }
    },
    "fancyzhx/ag_news_results": {
      "sae_test_accuracy": {
        "0": 0.9390000700950623,
        "1": 0.984000027179718,
        "2": 0.9280000329017639,
        "3": 0.9380000233650208
      },
      "llm_test_accuracy": {
        "0": 0.9330000281333923,
        "1": 0.9890000224113464,
        "2": 0.9270000457763672,
        "3": 0.9510000348091125
      },
      "llm_top_1_test_accuracy": {
        "0": 0.745,
        "1": 0.798,
        "2": 0.637,
        "3": 0.728
      },
      "llm_top_2_test_accuracy": {
        "0": 0.732,
        "1": 0.856,
        "2": 0.661,
        "3": 0.754
      },
      "llm_top_5_test_accuracy": {
        "0": 0.806,
        "1": 0.877,
        "2": 0.747,
        "3": 0.825
      },
      "sae_top_1_test_accuracy": {
        "0": 0.832,
        "1": 0.836,
        "2": 0.729,
        "3": 0.631
      },
      "sae_top_2_test_accuracy": {
        "0": 0.843,
        "1": 0.938,
        "2": 0.721,
        "3": 0.75
      },
      "sae_top_5_test_accuracy": {
        "0": 0.867,
        "1": 0.94,
        "2": 0.82,
        "3": 0.849
      }
    },
    "Helsinki-NLP/europarl_results": {
      "sae_test_accuracy": {
        "en": 0.999000072479248,
        "fr": 1.0,
        "de": 1.0,
        "es": 0.999000072479248,
        "nl": 0.999000072479248
      },
      "llm_test_accuracy": {
        "en": 1.0,
        "fr": 1.0,
        "de": 1.0,
        "es": 1.0,
        "nl": 1.0
      },
      "llm_top_1_test_accuracy": {
        "en": 0.884,
        "fr": 0.648,
        "de": 0.747,
        "es": 0.903,
        "nl": 0.808
      },
      "llm_top_2_test_accuracy": {
        "en": 0.903,
        "fr": 0.894,
        "de": 0.834,
        "es": 0.952,
        "nl": 0.83
      },
      "llm_top_5_test_accuracy": {
        "en": 0.974,
        "fr": 0.96,
        "de": 0.886,
        "es": 0.976,
        "nl": 0.885
      },
      "sae_top_1_test_accuracy": {
        "en": 0.997,
        "fr": 0.994,
        "de": 0.995,
        "es": 0.993,
        "nl": 0.811
      },
      "sae_top_2_test_accuracy": {
        "en": 1.0,
        "fr": 0.996,
        "de": 0.998,
        "es": 0.993,
        "nl": 0.998
      },
      "sae_top_5_test_accuracy": {
        "en": 0.998,
        "fr": 0.996,
        "de": 1.0,
        "es": 0.997,
        "nl": 1.0
      }
    }
  }
}