| { |
| "eval_type_id": "sparse_probing", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "LabHC/bias_in_bios_class_set2", |
| "LabHC/bias_in_bios_class_set3", |
| "canrager/amazon_reviews_mcauley_1and5", |
| "canrager/amazon_reviews_mcauley_1and5_sentiment", |
| "codeparrot/github-code", |
| "fancyzhx/ag_news", |
| "Helsinki-NLP/europarl" |
| ], |
| "probe_train_set_size": 4000, |
| "probe_test_set_size": 1000, |
| "context_length": 128, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "model_name": "gemma-2-2b", |
| "k_values": [ |
| 1, |
| 2, |
| 5 |
| ], |
| "lower_vram_usage": false |
| }, |
| "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0", |
| "datetime_epoch_millis": 1745754104461, |
| "eval_result_metrics": { |
| "llm": { |
| "llm_test_accuracy": 0.9581625394523144, |
| "llm_top_1_test_accuracy": 0.6631999999999999, |
| "llm_top_2_test_accuracy": 0.7332624999999999, |
| "llm_top_5_test_accuracy": 0.7879062499999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null |
| }, |
| "sae": { |
| "sae_test_accuracy": 0.9558187901973724, |
| "sae_top_1_test_accuracy": 0.73965625, |
| "sae_top_2_test_accuracy": 0.81185625, |
| "sae_top_5_test_accuracy": 0.8769125, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_results", |
| "llm_test_accuracy": 0.9702000498771668, |
| "llm_top_1_test_accuracy": 0.6676, |
| "llm_top_2_test_accuracy": 0.7209999999999999, |
| "llm_top_5_test_accuracy": 0.7853999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9678000450134278, |
| "sae_top_1_test_accuracy": 0.7868, |
| "sae_top_2_test_accuracy": 0.8408, |
| "sae_top_5_test_accuracy": 0.9099999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set2_results", |
| "llm_test_accuracy": 0.9532000422477722, |
| "llm_top_1_test_accuracy": 0.6739999999999999, |
| "llm_top_2_test_accuracy": 0.7259999999999999, |
| "llm_top_5_test_accuracy": 0.7646, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9514000415802002, |
| "sae_top_1_test_accuracy": 0.6723999999999999, |
| "sae_top_2_test_accuracy": 0.7872, |
| "sae_top_5_test_accuracy": 0.8661999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set3_results", |
| "llm_test_accuracy": 0.934000039100647, |
| "llm_top_1_test_accuracy": 0.6738, |
| "llm_top_2_test_accuracy": 0.7295999999999999, |
| "llm_top_5_test_accuracy": 0.7596, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9300000429153442, |
| "sae_top_1_test_accuracy": 0.7304, |
| "sae_top_2_test_accuracy": 0.8160000000000001, |
| "sae_top_5_test_accuracy": 0.8364, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results", |
| "llm_test_accuracy": 0.9212000489234924, |
| "llm_top_1_test_accuracy": 0.6205999999999999, |
| "llm_top_2_test_accuracy": 0.6534, |
| "llm_top_5_test_accuracy": 0.6881999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9232000470161438, |
| "sae_top_1_test_accuracy": 0.7206, |
| "sae_top_2_test_accuracy": 0.7776, |
| "sae_top_5_test_accuracy": 0.841, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results", |
| "llm_test_accuracy": 0.9645000398159027, |
| "llm_top_1_test_accuracy": 0.634, |
| "llm_top_2_test_accuracy": 0.693, |
| "llm_top_5_test_accuracy": 0.756, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9555000364780426, |
| "sae_top_1_test_accuracy": 0.727, |
| "sae_top_2_test_accuracy": 0.801, |
| "sae_top_5_test_accuracy": 0.867, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "codeparrot/github-code_results", |
| "llm_test_accuracy": 0.9714000463485718, |
| "llm_top_1_test_accuracy": 0.6416000000000001, |
| "llm_top_2_test_accuracy": 0.7187999999999999, |
| "llm_top_5_test_accuracy": 0.7942, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9698000311851501, |
| "sae_top_1_test_accuracy": 0.6436000000000001, |
| "sae_top_2_test_accuracy": 0.6936, |
| "sae_top_5_test_accuracy": 0.8116, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "fancyzhx/ag_news_results", |
| "llm_test_accuracy": 0.9510000348091125, |
| "llm_top_1_test_accuracy": 0.6499999999999999, |
| "llm_top_2_test_accuracy": 0.7695000000000001, |
| "llm_top_5_test_accuracy": 0.8282499999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9502500593662262, |
| "sae_top_1_test_accuracy": 0.68825, |
| "sae_top_2_test_accuracy": 0.83125, |
| "sae_top_5_test_accuracy": 0.8875, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "Helsinki-NLP/europarl_results", |
| "llm_test_accuracy": 0.9998000144958497, |
| "llm_top_1_test_accuracy": 0.744, |
| "llm_top_2_test_accuracy": 0.8548, |
| "llm_top_5_test_accuracy": 0.9269999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9986000180244445, |
| "sae_top_1_test_accuracy": 0.9481999999999999, |
| "sae_top_2_test_accuracy": 0.9474, |
| "sae_top_5_test_accuracy": 0.9955999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.9.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.9.hook_resid_post", |
| "hook_layer": 9, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1_results": { |
| "sae_test_accuracy": { |
| "0": 0.9500000476837158, |
| "1": 0.9660000205039978, |
| "2": 0.9580000638961792, |
| "6": 0.9890000224113464, |
| "9": 0.9760000705718994 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9480000734329224, |
| "1": 0.9730000495910645, |
| "2": 0.9550000429153442, |
| "6": 0.9890000224113464, |
| "9": 0.9860000610351562 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.602, |
| "1": 0.644, |
| "2": 0.686, |
| "6": 0.832, |
| "9": 0.574 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.573, |
| "1": 0.646, |
| "2": 0.702, |
| "6": 0.827, |
| "9": 0.857 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.71, |
| "1": 0.726, |
| "2": 0.744, |
| "6": 0.895, |
| "9": 0.852 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.666, |
| "1": 0.646, |
| "2": 0.859, |
| "6": 0.845, |
| "9": 0.918 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.648, |
| "1": 0.814, |
| "2": 0.851, |
| "6": 0.973, |
| "9": 0.918 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.861, |
| "1": 0.845, |
| "2": 0.897, |
| "6": 0.99, |
| "9": 0.957 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set2_results": { |
| "sae_test_accuracy": { |
| "11": 0.9600000381469727, |
| "13": 0.9550000429153442, |
| "14": 0.9590000510215759, |
| "18": 0.9220000505447388, |
| "19": 0.9610000252723694 |
| }, |
| "llm_test_accuracy": { |
| "11": 0.9600000381469727, |
| "13": 0.9520000219345093, |
| "14": 0.9530000686645508, |
| "18": 0.9350000619888306, |
| "19": 0.9660000205039978 |
| }, |
| "llm_top_1_test_accuracy": { |
| "11": 0.564, |
| "13": 0.683, |
| "14": 0.629, |
| "18": 0.707, |
| "19": 0.787 |
| }, |
| "llm_top_2_test_accuracy": { |
| "11": 0.746, |
| "13": 0.711, |
| "14": 0.634, |
| "18": 0.724, |
| "19": 0.815 |
| }, |
| "llm_top_5_test_accuracy": { |
| "11": 0.773, |
| "13": 0.748, |
| "14": 0.722, |
| "18": 0.729, |
| "19": 0.851 |
| }, |
| "sae_top_1_test_accuracy": { |
| "11": 0.567, |
| "13": 0.669, |
| "14": 0.64, |
| "18": 0.692, |
| "19": 0.794 |
| }, |
| "sae_top_2_test_accuracy": { |
| "11": 0.833, |
| "13": 0.672, |
| "14": 0.868, |
| "18": 0.733, |
| "19": 0.83 |
| }, |
| "sae_top_5_test_accuracy": { |
| "11": 0.942, |
| "13": 0.893, |
| "14": 0.876, |
| "18": 0.758, |
| "19": 0.862 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set3_results": { |
| "sae_test_accuracy": { |
| "20": 0.9570000171661377, |
| "21": 0.9240000247955322, |
| "22": 0.9120000600814819, |
| "25": 0.9630000591278076, |
| "26": 0.8940000534057617 |
| }, |
| "llm_test_accuracy": { |
| "20": 0.9550000429153442, |
| "21": 0.9240000247955322, |
| "22": 0.9220000505447388, |
| "25": 0.9660000205039978, |
| "26": 0.9030000567436218 |
| }, |
| "llm_top_1_test_accuracy": { |
| "20": 0.712, |
| "21": 0.745, |
| "22": 0.616, |
| "25": 0.703, |
| "26": 0.593 |
| }, |
| "llm_top_2_test_accuracy": { |
| "20": 0.808, |
| "21": 0.737, |
| "22": 0.691, |
| "25": 0.73, |
| "26": 0.682 |
| }, |
| "llm_top_5_test_accuracy": { |
| "20": 0.816, |
| "21": 0.772, |
| "22": 0.703, |
| "25": 0.828, |
| "26": 0.679 |
| }, |
| "sae_top_1_test_accuracy": { |
| "20": 0.913, |
| "21": 0.536, |
| "22": 0.885, |
| "25": 0.701, |
| "26": 0.617 |
| }, |
| "sae_top_2_test_accuracy": { |
| "20": 0.915, |
| "21": 0.766, |
| "22": 0.883, |
| "25": 0.844, |
| "26": 0.672 |
| }, |
| "sae_top_5_test_accuracy": { |
| "20": 0.921, |
| "21": 0.794, |
| "22": 0.888, |
| "25": 0.86, |
| "26": 0.719 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_results": { |
| "sae_test_accuracy": { |
| "1": 0.9520000219345093, |
| "2": 0.940000057220459, |
| "3": 0.9170000553131104, |
| "5": 0.9300000667572021, |
| "6": 0.8770000338554382 |
| }, |
| "llm_test_accuracy": { |
| "1": 0.9540000557899475, |
| "2": 0.9320000410079956, |
| "3": 0.9170000553131104, |
| "5": 0.9390000700950623, |
| "6": 0.8640000224113464 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1": 0.712, |
| "2": 0.596, |
| "3": 0.62, |
| "5": 0.577, |
| "6": 0.598 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1": 0.747, |
| "2": 0.636, |
| "3": 0.636, |
| "5": 0.61, |
| "6": 0.638 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1": 0.756, |
| "2": 0.769, |
| "3": 0.638, |
| "5": 0.618, |
| "6": 0.66 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1": 0.64, |
| "2": 0.865, |
| "3": 0.543, |
| "5": 0.801, |
| "6": 0.754 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1": 0.713, |
| "2": 0.874, |
| "3": 0.665, |
| "5": 0.874, |
| "6": 0.762 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1": 0.912, |
| "2": 0.88, |
| "3": 0.753, |
| "5": 0.889, |
| "6": 0.771 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_sentiment_results": { |
| "sae_test_accuracy": { |
| "1.0": 0.9550000429153442, |
| "5.0": 0.956000030040741 |
| }, |
| "llm_test_accuracy": { |
| "1.0": 0.9640000462532043, |
| "5.0": 0.9650000333786011 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1.0": 0.634, |
| "5.0": 0.634 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1.0": 0.693, |
| "5.0": 0.693 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1.0": 0.756, |
| "5.0": 0.756 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1.0": 0.727, |
| "5.0": 0.727 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1.0": 0.801, |
| "5.0": 0.801 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1.0": 0.867, |
| "5.0": 0.867 |
| } |
| }, |
| "codeparrot/github-code_results": { |
| "sae_test_accuracy": { |
| "C": 0.9610000252723694, |
| "Python": 0.987000048160553, |
| "HTML": 0.9830000400543213, |
| "Java": 0.9610000252723694, |
| "PHP": 0.9570000171661377 |
| }, |
| "llm_test_accuracy": { |
| "C": 0.9550000429153442, |
| "Python": 0.9900000691413879, |
| "HTML": 0.9910000562667847, |
| "Java": 0.9600000381469727, |
| "PHP": 0.9610000252723694 |
| }, |
| "llm_top_1_test_accuracy": { |
| "C": 0.548, |
| "Python": 0.62, |
| "HTML": 0.804, |
| "Java": 0.63, |
| "PHP": 0.606 |
| }, |
| "llm_top_2_test_accuracy": { |
| "C": 0.709, |
| "Python": 0.688, |
| "HTML": 0.916, |
| "Java": 0.654, |
| "PHP": 0.627 |
| }, |
| "llm_top_5_test_accuracy": { |
| "C": 0.799, |
| "Python": 0.763, |
| "HTML": 0.942, |
| "Java": 0.758, |
| "PHP": 0.709 |
| }, |
| "sae_top_1_test_accuracy": { |
| "C": 0.634, |
| "Python": 0.647, |
| "HTML": 0.717, |
| "Java": 0.62, |
| "PHP": 0.6 |
| }, |
| "sae_top_2_test_accuracy": { |
| "C": 0.635, |
| "Python": 0.65, |
| "HTML": 0.888, |
| "Java": 0.652, |
| "PHP": 0.643 |
| }, |
| "sae_top_5_test_accuracy": { |
| "C": 0.857, |
| "Python": 0.669, |
| "HTML": 0.945, |
| "Java": 0.666, |
| "PHP": 0.921 |
| } |
| }, |
| "fancyzhx/ag_news_results": { |
| "sae_test_accuracy": { |
| "0": 0.9440000653266907, |
| "1": 0.9850000739097595, |
| "2": 0.9330000281333923, |
| "3": 0.9390000700950623 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9410000443458557, |
| "1": 0.9920000433921814, |
| "2": 0.9280000329017639, |
| "3": 0.9430000185966492 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.589, |
| "1": 0.698, |
| "2": 0.674, |
| "3": 0.639 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.815, |
| "1": 0.805, |
| "2": 0.7, |
| "3": 0.758 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.819, |
| "1": 0.874, |
| "2": 0.783, |
| "3": 0.837 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.686, |
| "1": 0.676, |
| "2": 0.767, |
| "3": 0.624 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.812, |
| "1": 0.832, |
| "2": 0.823, |
| "3": 0.858 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.871, |
| "1": 0.945, |
| "2": 0.836, |
| "3": 0.898 |
| } |
| }, |
| "Helsinki-NLP/europarl_results": { |
| "sae_test_accuracy": { |
| "en": 1.0, |
| "fr": 1.0, |
| "de": 0.9980000257492065, |
| "es": 0.9980000257492065, |
| "nl": 0.9970000386238098 |
| }, |
| "llm_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "llm_top_1_test_accuracy": { |
| "en": 0.75, |
| "fr": 0.611, |
| "de": 0.784, |
| "es": 0.903, |
| "nl": 0.672 |
| }, |
| "llm_top_2_test_accuracy": { |
| "en": 0.846, |
| "fr": 0.873, |
| "de": 0.84, |
| "es": 0.954, |
| "nl": 0.761 |
| }, |
| "llm_top_5_test_accuracy": { |
| "en": 0.929, |
| "fr": 0.938, |
| "de": 0.929, |
| "es": 0.981, |
| "nl": 0.858 |
| }, |
| "sae_top_1_test_accuracy": { |
| "en": 0.998, |
| "fr": 0.994, |
| "de": 0.932, |
| "es": 0.989, |
| "nl": 0.828 |
| }, |
| "sae_top_2_test_accuracy": { |
| "en": 0.998, |
| "fr": 0.997, |
| "de": 0.924, |
| "es": 0.994, |
| "nl": 0.824 |
| }, |
| "sae_top_5_test_accuracy": { |
| "en": 0.999, |
| "fr": 0.996, |
| "de": 0.988, |
| "es": 0.997, |
| "nl": 0.998 |
| } |
| } |
| } |
| } |