{
  "eval_type_id": "sparse_probing",
  "eval_config": {
    "random_seed": 42,
    "dataset_names": [
      "LabHC/bias_in_bios_class_set1",
      "LabHC/bias_in_bios_class_set2",
      "LabHC/bias_in_bios_class_set3",
      "canrager/amazon_reviews_mcauley_1and5",
      "canrager/amazon_reviews_mcauley_1and5_sentiment",
      "codeparrot/github-code",
      "fancyzhx/ag_news",
      "Helsinki-NLP/europarl"
    ],
    "probe_train_set_size": 4000,
    "probe_test_set_size": 1000,
    "context_length": 128,
    "sae_batch_size": 125,
    "llm_batch_size": 32,
    "llm_dtype": "bfloat16",
    "model_name": "gemma-2-2b",
    "k_values": [
      1,
      2,
      5
    ],
    "lower_vram_usage": false
  },
  "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0",
  "datetime_epoch_millis": 1745753541902,
  "eval_result_metrics": {
    "llm": {
      "llm_test_accuracy": 0.9443000420928,
      "llm_top_1_test_accuracy": 0.68455,
      "llm_top_2_test_accuracy": 0.75596875,
      "llm_top_5_test_accuracy": 0.8151249999999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null
    },
    "sae": {
      "sae_test_accuracy": 0.9415000379085541,
      "sae_top_1_test_accuracy": 0.7356687500000001,
      "sae_top_2_test_accuracy": 0.80564375,
      "sae_top_5_test_accuracy": 0.8571687499999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  },
  "eval_result_details": [
    {
      "dataset_name": "LabHC/bias_in_bios_class_set1_results",
      "llm_test_accuracy": 0.9640000462532043,
      "llm_top_1_test_accuracy": 0.6324,
      "llm_top_2_test_accuracy": 0.7188000000000001,
      "llm_top_5_test_accuracy": 0.8124,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9602000474929809,
      "sae_top_1_test_accuracy": 0.7741999999999999,
      "sae_top_2_test_accuracy": 0.8149999999999998,
      "sae_top_5_test_accuracy": 0.8597999999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set2_results",
      "llm_test_accuracy": 0.9494000554084778,
      "llm_top_1_test_accuracy": 0.6978,
      "llm_top_2_test_accuracy": 0.7525999999999999,
      "llm_top_5_test_accuracy": 0.7944,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9418000459671021,
      "sae_top_1_test_accuracy": 0.6892000000000001,
      "sae_top_2_test_accuracy": 0.7928,
      "sae_top_5_test_accuracy": 0.8518000000000001,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set3_results",
      "llm_test_accuracy": 0.9230000376701355,
      "llm_top_1_test_accuracy": 0.6898,
      "llm_top_2_test_accuracy": 0.7316,
      "llm_top_5_test_accuracy": 0.766,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9178000330924988,
      "sae_top_1_test_accuracy": 0.6794,
      "sae_top_2_test_accuracy": 0.7528,
      "sae_top_5_test_accuracy": 0.8054,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
      "llm_test_accuracy": 0.9040000438690186,
      "llm_top_1_test_accuracy": 0.6666000000000001,
      "llm_top_2_test_accuracy": 0.7203999999999999,
      "llm_top_5_test_accuracy": 0.7585999999999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9068000435829162,
      "sae_top_1_test_accuracy": 0.6811999999999999,
      "sae_top_2_test_accuracy": 0.7585999999999999,
      "sae_top_5_test_accuracy": 0.7878,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
      "llm_test_accuracy": 0.8945000469684601,
      "llm_top_1_test_accuracy": 0.616,
      "llm_top_2_test_accuracy": 0.656,
      "llm_top_5_test_accuracy": 0.721,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.8915000259876251,
      "sae_top_1_test_accuracy": 0.64,
      "sae_top_2_test_accuracy": 0.725,
      "sae_top_5_test_accuracy": 0.786,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "codeparrot/github-code_results",
      "llm_test_accuracy": 0.9702000379562378,
      "llm_top_1_test_accuracy": 0.6592,
      "llm_top_2_test_accuracy": 0.7899999999999999,
      "llm_top_5_test_accuracy": 0.8448,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9690000534057617,
      "sae_top_1_test_accuracy": 0.7514000000000001,
      "sae_top_2_test_accuracy": 0.8210000000000001,
      "sae_top_5_test_accuracy": 0.8886000000000001,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "fancyzhx/ag_news_results",
      "llm_test_accuracy": 0.9495000541210175,
      "llm_top_1_test_accuracy": 0.7290000000000001,
      "llm_top_2_test_accuracy": 0.80075,
      "llm_top_5_test_accuracy": 0.869,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9455000460147858,
      "sae_top_1_test_accuracy": 0.74975,
      "sae_top_2_test_accuracy": 0.80975,
      "sae_top_5_test_accuracy": 0.87975,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "Helsinki-NLP/europarl_results",
      "llm_test_accuracy": 0.9998000144958497,
      "llm_top_1_test_accuracy": 0.7856,
      "llm_top_2_test_accuracy": 0.8775999999999999,
      "llm_top_5_test_accuracy": 0.9548,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.999400007724762,
      "sae_top_1_test_accuracy": 0.9202,
      "sae_top_2_test_accuracy": 0.9702,
      "sae_top_5_test_accuracy": 0.9982,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  ],
  "sae_bench_commit_hash": "Unknown",
  "sae_lens_id": "blocks.2.hook_resid_post",
  "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc",
  "sae_lens_version": "5.9.1",
  "sae_cfg_dict": {
    "architecture": "jumprelu",
    "d_in": 2304,
    "d_sae": 32768,
    "activation_fn_str": "relu",
    "apply_b_dec_to_input": true,
    "finetuning_scaling_factor": false,
    "context_size": 1024,
    "model_name": "gemma-2-2b",
    "hook_name": "blocks.2.hook_resid_post",
    "hook_layer": 2,
    "hook_head_index": null,
    "prepend_bos": true,
    "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B",
    "dataset_trust_remote_code": true,
    "normalize_activations": "none",
    "dtype": "torch.bfloat16",
    "device": "cuda",
    "sae_lens_training_version": "5.5.1",
    "activation_fn_kwargs": {
      "k": 40
    },
    "neuronpedia_id": null,
    "model_from_pretrained_kwargs": {
      "center_writing_weights": false
    },
    "seqpos_slice": [
      null
    ]
  },
  "eval_result_unstructured": {
    "LabHC/bias_in_bios_class_set1_results": {
      "sae_test_accuracy": {
        "0": 0.9310000538825989,
        "1": 0.9580000638961792,
        "2": 0.9520000219345093,
        "6": 0.9850000739097595,
        "9": 0.9750000238418579
      },
      "llm_test_accuracy": {
        "0": 0.940000057220459,
        "1": 0.9570000171661377,
        "2": 0.9540000557899475,
        "6": 0.9920000433921814,
        "9": 0.9770000576972961
      },
      "llm_top_1_test_accuracy": {
        "0": 0.631,
        "1": 0.611,
        "2": 0.567,
        "6": 0.775,
        "9": 0.578
      },
      "llm_top_2_test_accuracy": {
        "0": 0.685,
        "1": 0.64,
        "2": 0.689,
        "6": 0.777,
        "9": 0.803
      },
      "llm_top_5_test_accuracy": {
        "0": 0.72,
        "1": 0.742,
        "2": 0.802,
        "6": 0.9,
        "9": 0.898
      },
      "sae_top_1_test_accuracy": {
        "0": 0.704,
        "1": 0.627,
        "2": 0.857,
        "6": 0.752,
        "9": 0.931
      },
      "sae_top_2_test_accuracy": {
        "0": 0.706,
        "1": 0.804,
        "2": 0.843,
        "6": 0.783,
        "9": 0.939
      },
      "sae_top_5_test_accuracy": {
        "0": 0.717,
        "1": 0.803,
        "2": 0.88,
        "6": 0.959,
        "9": 0.94
      }
    },
    "LabHC/bias_in_bios_class_set2_results": {
      "sae_test_accuracy": {
        "11": 0.9450000524520874,
        "13": 0.9460000395774841,
        "14": 0.9430000185966492,
        "18": 0.9120000600814819,
        "19": 0.9630000591278076
      },
      "llm_test_accuracy": {
        "11": 0.9530000686645508,
        "13": 0.9420000314712524,
        "14": 0.9650000333786011,
        "18": 0.9250000715255737,
        "19": 0.9620000720024109
      },
      "llm_top_1_test_accuracy": {
        "11": 0.695,
        "13": 0.67,
        "14": 0.675,
        "18": 0.696,
        "19": 0.753
      },
      "llm_top_2_test_accuracy": {
        "11": 0.781,
        "13": 0.76,
        "14": 0.715,
        "18": 0.71,
        "19": 0.797
      },
      "llm_top_5_test_accuracy": {
        "11": 0.863,
        "13": 0.793,
        "14": 0.754,
        "18": 0.722,
        "19": 0.84
      },
      "sae_top_1_test_accuracy": {
        "11": 0.734,
        "13": 0.637,
        "14": 0.659,
        "18": 0.687,
        "19": 0.729
      },
      "sae_top_2_test_accuracy": {
        "11": 0.839,
        "13": 0.791,
        "14": 0.839,
        "18": 0.699,
        "19": 0.796
      },
      "sae_top_5_test_accuracy": {
        "11": 0.872,
        "13": 0.805,
        "14": 0.837,
        "18": 0.889,
        "19": 0.856
      }
    },
    "LabHC/bias_in_bios_class_set3_results": {
      "sae_test_accuracy": {
        "20": 0.9360000491142273,
        "21": 0.9150000214576721,
        "22": 0.909000039100647,
        "25": 0.9550000429153442,
        "26": 0.8740000128746033
      },
      "llm_test_accuracy": {
        "20": 0.9420000314712524,
        "21": 0.9130000472068787,
        "22": 0.9240000247955322,
        "25": 0.9620000720024109,
        "26": 0.8740000128746033
      },
      "llm_top_1_test_accuracy": {
        "20": 0.799,
        "21": 0.735,
        "22": 0.631,
        "25": 0.694,
        "26": 0.59
      },
      "llm_top_2_test_accuracy": {
        "20": 0.79,
        "21": 0.734,
        "22": 0.719,
        "25": 0.762,
        "26": 0.653
      },
      "llm_top_5_test_accuracy": {
        "20": 0.849,
        "21": 0.778,
        "22": 0.746,
        "25": 0.776,
        "26": 0.681
      },
      "sae_top_1_test_accuracy": {
        "20": 0.873,
        "21": 0.637,
        "22": 0.633,
        "25": 0.657,
        "26": 0.597
      },
      "sae_top_2_test_accuracy": {
        "20": 0.884,
        "21": 0.663,
        "22": 0.676,
        "25": 0.813,
        "26": 0.728
      },
      "sae_top_5_test_accuracy": {
        "20": 0.89,
        "21": 0.8,
        "22": 0.744,
        "25": 0.872,
        "26": 0.721
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_results": {
      "sae_test_accuracy": {
        "1": 0.9300000667572021,
        "2": 0.9230000376701355,
        "3": 0.9040000438690186,
        "5": 0.9040000438690186,
        "6": 0.8730000257492065
      },
      "llm_test_accuracy": {
        "1": 0.9350000619888306,
        "2": 0.9120000600814819,
        "3": 0.8960000276565552,
        "5": 0.9140000343322754,
        "6": 0.8630000352859497
      },
      "llm_top_1_test_accuracy": {
        "1": 0.68,
        "2": 0.729,
        "3": 0.597,
        "5": 0.651,
        "6": 0.676
      },
      "llm_top_2_test_accuracy": {
        "1": 0.758,
        "2": 0.777,
        "3": 0.653,
        "5": 0.716,
        "6": 0.698
      },
      "llm_top_5_test_accuracy": {
        "1": 0.792,
        "2": 0.811,
        "3": 0.732,
        "5": 0.736,
        "6": 0.722
      },
      "sae_top_1_test_accuracy": {
        "1": 0.853,
        "2": 0.706,
        "3": 0.576,
        "5": 0.593,
        "6": 0.678
      },
      "sae_top_2_test_accuracy": {
        "1": 0.845,
        "2": 0.851,
        "3": 0.575,
        "5": 0.819,
        "6": 0.703
      },
      "sae_top_5_test_accuracy": {
        "1": 0.851,
        "2": 0.881,
        "3": 0.653,
        "5": 0.835,
        "6": 0.719
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
      "sae_test_accuracy": {
        "1.0": 0.8920000195503235,
        "5.0": 0.8910000324249268
      },
      "llm_test_accuracy": {
        "1.0": 0.893000066280365,
        "5.0": 0.8960000276565552
      },
      "llm_top_1_test_accuracy": {
        "1.0": 0.616,
        "5.0": 0.616
      },
      "llm_top_2_test_accuracy": {
        "1.0": 0.656,
        "5.0": 0.656
      },
      "llm_top_5_test_accuracy": {
        "1.0": 0.721,
        "5.0": 0.721
      },
      "sae_top_1_test_accuracy": {
        "1.0": 0.64,
        "5.0": 0.64
      },
      "sae_top_2_test_accuracy": {
        "1.0": 0.725,
        "5.0": 0.725
      },
      "sae_top_5_test_accuracy": {
        "1.0": 0.786,
        "5.0": 0.786
      }
    },
    "codeparrot/github-code_results": {
      "sae_test_accuracy": {
        "C": 0.9580000638961792,
        "Python": 0.987000048160553,
        "HTML": 0.9860000610351562,
        "Java": 0.9610000252723694,
        "PHP": 0.9530000686645508
      },
      "llm_test_accuracy": {
        "C": 0.9650000333786011,
        "Python": 0.9860000610351562,
        "HTML": 0.9890000224113464,
        "Java": 0.9650000333786011,
        "PHP": 0.9460000395774841
      },
      "llm_top_1_test_accuracy": {
        "C": 0.805,
        "Python": 0.664,
        "HTML": 0.621,
        "Java": 0.618,
        "PHP": 0.588
      },
      "llm_top_2_test_accuracy": {
        "C": 0.849,
        "Python": 0.74,
        "HTML": 0.941,
        "Java": 0.755,
        "PHP": 0.665
      },
      "llm_top_5_test_accuracy": {
        "C": 0.872,
        "Python": 0.85,
        "HTML": 0.956,
        "Java": 0.756,
        "PHP": 0.79
      },
      "sae_top_1_test_accuracy": {
        "C": 0.625,
        "Python": 0.658,
        "HTML": 0.936,
        "Java": 0.64,
        "PHP": 0.898
      },
      "sae_top_2_test_accuracy": {
        "C": 0.708,
        "Python": 0.896,
        "HTML": 0.951,
        "Java": 0.648,
        "PHP": 0.902
      },
      "sae_top_5_test_accuracy": {
        "C": 0.742,
        "Python": 0.931,
        "HTML": 0.95,
        "Java": 0.907,
        "PHP": 0.913
      }
    },
    "fancyzhx/ag_news_results": {
      "sae_test_accuracy": {
        "0": 0.9390000700950623,
        "1": 0.9790000319480896,
        "2": 0.9190000295639038,
        "3": 0.9450000524520874
      },
      "llm_test_accuracy": {
        "0": 0.9480000734329224,
        "1": 0.987000048160553,
        "2": 0.9250000715255737,
        "3": 0.9380000233650208
      },
      "llm_top_1_test_accuracy": {
        "0": 0.754,
        "1": 0.761,
        "2": 0.683,
        "3": 0.718
      },
      "llm_top_2_test_accuracy": {
        "0": 0.761,
        "1": 0.889,
        "2": 0.748,
        "3": 0.805
      },
      "llm_top_5_test_accuracy": {
        "0": 0.876,
        "1": 0.947,
        "2": 0.824,
        "3": 0.829
      },
      "sae_top_1_test_accuracy": {
        "0": 0.796,
        "1": 0.958,
        "2": 0.609,
        "3": 0.636
      },
      "sae_top_2_test_accuracy": {
        "0": 0.772,
        "1": 0.953,
        "2": 0.763,
        "3": 0.751
      },
      "sae_top_5_test_accuracy": {
        "0": 0.837,
        "1": 0.97,
        "2": 0.851,
        "3": 0.861
      }
    },
    "Helsinki-NLP/europarl_results": {
      "sae_test_accuracy": {
        "en": 0.9970000386238098,
        "fr": 1.0,
        "de": 1.0,
        "es": 1.0,
        "nl": 1.0
      },
      "llm_test_accuracy": {
        "en": 0.999000072479248,
        "fr": 1.0,
        "de": 1.0,
        "es": 1.0,
        "nl": 1.0
      },
      "llm_top_1_test_accuracy": {
        "en": 0.844,
        "fr": 0.649,
        "de": 0.856,
        "es": 0.713,
        "nl": 0.866
      },
      "llm_top_2_test_accuracy": {
        "en": 0.987,
        "fr": 0.727,
        "de": 0.859,
        "es": 0.949,
        "nl": 0.866
      },
      "llm_top_5_test_accuracy": {
        "en": 0.994,
        "fr": 0.96,
        "de": 0.887,
        "es": 0.981,
        "nl": 0.952
      },
      "sae_top_1_test_accuracy": {
        "en": 0.908,
        "fr": 0.841,
        "de": 1.0,
        "es": 0.852,
        "nl": 1.0
      },
      "sae_top_2_test_accuracy": {
        "en": 0.998,
        "fr": 0.996,
        "de": 1.0,
        "es": 0.858,
        "nl": 0.999
      },
      "sae_top_5_test_accuracy": {
        "en": 0.998,
        "fr": 0.997,
        "de": 1.0,
        "es": 0.997,
        "nl": 0.999
      }
    }
  }
}