{
  "eval_type_id": "sparse_probing",
  "eval_config": {
    "random_seed": 42,
    "dataset_names": [
      "LabHC/bias_in_bios_class_set1",
      "LabHC/bias_in_bios_class_set2",
      "LabHC/bias_in_bios_class_set3",
      "canrager/amazon_reviews_mcauley_1and5",
      "canrager/amazon_reviews_mcauley_1and5_sentiment",
      "codeparrot/github-code",
      "fancyzhx/ag_news",
      "Helsinki-NLP/europarl"
    ],
    "probe_train_set_size": 4000,
    "probe_test_set_size": 1000,
    "context_length": 128,
    "sae_batch_size": 125,
    "llm_batch_size": 32,
    "llm_dtype": "bfloat16",
    "model_name": "gemma-2-2b",
    "k_values": [
      1,
      2,
      5
    ],
    "lower_vram_usage": false
  },
  "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0",
  "datetime_epoch_millis": 1745754673652,
  "eval_result_metrics": {
    "llm": {
      "llm_test_accuracy": 0.9599187940359115,
      "llm_top_1_test_accuracy": 0.6718624999999999,
      "llm_top_2_test_accuracy": 0.74746875,
      "llm_top_5_test_accuracy": 0.8055625,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null
    },
    "sae": {
      "sae_test_accuracy": 0.9588000416755676,
      "sae_top_1_test_accuracy": 0.8110812499999999,
      "sae_top_2_test_accuracy": 0.8523749999999999,
      "sae_top_5_test_accuracy": 0.90629375,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  },
  "eval_result_details": [
    {
      "dataset_name": "LabHC/bias_in_bios_class_set1_results",
      "llm_test_accuracy": 0.9708000421524048,
      "llm_top_1_test_accuracy": 0.6684,
      "llm_top_2_test_accuracy": 0.7188000000000001,
      "llm_top_5_test_accuracy": 0.7924,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9634000420570373,
      "sae_top_1_test_accuracy": 0.8442000000000001,
      "sae_top_2_test_accuracy": 0.8724000000000001,
      "sae_top_5_test_accuracy": 0.8968,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set2_results",
      "llm_test_accuracy": 0.9522000551223755,
      "llm_top_1_test_accuracy": 0.6554,
      "llm_top_2_test_accuracy": 0.7074,
      "llm_top_5_test_accuracy": 0.7704000000000001,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9514000535011291,
      "sae_top_1_test_accuracy": 0.7734,
      "sae_top_2_test_accuracy": 0.8267999999999999,
      "sae_top_5_test_accuracy": 0.9044000000000001,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set3_results",
      "llm_test_accuracy": 0.9322000503540039,
      "llm_top_1_test_accuracy": 0.6994,
      "llm_top_2_test_accuracy": 0.74,
      "llm_top_5_test_accuracy": 0.7749999999999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9312000393867492,
      "sae_top_1_test_accuracy": 0.812,
      "sae_top_2_test_accuracy": 0.8470000000000001,
      "sae_top_5_test_accuracy": 0.8678000000000001,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
      "llm_test_accuracy": 0.9258000493049622,
      "llm_top_1_test_accuracy": 0.6208,
      "llm_top_2_test_accuracy": 0.686,
      "llm_top_5_test_accuracy": 0.7489999999999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9298000454902648,
      "sae_top_1_test_accuracy": 0.7348,
      "sae_top_2_test_accuracy": 0.7931999999999999,
      "sae_top_5_test_accuracy": 0.8315999999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
      "llm_test_accuracy": 0.9770000576972961,
      "llm_top_1_test_accuracy": 0.683,
      "llm_top_2_test_accuracy": 0.736,
      "llm_top_5_test_accuracy": 0.777,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9745000302791595,
      "sae_top_1_test_accuracy": 0.839,
      "sae_top_2_test_accuracy": 0.847,
      "sae_top_5_test_accuracy": 0.943,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "codeparrot/github-code_results",
      "llm_test_accuracy": 0.9698000431060791,
      "llm_top_1_test_accuracy": 0.6237999999999999,
      "llm_top_2_test_accuracy": 0.706,
      "llm_top_5_test_accuracy": 0.7714000000000001,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9706000566482544,
      "sae_top_1_test_accuracy": 0.7475999999999999,
      "sae_top_2_test_accuracy": 0.8286,
      "sae_top_5_test_accuracy": 0.9263999999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "fancyzhx/ag_news_results",
      "llm_test_accuracy": 0.9517500400543213,
      "llm_top_1_test_accuracy": 0.6545,
      "llm_top_2_test_accuracy": 0.76275,
      "llm_top_5_test_accuracy": 0.8244999999999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9505000412464142,
      "sae_top_1_test_accuracy": 0.75625,
      "sae_top_2_test_accuracy": 0.811,
      "sae_top_5_test_accuracy": 0.88275,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "Helsinki-NLP/europarl_results",
      "llm_test_accuracy": 0.9998000144958497,
      "llm_top_1_test_accuracy": 0.7696,
      "llm_top_2_test_accuracy": 0.9228,
      "llm_top_5_test_accuracy": 0.9847999999999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9990000247955322,
      "sae_top_1_test_accuracy": 0.9814,
      "sae_top_2_test_accuracy": 0.993,
      "sae_top_5_test_accuracy": 0.9976,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  ],
  "sae_bench_commit_hash": "Unknown",
  "sae_lens_id": "blocks.16.hook_resid_post",
  "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc",
  "sae_lens_version": "5.9.1",
  "sae_cfg_dict": {
    "architecture": "jumprelu",
    "d_in": 2304,
    "d_sae": 32768,
    "activation_fn_str": "relu",
    "apply_b_dec_to_input": true,
    "finetuning_scaling_factor": false,
    "context_size": 1024,
    "model_name": "gemma-2-2b",
    "hook_name": "blocks.16.hook_resid_post",
    "hook_layer": 16,
    "hook_head_index": null,
    "prepend_bos": true,
    "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B",
    "dataset_trust_remote_code": true,
    "normalize_activations": "none",
    "dtype": "torch.bfloat16",
    "device": "cuda",
    "sae_lens_training_version": "5.5.1",
    "activation_fn_kwargs": {
      "k": 40
    },
    "neuronpedia_id": null,
    "model_from_pretrained_kwargs": {
      "center_writing_weights": false
    },
    "seqpos_slice": [
      null
    ]
  },
  "eval_result_unstructured": {
    "LabHC/bias_in_bios_class_set1_results": {
      "sae_test_accuracy": {
        "0": 0.9470000267028809,
        "1": 0.9600000381469727,
        "2": 0.9500000476837158,
        "6": 0.9930000305175781,
        "9": 0.9670000672340393
      },
      "llm_test_accuracy": {
        "0": 0.956000030040741,
        "1": 0.9650000333786011,
        "2": 0.9620000720024109,
        "6": 0.9920000433921814,
        "9": 0.9790000319480896
      },
      "llm_top_1_test_accuracy": {
        "0": 0.572,
        "1": 0.622,
        "2": 0.668,
        "6": 0.754,
        "9": 0.726
      },
      "llm_top_2_test_accuracy": {
        "0": 0.57,
        "1": 0.674,
        "2": 0.762,
        "6": 0.862,
        "9": 0.726
      },
      "llm_top_5_test_accuracy": {
        "0": 0.742,
        "1": 0.667,
        "2": 0.784,
        "6": 0.889,
        "9": 0.88
      },
      "sae_top_1_test_accuracy": {
        "0": 0.865,
        "1": 0.636,
        "2": 0.808,
        "6": 0.978,
        "9": 0.934
      },
      "sae_top_2_test_accuracy": {
        "0": 0.865,
        "1": 0.73,
        "2": 0.844,
        "6": 0.98,
        "9": 0.943
      },
      "sae_top_5_test_accuracy": {
        "0": 0.872,
        "1": 0.819,
        "2": 0.859,
        "6": 0.99,
        "9": 0.944
      }
    },
    "LabHC/bias_in_bios_class_set2_results": {
      "sae_test_accuracy": {
        "11": 0.9580000638961792,
        "13": 0.9640000462532043,
        "14": 0.956000030040741,
        "18": 0.9170000553131104,
        "19": 0.9620000720024109
      },
      "llm_test_accuracy": {
        "11": 0.9620000720024109,
        "13": 0.9450000524520874,
        "14": 0.9580000638961792,
        "18": 0.9280000329017639,
        "19": 0.968000054359436
      },
      "llm_top_1_test_accuracy": {
        "11": 0.533,
        "13": 0.653,
        "14": 0.643,
        "18": 0.687,
        "19": 0.761
      },
      "llm_top_2_test_accuracy": {
        "11": 0.667,
        "13": 0.712,
        "14": 0.668,
        "18": 0.733,
        "19": 0.757
      },
      "llm_top_5_test_accuracy": {
        "11": 0.805,
        "13": 0.758,
        "14": 0.721,
        "18": 0.726,
        "19": 0.842
      },
      "sae_top_1_test_accuracy": {
        "11": 0.739,
        "13": 0.664,
        "14": 0.895,
        "18": 0.702,
        "19": 0.867
      },
      "sae_top_2_test_accuracy": {
        "11": 0.852,
        "13": 0.746,
        "14": 0.896,
        "18": 0.743,
        "19": 0.897
      },
      "sae_top_5_test_accuracy": {
        "11": 0.933,
        "13": 0.893,
        "14": 0.883,
        "18": 0.912,
        "19": 0.901
      }
    },
    "LabHC/bias_in_bios_class_set3_results": {
      "sae_test_accuracy": {
        "20": 0.9600000381469727,
        "21": 0.9180000424385071,
        "22": 0.9200000166893005,
        "25": 0.9620000720024109,
        "26": 0.8960000276565552
      },
      "llm_test_accuracy": {
        "20": 0.9640000462532043,
        "21": 0.9240000247955322,
        "22": 0.9160000681877136,
        "25": 0.9640000462532043,
        "26": 0.893000066280365
      },
      "llm_top_1_test_accuracy": {
        "20": 0.716,
        "21": 0.771,
        "22": 0.664,
        "25": 0.68,
        "26": 0.666
      },
      "llm_top_2_test_accuracy": {
        "20": 0.812,
        "21": 0.768,
        "22": 0.688,
        "25": 0.769,
        "26": 0.663
      },
      "llm_top_5_test_accuracy": {
        "20": 0.851,
        "21": 0.821,
        "22": 0.702,
        "25": 0.794,
        "26": 0.707
      },
      "sae_top_1_test_accuracy": {
        "20": 0.868,
        "21": 0.78,
        "22": 0.878,
        "25": 0.873,
        "26": 0.661
      },
      "sae_top_2_test_accuracy": {
        "20": 0.91,
        "21": 0.836,
        "22": 0.88,
        "25": 0.869,
        "26": 0.74
      },
      "sae_top_5_test_accuracy": {
        "20": 0.914,
        "21": 0.845,
        "22": 0.879,
        "25": 0.907,
        "26": 0.794
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_results": {
      "sae_test_accuracy": {
        "1": 0.9580000638961792,
        "2": 0.9390000700950623,
        "3": 0.9290000200271606,
        "5": 0.9320000410079956,
        "6": 0.8910000324249268
      },
      "llm_test_accuracy": {
        "1": 0.9580000638961792,
        "2": 0.940000057220459,
        "3": 0.9380000233650208,
        "5": 0.9220000505447388,
        "6": 0.8710000514984131
      },
      "llm_top_1_test_accuracy": {
        "1": 0.658,
        "2": 0.618,
        "3": 0.563,
        "5": 0.579,
        "6": 0.686
      },
      "llm_top_2_test_accuracy": {
        "1": 0.674,
        "2": 0.783,
        "3": 0.606,
        "5": 0.665,
        "6": 0.702
      },
      "llm_top_5_test_accuracy": {
        "1": 0.805,
        "2": 0.813,
        "3": 0.658,
        "5": 0.735,
        "6": 0.734
      },
      "sae_top_1_test_accuracy": {
        "1": 0.718,
        "2": 0.836,
        "3": 0.64,
        "5": 0.839,
        "6": 0.641
      },
      "sae_top_2_test_accuracy": {
        "1": 0.827,
        "2": 0.849,
        "3": 0.709,
        "5": 0.854,
        "6": 0.727
      },
      "sae_top_5_test_accuracy": {
        "1": 0.866,
        "2": 0.857,
        "3": 0.776,
        "5": 0.899,
        "6": 0.76
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
      "sae_test_accuracy": {
        "1.0": 0.9750000238418579,
        "5.0": 0.9740000367164612
      },
      "llm_test_accuracy": {
        "1.0": 0.9770000576972961,
        "5.0": 0.9770000576972961
      },
      "llm_top_1_test_accuracy": {
        "1.0": 0.683,
        "5.0": 0.683
      },
      "llm_top_2_test_accuracy": {
        "1.0": 0.736,
        "5.0": 0.736
      },
      "llm_top_5_test_accuracy": {
        "1.0": 0.777,
        "5.0": 0.777
      },
      "sae_top_1_test_accuracy": {
        "1.0": 0.839,
        "5.0": 0.839
      },
      "sae_top_2_test_accuracy": {
        "1.0": 0.847,
        "5.0": 0.847
      },
      "sae_top_5_test_accuracy": {
        "1.0": 0.943,
        "5.0": 0.943
      }
    },
    "codeparrot/github-code_results": {
      "sae_test_accuracy": {
        "C": 0.9610000252723694,
        "Python": 0.9850000739097595,
        "HTML": 0.987000048160553,
        "Java": 0.9620000720024109,
        "PHP": 0.9580000638961792
      },
      "llm_test_accuracy": {
        "C": 0.9450000524520874,
        "Python": 0.9920000433921814,
        "HTML": 0.9910000562667847,
        "Java": 0.9640000462532043,
        "PHP": 0.9570000171661377
      },
      "llm_top_1_test_accuracy": {
        "C": 0.568,
        "Python": 0.609,
        "HTML": 0.769,
        "Java": 0.602,
        "PHP": 0.571
      },
      "llm_top_2_test_accuracy": {
        "C": 0.683,
        "Python": 0.693,
        "HTML": 0.799,
        "Java": 0.691,
        "PHP": 0.664
      },
      "llm_top_5_test_accuracy": {
        "C": 0.769,
        "Python": 0.724,
        "HTML": 0.922,
        "Java": 0.729,
        "PHP": 0.713
      },
      "sae_top_1_test_accuracy": {
        "C": 0.858,
        "Python": 0.639,
        "HTML": 0.715,
        "Java": 0.613,
        "PHP": 0.913
      },
      "sae_top_2_test_accuracy": {
        "C": 0.884,
        "Python": 0.942,
        "HTML": 0.771,
        "Java": 0.626,
        "PHP": 0.92
      },
      "sae_top_5_test_accuracy": {
        "C": 0.89,
        "Python": 0.959,
        "HTML": 0.95,
        "Java": 0.894,
        "PHP": 0.939
      }
    },
    "fancyzhx/ag_news_results": {
      "sae_test_accuracy": {
        "0": 0.940000057220459,
        "1": 0.9890000224113464,
        "2": 0.9230000376701355,
        "3": 0.9500000476837158
      },
      "llm_test_accuracy": {
        "0": 0.9340000152587891,
        "1": 0.987000048160553,
        "2": 0.9360000491142273,
        "3": 0.9500000476837158
      },
      "llm_top_1_test_accuracy": {
        "0": 0.7,
        "1": 0.647,
        "2": 0.572,
        "3": 0.699
      },
      "llm_top_2_test_accuracy": {
        "0": 0.818,
        "1": 0.807,
        "2": 0.67,
        "3": 0.756
      },
      "llm_top_5_test_accuracy": {
        "0": 0.834,
        "1": 0.877,
        "2": 0.748,
        "3": 0.839
      },
      "sae_top_1_test_accuracy": {
        "0": 0.845,
        "1": 0.927,
        "2": 0.559,
        "3": 0.694
      },
      "sae_top_2_test_accuracy": {
        "0": 0.854,
        "1": 0.925,
        "2": 0.732,
        "3": 0.733
      },
      "sae_top_5_test_accuracy": {
        "0": 0.865,
        "1": 0.976,
        "2": 0.824,
        "3": 0.866
      }
    },
    "Helsinki-NLP/europarl_results": {
      "sae_test_accuracy": {
        "en": 0.9980000257492065,
        "fr": 1.0,
        "de": 0.999000072479248,
        "es": 0.9980000257492065,
        "nl": 1.0
      },
      "llm_test_accuracy": {
        "en": 1.0,
        "fr": 1.0,
        "de": 0.999000072479248,
        "es": 1.0,
        "nl": 1.0
      },
      "llm_top_1_test_accuracy": {
        "en": 0.916,
        "fr": 0.625,
        "de": 0.612,
        "es": 0.979,
        "nl": 0.716
      },
      "llm_top_2_test_accuracy": {
        "en": 0.921,
        "fr": 0.939,
        "de": 0.981,
        "es": 0.987,
        "nl": 0.786
      },
      "llm_top_5_test_accuracy": {
        "en": 0.945,
        "fr": 0.994,
        "de": 0.992,
        "es": 0.994,
        "nl": 0.999
      },
      "sae_top_1_test_accuracy": {
        "en": 0.917,
        "fr": 0.995,
        "de": 0.999,
        "es": 0.997,
        "nl": 0.999
      },
      "sae_top_2_test_accuracy": {
        "en": 0.973,
        "fr": 0.997,
        "de": 0.999,
        "es": 0.997,
        "nl": 0.999
      },
      "sae_top_5_test_accuracy": {
        "en": 0.995,
        "fr": 0.997,
        "de": 0.998,
        "es": 0.998,
        "nl": 1.0
      }
    }
  }
}