| { |
| "eval_type_id": "sparse_probing", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "LabHC/bias_in_bios_class_set2", |
| "LabHC/bias_in_bios_class_set3", |
| "canrager/amazon_reviews_mcauley_1and5", |
| "canrager/amazon_reviews_mcauley_1and5_sentiment", |
| "codeparrot/github-code", |
| "fancyzhx/ag_news", |
| "Helsinki-NLP/europarl" |
| ], |
| "probe_train_set_size": 4000, |
| "probe_test_set_size": 1000, |
| "context_length": 128, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "model_name": "gemma-2-2b", |
| "k_values": [ |
| 1, |
| 2, |
| 5 |
| ], |
| "lower_vram_usage": false |
| }, |
| "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0", |
| "datetime_epoch_millis": 1745754512482, |
| "eval_result_metrics": { |
| "llm": { |
| "llm_test_accuracy": 0.9581000424921513, |
| "llm_top_1_test_accuracy": 0.6694562500000001, |
| "llm_top_2_test_accuracy": 0.71423125, |
| "llm_top_5_test_accuracy": 0.771375, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null |
| }, |
| "sae": { |
| "sae_test_accuracy": 0.9540125455707311, |
| "sae_top_1_test_accuracy": 0.73803125, |
| "sae_top_2_test_accuracy": 0.8065937499999999, |
| "sae_top_5_test_accuracy": 0.8867437499999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_results", |
| "llm_test_accuracy": 0.9668000340461731, |
| "llm_top_1_test_accuracy": 0.6692, |
| "llm_top_2_test_accuracy": 0.6914, |
| "llm_top_5_test_accuracy": 0.7702, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9590000510215759, |
| "sae_top_1_test_accuracy": 0.761, |
| "sae_top_2_test_accuracy": 0.7788000000000002, |
| "sae_top_5_test_accuracy": 0.8400000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set2_results", |
| "llm_test_accuracy": 0.9554000496864319, |
| "llm_top_1_test_accuracy": 0.6694, |
| "llm_top_2_test_accuracy": 0.7108000000000001, |
| "llm_top_5_test_accuracy": 0.7605999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9470000386238098, |
| "sae_top_1_test_accuracy": 0.7244, |
| "sae_top_2_test_accuracy": 0.7718, |
| "sae_top_5_test_accuracy": 0.8438000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set3_results", |
| "llm_test_accuracy": 0.9288000583648681, |
| "llm_top_1_test_accuracy": 0.6958, |
| "llm_top_2_test_accuracy": 0.731, |
| "llm_top_5_test_accuracy": 0.7586, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9256000399589539, |
| "sae_top_1_test_accuracy": 0.652, |
| "sae_top_2_test_accuracy": 0.7776, |
| "sae_top_5_test_accuracy": 0.8568, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results", |
| "llm_test_accuracy": 0.9176000475883483, |
| "llm_top_1_test_accuracy": 0.5853999999999999, |
| "llm_top_2_test_accuracy": 0.6020000000000001, |
| "llm_top_5_test_accuracy": 0.649, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9098000407218934, |
| "sae_top_1_test_accuracy": 0.6858000000000001, |
| "sae_top_2_test_accuracy": 0.7746, |
| "sae_top_5_test_accuracy": 0.8192, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results", |
| "llm_test_accuracy": 0.9775000512599945, |
| "llm_top_1_test_accuracy": 0.686, |
| "llm_top_2_test_accuracy": 0.74, |
| "llm_top_5_test_accuracy": 0.783, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9725000560283661, |
| "sae_top_1_test_accuracy": 0.8, |
| "sae_top_2_test_accuracy": 0.844, |
| "sae_top_5_test_accuracy": 0.926, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "codeparrot/github-code_results", |
| "llm_test_accuracy": 0.970400047302246, |
| "llm_top_1_test_accuracy": 0.632, |
| "llm_top_2_test_accuracy": 0.6966, |
| "llm_top_5_test_accuracy": 0.7514000000000001, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.968600046634674, |
| "sae_top_1_test_accuracy": 0.6146, |
| "sae_top_2_test_accuracy": 0.7302, |
| "sae_top_5_test_accuracy": 0.9276, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "fancyzhx/ag_news_results", |
| "llm_test_accuracy": 0.9485000371932983, |
| "llm_top_1_test_accuracy": 0.69625, |
| "llm_top_2_test_accuracy": 0.7602500000000001, |
| "llm_top_5_test_accuracy": 0.816, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9510000497102737, |
| "sae_top_1_test_accuracy": 0.74225, |
| "sae_top_2_test_accuracy": 0.8317499999999999, |
| "sae_top_5_test_accuracy": 0.88275, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "Helsinki-NLP/europarl_results", |
| "llm_test_accuracy": 0.9998000144958497, |
| "llm_top_1_test_accuracy": 0.7216000000000001, |
| "llm_top_2_test_accuracy": 0.7817999999999999, |
| "llm_top_5_test_accuracy": 0.8822000000000001, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9986000418663025, |
| "sae_top_1_test_accuracy": 0.9242000000000001, |
| "sae_top_2_test_accuracy": 0.944, |
| "sae_top_5_test_accuracy": 0.9978, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.14.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.14.hook_resid_post", |
| "hook_layer": 14, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1_results": { |
| "sae_test_accuracy": { |
| "0": 0.940000057220459, |
| "1": 0.9530000686645508, |
| "2": 0.9470000267028809, |
| "6": 0.987000048160553, |
| "9": 0.968000054359436 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9430000185966492, |
| "1": 0.9640000462532043, |
| "2": 0.9610000252723694, |
| "6": 0.9920000433921814, |
| "9": 0.9740000367164612 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.569, |
| "1": 0.646, |
| "2": 0.664, |
| "6": 0.801, |
| "9": 0.666 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.573, |
| "1": 0.637, |
| "2": 0.732, |
| "6": 0.817, |
| "9": 0.698 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.651, |
| "1": 0.678, |
| "2": 0.761, |
| "6": 0.9, |
| "9": 0.861 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.578, |
| "1": 0.652, |
| "2": 0.868, |
| "6": 0.796, |
| "9": 0.911 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.628, |
| "1": 0.652, |
| "2": 0.891, |
| "6": 0.812, |
| "9": 0.911 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.679, |
| "1": 0.78, |
| "2": 0.888, |
| "6": 0.902, |
| "9": 0.951 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set2_results": { |
| "sae_test_accuracy": { |
| "11": 0.9630000591278076, |
| "13": 0.9500000476837158, |
| "14": 0.9460000395774841, |
| "18": 0.9150000214576721, |
| "19": 0.9610000252723694 |
| }, |
| "llm_test_accuracy": { |
| "11": 0.9640000462532043, |
| "13": 0.9550000429153442, |
| "14": 0.9630000591278076, |
| "18": 0.9360000491142273, |
| "19": 0.9590000510215759 |
| }, |
| "llm_top_1_test_accuracy": { |
| "11": 0.545, |
| "13": 0.666, |
| "14": 0.674, |
| "18": 0.679, |
| "19": 0.783 |
| }, |
| "llm_top_2_test_accuracy": { |
| "11": 0.676, |
| "13": 0.679, |
| "14": 0.688, |
| "18": 0.733, |
| "19": 0.778 |
| }, |
| "llm_top_5_test_accuracy": { |
| "11": 0.814, |
| "13": 0.728, |
| "14": 0.713, |
| "18": 0.723, |
| "19": 0.825 |
| }, |
| "sae_top_1_test_accuracy": { |
| "11": 0.85, |
| "13": 0.672, |
| "14": 0.646, |
| "18": 0.687, |
| "19": 0.767 |
| }, |
| "sae_top_2_test_accuracy": { |
| "11": 0.853, |
| "13": 0.673, |
| "14": 0.836, |
| "18": 0.69, |
| "19": 0.807 |
| }, |
| "sae_top_5_test_accuracy": { |
| "11": 0.866, |
| "13": 0.807, |
| "14": 0.884, |
| "18": 0.781, |
| "19": 0.881 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set3_results": { |
| "sae_test_accuracy": { |
| "20": 0.956000030040741, |
| "21": 0.9200000166893005, |
| "22": 0.9140000343322754, |
| "25": 0.9590000510215759, |
| "26": 0.8790000677108765 |
| }, |
| "llm_test_accuracy": { |
| "20": 0.9620000720024109, |
| "21": 0.9180000424385071, |
| "22": 0.9080000519752502, |
| "25": 0.9580000638961792, |
| "26": 0.8980000615119934 |
| }, |
| "llm_top_1_test_accuracy": { |
| "20": 0.713, |
| "21": 0.784, |
| "22": 0.653, |
| "25": 0.718, |
| "26": 0.611 |
| }, |
| "llm_top_2_test_accuracy": { |
| "20": 0.787, |
| "21": 0.762, |
| "22": 0.679, |
| "25": 0.75, |
| "26": 0.677 |
| }, |
| "llm_top_5_test_accuracy": { |
| "20": 0.852, |
| "21": 0.788, |
| "22": 0.694, |
| "25": 0.784, |
| "26": 0.675 |
| }, |
| "sae_top_1_test_accuracy": { |
| "20": 0.834, |
| "21": 0.521, |
| "22": 0.581, |
| "25": 0.716, |
| "26": 0.608 |
| }, |
| "sae_top_2_test_accuracy": { |
| "20": 0.899, |
| "21": 0.783, |
| "22": 0.813, |
| "25": 0.72, |
| "26": 0.673 |
| }, |
| "sae_top_5_test_accuracy": { |
| "20": 0.921, |
| "21": 0.819, |
| "22": 0.893, |
| "25": 0.861, |
| "26": 0.79 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_results": { |
| "sae_test_accuracy": { |
| "1": 0.9440000653266907, |
| "2": 0.9200000166893005, |
| "3": 0.909000039100647, |
| "5": 0.9140000343322754, |
| "6": 0.862000048160553 |
| }, |
| "llm_test_accuracy": { |
| "1": 0.9520000219345093, |
| "2": 0.9270000457763672, |
| "3": 0.909000039100647, |
| "5": 0.9250000715255737, |
| "6": 0.8750000596046448 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1": 0.62, |
| "2": 0.581, |
| "3": 0.582, |
| "5": 0.554, |
| "6": 0.59 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1": 0.628, |
| "2": 0.622, |
| "3": 0.589, |
| "5": 0.559, |
| "6": 0.612 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1": 0.698, |
| "2": 0.632, |
| "3": 0.638, |
| "5": 0.604, |
| "6": 0.673 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1": 0.629, |
| "2": 0.624, |
| "3": 0.559, |
| "5": 0.887, |
| "6": 0.73 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1": 0.782, |
| "2": 0.892, |
| "3": 0.586, |
| "5": 0.888, |
| "6": 0.725 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1": 0.887, |
| "2": 0.896, |
| "3": 0.686, |
| "5": 0.89, |
| "6": 0.737 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_sentiment_results": { |
| "sae_test_accuracy": { |
| "1.0": 0.9720000624656677, |
| "5.0": 0.9730000495910645 |
| }, |
| "llm_test_accuracy": { |
| "1.0": 0.9770000576972961, |
| "5.0": 0.9780000448226929 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1.0": 0.686, |
| "5.0": 0.686 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1.0": 0.74, |
| "5.0": 0.74 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1.0": 0.783, |
| "5.0": 0.783 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1.0": 0.8, |
| "5.0": 0.8 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1.0": 0.844, |
| "5.0": 0.844 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1.0": 0.926, |
| "5.0": 0.926 |
| } |
| }, |
| "codeparrot/github-code_results": { |
| "sae_test_accuracy": { |
| "C": 0.9510000348091125, |
| "Python": 0.9820000529289246, |
| "HTML": 0.987000048160553, |
| "Java": 0.9630000591278076, |
| "PHP": 0.9600000381469727 |
| }, |
| "llm_test_accuracy": { |
| "C": 0.9530000686645508, |
| "Python": 0.9880000352859497, |
| "HTML": 0.987000048160553, |
| "Java": 0.9690000414848328, |
| "PHP": 0.9550000429153442 |
| }, |
| "llm_top_1_test_accuracy": { |
| "C": 0.576, |
| "Python": 0.623, |
| "HTML": 0.726, |
| "Java": 0.648, |
| "PHP": 0.587 |
| }, |
| "llm_top_2_test_accuracy": { |
| "C": 0.69, |
| "Python": 0.678, |
| "HTML": 0.813, |
| "Java": 0.66, |
| "PHP": 0.642 |
| }, |
| "llm_top_5_test_accuracy": { |
| "C": 0.694, |
| "Python": 0.72, |
| "HTML": 0.941, |
| "Java": 0.722, |
| "PHP": 0.68 |
| }, |
| "sae_top_1_test_accuracy": { |
| "C": 0.613, |
| "Python": 0.615, |
| "HTML": 0.585, |
| "Java": 0.663, |
| "PHP": 0.597 |
| }, |
| "sae_top_2_test_accuracy": { |
| "C": 0.626, |
| "Python": 0.953, |
| "HTML": 0.859, |
| "Java": 0.625, |
| "PHP": 0.588 |
| }, |
| "sae_top_5_test_accuracy": { |
| "C": 0.868, |
| "Python": 0.962, |
| "HTML": 0.952, |
| "Java": 0.921, |
| "PHP": 0.935 |
| } |
| }, |
| "fancyzhx/ag_news_results": { |
| "sae_test_accuracy": { |
| "0": 0.9350000619888306, |
| "1": 0.9780000448226929, |
| "2": 0.9470000267028809, |
| "3": 0.9440000653266907 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.937000036239624, |
| "1": 0.9880000352859497, |
| "2": 0.9270000457763672, |
| "3": 0.9420000314712524 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.691, |
| "1": 0.763, |
| "2": 0.653, |
| "3": 0.678 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.809, |
| "1": 0.792, |
| "2": 0.66, |
| "3": 0.78 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.82, |
| "1": 0.877, |
| "2": 0.739, |
| "3": 0.828 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.717, |
| "1": 0.818, |
| "2": 0.777, |
| "3": 0.657 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.76, |
| "1": 0.939, |
| "2": 0.837, |
| "3": 0.791 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.846, |
| "1": 0.961, |
| "2": 0.862, |
| "3": 0.862 |
| } |
| }, |
| "Helsinki-NLP/europarl_results": { |
| "sae_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 0.9980000257492065, |
| "de": 0.9970000386238098, |
| "es": 0.999000072479248, |
| "nl": 1.0 |
| }, |
| "llm_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "llm_top_1_test_accuracy": { |
| "en": 0.843, |
| "fr": 0.627, |
| "de": 0.574, |
| "es": 0.909, |
| "nl": 0.655 |
| }, |
| "llm_top_2_test_accuracy": { |
| "en": 0.856, |
| "fr": 0.603, |
| "de": 0.778, |
| "es": 0.933, |
| "nl": 0.739 |
| }, |
| "llm_top_5_test_accuracy": { |
| "en": 0.938, |
| "fr": 0.897, |
| "de": 0.85, |
| "es": 0.979, |
| "nl": 0.747 |
| }, |
| "sae_top_1_test_accuracy": { |
| "en": 0.998, |
| "fr": 0.99, |
| "de": 0.996, |
| "es": 0.996, |
| "nl": 0.641 |
| }, |
| "sae_top_2_test_accuracy": { |
| "en": 0.998, |
| "fr": 0.995, |
| "de": 0.997, |
| "es": 0.993, |
| "nl": 0.737 |
| }, |
| "sae_top_5_test_accuracy": { |
| "en": 0.999, |
| "fr": 0.997, |
| "de": 1.0, |
| "es": 0.996, |
| "nl": 0.997 |
| } |
| } |
| } |
| } |