| { |
| "eval_type_id": "sparse_probing", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "LabHC/bias_in_bios_class_set2", |
| "LabHC/bias_in_bios_class_set3", |
| "canrager/amazon_reviews_mcauley_1and5", |
| "canrager/amazon_reviews_mcauley_1and5_sentiment", |
| "codeparrot/github-code", |
| "fancyzhx/ag_news", |
| "Helsinki-NLP/europarl" |
| ], |
| "probe_train_set_size": 4000, |
| "probe_test_set_size": 1000, |
| "context_length": 128, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "model_name": "gemma-2-2b", |
| "k_values": [ |
| 1, |
| 2, |
| 5 |
| ], |
| "lower_vram_usage": false |
| }, |
| "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0", |
| "datetime_epoch_millis": 1745753939106, |
| "eval_result_metrics": { |
| "llm": { |
| "llm_test_accuracy": 0.957375044748187, |
| "llm_top_1_test_accuracy": 0.6615, |
| "llm_top_2_test_accuracy": 0.7375375, |
| "llm_top_5_test_accuracy": 0.7839499999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null |
| }, |
| "sae": { |
| "sae_test_accuracy": 0.953193797916174, |
| "sae_top_1_test_accuracy": 0.77345, |
| "sae_top_2_test_accuracy": 0.8163124999999999, |
| "sae_top_5_test_accuracy": 0.8835437500000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_results", |
| "llm_test_accuracy": 0.9702000498771668, |
| "llm_top_1_test_accuracy": 0.6587999999999999, |
| "llm_top_2_test_accuracy": 0.7306, |
| "llm_top_5_test_accuracy": 0.7682, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9670000553131104, |
| "sae_top_1_test_accuracy": 0.8039999999999999, |
| "sae_top_2_test_accuracy": 0.8539999999999999, |
| "sae_top_5_test_accuracy": 0.8886, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set2_results", |
| "llm_test_accuracy": 0.9538000583648681, |
| "llm_top_1_test_accuracy": 0.6644, |
| "llm_top_2_test_accuracy": 0.7294, |
| "llm_top_5_test_accuracy": 0.7636000000000001, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9472000479698182, |
| "sae_top_1_test_accuracy": 0.7642, |
| "sae_top_2_test_accuracy": 0.7969999999999999, |
| "sae_top_5_test_accuracy": 0.8804000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set3_results", |
| "llm_test_accuracy": 0.9312000393867492, |
| "llm_top_1_test_accuracy": 0.6848000000000001, |
| "llm_top_2_test_accuracy": 0.726, |
| "llm_top_5_test_accuracy": 0.7602, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9280000567436218, |
| "sae_top_1_test_accuracy": 0.8119999999999999, |
| "sae_top_2_test_accuracy": 0.8314, |
| "sae_top_5_test_accuracy": 0.8744, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results", |
| "llm_test_accuracy": 0.9240000486373902, |
| "llm_top_1_test_accuracy": 0.6288, |
| "llm_top_2_test_accuracy": 0.6561999999999999, |
| "llm_top_5_test_accuracy": 0.6958, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9248000502586364, |
| "sae_top_1_test_accuracy": 0.7686, |
| "sae_top_2_test_accuracy": 0.8061999999999999, |
| "sae_top_5_test_accuracy": 0.8418000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results", |
| "llm_test_accuracy": 0.9570000469684601, |
| "llm_top_1_test_accuracy": 0.641, |
| "llm_top_2_test_accuracy": 0.701, |
| "llm_top_5_test_accuracy": 0.746, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9390000402927399, |
| "sae_top_1_test_accuracy": 0.6, |
| "sae_top_2_test_accuracy": 0.73, |
| "sae_top_5_test_accuracy": 0.84, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "codeparrot/github-code_results", |
| "llm_test_accuracy": 0.9712000489234924, |
| "llm_top_1_test_accuracy": 0.6437999999999999, |
| "llm_top_2_test_accuracy": 0.7306000000000001, |
| "llm_top_5_test_accuracy": 0.7847999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9700000286102295, |
| "sae_top_1_test_accuracy": 0.6948000000000001, |
| "sae_top_2_test_accuracy": 0.6756, |
| "sae_top_5_test_accuracy": 0.8413999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "fancyzhx/ag_news_results", |
| "llm_test_accuracy": 0.9520000368356705, |
| "llm_top_1_test_accuracy": 0.641, |
| "llm_top_2_test_accuracy": 0.7605, |
| "llm_top_5_test_accuracy": 0.826, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9507500529289246, |
| "sae_top_1_test_accuracy": 0.7859999999999999, |
| "sae_top_2_test_accuracy": 0.8384999999999999, |
| "sae_top_5_test_accuracy": 0.9027499999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "Helsinki-NLP/europarl_results", |
| "llm_test_accuracy": 0.9996000289916992, |
| "llm_top_1_test_accuracy": 0.7294, |
| "llm_top_2_test_accuracy": 0.866, |
| "llm_top_5_test_accuracy": 0.9269999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9988000512123107, |
| "sae_top_1_test_accuracy": 0.958, |
| "sae_top_2_test_accuracy": 0.9978, |
| "sae_top_5_test_accuracy": 0.999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.7.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.7.hook_resid_post", |
| "hook_layer": 7, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1_results": { |
| "sae_test_accuracy": { |
| "0": 0.9440000653266907, |
| "1": 0.9630000591278076, |
| "2": 0.9550000429153442, |
| "6": 0.9900000691413879, |
| "9": 0.9830000400543213 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9550000429153442, |
| "1": 0.968000054359436, |
| "2": 0.9540000557899475, |
| "6": 0.9920000433921814, |
| "9": 0.9820000529289246 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.592, |
| "1": 0.632, |
| "2": 0.6, |
| "6": 0.776, |
| "9": 0.694 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.672, |
| "1": 0.651, |
| "2": 0.696, |
| "6": 0.79, |
| "9": 0.844 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.688, |
| "1": 0.744, |
| "2": 0.747, |
| "6": 0.803, |
| "9": 0.859 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.623, |
| "1": 0.638, |
| "2": 0.84, |
| "6": 0.977, |
| "9": 0.942 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.67, |
| "1": 0.807, |
| "2": 0.863, |
| "6": 0.98, |
| "9": 0.95 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.773, |
| "1": 0.822, |
| "2": 0.907, |
| "6": 0.989, |
| "9": 0.952 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set2_results": { |
| "sae_test_accuracy": { |
| "11": 0.9480000734329224, |
| "13": 0.9550000429153442, |
| "14": 0.9500000476837158, |
| "18": 0.9190000295639038, |
| "19": 0.9640000462532043 |
| }, |
| "llm_test_accuracy": { |
| "11": 0.9630000591278076, |
| "13": 0.9530000686645508, |
| "14": 0.9580000638961792, |
| "18": 0.9360000491142273, |
| "19": 0.9590000510215759 |
| }, |
| "llm_top_1_test_accuracy": { |
| "11": 0.558, |
| "13": 0.67, |
| "14": 0.654, |
| "18": 0.663, |
| "19": 0.777 |
| }, |
| "llm_top_2_test_accuracy": { |
| "11": 0.771, |
| "13": 0.705, |
| "14": 0.684, |
| "18": 0.715, |
| "19": 0.772 |
| }, |
| "llm_top_5_test_accuracy": { |
| "11": 0.778, |
| "13": 0.756, |
| "14": 0.708, |
| "18": 0.721, |
| "19": 0.855 |
| }, |
| "sae_top_1_test_accuracy": { |
| "11": 0.734, |
| "13": 0.682, |
| "14": 0.886, |
| "18": 0.678, |
| "19": 0.841 |
| }, |
| "sae_top_2_test_accuracy": { |
| "11": 0.737, |
| "13": 0.772, |
| "14": 0.896, |
| "18": 0.738, |
| "19": 0.842 |
| }, |
| "sae_top_5_test_accuracy": { |
| "11": 0.924, |
| "13": 0.814, |
| "14": 0.876, |
| "18": 0.9, |
| "19": 0.888 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set3_results": { |
| "sae_test_accuracy": { |
| "20": 0.9540000557899475, |
| "21": 0.9190000295639038, |
| "22": 0.9250000715255737, |
| "25": 0.9540000557899475, |
| "26": 0.8880000710487366 |
| }, |
| "llm_test_accuracy": { |
| "20": 0.956000030040741, |
| "21": 0.9220000505447388, |
| "22": 0.9230000376701355, |
| "25": 0.9660000205039978, |
| "26": 0.8890000581741333 |
| }, |
| "llm_top_1_test_accuracy": { |
| "20": 0.718, |
| "21": 0.726, |
| "22": 0.631, |
| "25": 0.718, |
| "26": 0.631 |
| }, |
| "llm_top_2_test_accuracy": { |
| "20": 0.81, |
| "21": 0.758, |
| "22": 0.66, |
| "25": 0.727, |
| "26": 0.675 |
| }, |
| "llm_top_5_test_accuracy": { |
| "20": 0.828, |
| "21": 0.772, |
| "22": 0.705, |
| "25": 0.83, |
| "26": 0.666 |
| }, |
| "sae_top_1_test_accuracy": { |
| "20": 0.841, |
| "21": 0.828, |
| "22": 0.898, |
| "25": 0.853, |
| "26": 0.64 |
| }, |
| "sae_top_2_test_accuracy": { |
| "20": 0.861, |
| "21": 0.829, |
| "22": 0.895, |
| "25": 0.848, |
| "26": 0.724 |
| }, |
| "sae_top_5_test_accuracy": { |
| "20": 0.915, |
| "21": 0.858, |
| "22": 0.893, |
| "25": 0.912, |
| "26": 0.794 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_results": { |
| "sae_test_accuracy": { |
| "1": 0.9540000557899475, |
| "2": 0.9360000491142273, |
| "3": 0.9190000295639038, |
| "5": 0.9310000538825989, |
| "6": 0.8840000629425049 |
| }, |
| "llm_test_accuracy": { |
| "1": 0.9410000443458557, |
| "2": 0.9390000700950623, |
| "3": 0.9260000586509705, |
| "5": 0.940000057220459, |
| "6": 0.8740000128746033 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1": 0.722, |
| "2": 0.631, |
| "3": 0.621, |
| "5": 0.562, |
| "6": 0.608 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1": 0.77, |
| "2": 0.638, |
| "3": 0.613, |
| "5": 0.622, |
| "6": 0.638 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1": 0.794, |
| "2": 0.755, |
| "3": 0.632, |
| "5": 0.623, |
| "6": 0.675 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1": 0.821, |
| "2": 0.817, |
| "3": 0.638, |
| "5": 0.857, |
| "6": 0.71 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1": 0.87, |
| "2": 0.821, |
| "3": 0.705, |
| "5": 0.89, |
| "6": 0.745 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1": 0.914, |
| "2": 0.875, |
| "3": 0.749, |
| "5": 0.906, |
| "6": 0.765 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_sentiment_results": { |
| "sae_test_accuracy": { |
| "1.0": 0.9380000233650208, |
| "5.0": 0.940000057220459 |
| }, |
| "llm_test_accuracy": { |
| "1.0": 0.9550000429153442, |
| "5.0": 0.9590000510215759 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1.0": 0.641, |
| "5.0": 0.641 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1.0": 0.701, |
| "5.0": 0.701 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1.0": 0.746, |
| "5.0": 0.746 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1.0": 0.6, |
| "5.0": 0.6 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1.0": 0.73, |
| "5.0": 0.73 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1.0": 0.84, |
| "5.0": 0.84 |
| } |
| }, |
| "codeparrot/github-code_results": { |
| "sae_test_accuracy": { |
| "C": 0.9570000171661377, |
| "Python": 0.9790000319480896, |
| "HTML": 0.9890000224113464, |
| "Java": 0.9650000333786011, |
| "PHP": 0.9600000381469727 |
| }, |
| "llm_test_accuracy": { |
| "C": 0.9580000638961792, |
| "Python": 0.9880000352859497, |
| "HTML": 0.9850000739097595, |
| "Java": 0.9610000252723694, |
| "PHP": 0.9640000462532043 |
| }, |
| "llm_top_1_test_accuracy": { |
| "C": 0.572, |
| "Python": 0.622, |
| "HTML": 0.797, |
| "Java": 0.622, |
| "PHP": 0.606 |
| }, |
| "llm_top_2_test_accuracy": { |
| "C": 0.68, |
| "Python": 0.708, |
| "HTML": 0.926, |
| "Java": 0.7, |
| "PHP": 0.639 |
| }, |
| "llm_top_5_test_accuracy": { |
| "C": 0.786, |
| "Python": 0.734, |
| "HTML": 0.953, |
| "Java": 0.774, |
| "PHP": 0.677 |
| }, |
| "sae_top_1_test_accuracy": { |
| "C": 0.638, |
| "Python": 0.638, |
| "HTML": 0.699, |
| "Java": 0.665, |
| "PHP": 0.834 |
| }, |
| "sae_top_2_test_accuracy": { |
| "C": 0.675, |
| "Python": 0.64, |
| "HTML": 0.837, |
| "Java": 0.647, |
| "PHP": 0.579 |
| }, |
| "sae_top_5_test_accuracy": { |
| "C": 0.817, |
| "Python": 0.951, |
| "HTML": 0.947, |
| "Java": 0.657, |
| "PHP": 0.835 |
| } |
| }, |
| "fancyzhx/ag_news_results": { |
| "sae_test_accuracy": { |
| "0": 0.940000057220459, |
| "1": 0.984000027179718, |
| "2": 0.9310000538825989, |
| "3": 0.9480000734329224 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9420000314712524, |
| "1": 0.9890000224113464, |
| "2": 0.9290000200271606, |
| "3": 0.9480000734329224 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.608, |
| "1": 0.679, |
| "2": 0.651, |
| "3": 0.626 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.794, |
| "1": 0.803, |
| "2": 0.697, |
| "3": 0.748 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.819, |
| "1": 0.864, |
| "2": 0.767, |
| "3": 0.854 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.822, |
| "1": 0.97, |
| "2": 0.606, |
| "3": 0.746 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.851, |
| "1": 0.97, |
| "2": 0.774, |
| "3": 0.759 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.896, |
| "1": 0.977, |
| "2": 0.857, |
| "3": 0.881 |
| } |
| }, |
| "Helsinki-NLP/europarl_results": { |
| "sae_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 1.0, |
| "de": 0.999000072479248, |
| "es": 0.9970000386238098, |
| "nl": 0.999000072479248 |
| }, |
| "llm_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 0.999000072479248, |
| "nl": 1.0 |
| }, |
| "llm_top_1_test_accuracy": { |
| "en": 0.833, |
| "fr": 0.598, |
| "de": 0.684, |
| "es": 0.909, |
| "nl": 0.623 |
| }, |
| "llm_top_2_test_accuracy": { |
| "en": 0.858, |
| "fr": 0.885, |
| "de": 0.81, |
| "es": 0.964, |
| "nl": 0.813 |
| }, |
| "llm_top_5_test_accuracy": { |
| "en": 0.929, |
| "fr": 0.962, |
| "de": 0.883, |
| "es": 0.988, |
| "nl": 0.873 |
| }, |
| "sae_top_1_test_accuracy": { |
| "en": 0.999, |
| "fr": 0.98, |
| "de": 1.0, |
| "es": 0.996, |
| "nl": 0.815 |
| }, |
| "sae_top_2_test_accuracy": { |
| "en": 0.997, |
| "fr": 0.997, |
| "de": 1.0, |
| "es": 0.996, |
| "nl": 0.999 |
| }, |
| "sae_top_5_test_accuracy": { |
| "en": 0.999, |
| "fr": 0.998, |
| "de": 1.0, |
| "es": 0.999, |
| "nl": 0.999 |
| } |
| } |
| } |
| } |