| { |
| "eval_type_id": "sparse_probing", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "LabHC/bias_in_bios_class_set2", |
| "LabHC/bias_in_bios_class_set3", |
| "canrager/amazon_reviews_mcauley_1and5", |
| "canrager/amazon_reviews_mcauley_1and5_sentiment", |
| "codeparrot/github-code", |
| "fancyzhx/ag_news", |
| "Helsinki-NLP/europarl" |
| ], |
| "probe_train_set_size": 4000, |
| "probe_test_set_size": 1000, |
| "context_length": 128, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "model_name": "gemma-2-2b", |
| "k_values": [ |
| 1, |
| 2, |
| 5 |
| ], |
| "lower_vram_usage": false |
| }, |
| "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0", |
| "datetime_epoch_millis": 1745753694950, |
| "eval_result_metrics": { |
| "llm": { |
| "llm_test_accuracy": 0.9466937903314828, |
| "llm_top_1_test_accuracy": 0.69093125, |
| "llm_top_2_test_accuracy": 0.7403625, |
| "llm_top_5_test_accuracy": 0.79135, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null |
| }, |
| "sae": { |
| "sae_test_accuracy": 0.9433187898248434, |
| "sae_top_1_test_accuracy": 0.7603125, |
| "sae_top_2_test_accuracy": 0.81953125, |
| "sae_top_5_test_accuracy": 0.8717812500000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_results", |
| "llm_test_accuracy": 0.963200044631958, |
| "llm_top_1_test_accuracy": 0.6910000000000001, |
| "llm_top_2_test_accuracy": 0.7432000000000001, |
| "llm_top_5_test_accuracy": 0.7888000000000002, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9588000535964966, |
| "sae_top_1_test_accuracy": 0.8006, |
| "sae_top_2_test_accuracy": 0.9002000000000001, |
| "sae_top_5_test_accuracy": 0.9186, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set2_results", |
| "llm_test_accuracy": 0.9478000402450562, |
| "llm_top_1_test_accuracy": 0.705, |
| "llm_top_2_test_accuracy": 0.7384000000000001, |
| "llm_top_5_test_accuracy": 0.7935999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9452000379562377, |
| "sae_top_1_test_accuracy": 0.7128, |
| "sae_top_2_test_accuracy": 0.7888, |
| "sae_top_5_test_accuracy": 0.8582000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set3_results", |
| "llm_test_accuracy": 0.9212000370025635, |
| "llm_top_1_test_accuracy": 0.6607999999999999, |
| "llm_top_2_test_accuracy": 0.7044, |
| "llm_top_5_test_accuracy": 0.7558, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9162000417709351, |
| "sae_top_1_test_accuracy": 0.7856000000000001, |
| "sae_top_2_test_accuracy": 0.8376000000000001, |
| "sae_top_5_test_accuracy": 0.8533999999999999, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results", |
| "llm_test_accuracy": 0.9076000332832337, |
| "llm_top_1_test_accuracy": 0.6607999999999999, |
| "llm_top_2_test_accuracy": 0.6894, |
| "llm_top_5_test_accuracy": 0.7236, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.90840003490448, |
| "sae_top_1_test_accuracy": 0.7083999999999999, |
| "sae_top_2_test_accuracy": 0.76, |
| "sae_top_5_test_accuracy": 0.8152000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results", |
| "llm_test_accuracy": 0.9170000553131104, |
| "llm_top_1_test_accuracy": 0.63, |
| "llm_top_2_test_accuracy": 0.654, |
| "llm_top_5_test_accuracy": 0.668, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9075000286102295, |
| "sae_top_1_test_accuracy": 0.691, |
| "sae_top_2_test_accuracy": 0.735, |
| "sae_top_5_test_accuracy": 0.819, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "codeparrot/github-code_results", |
| "llm_test_accuracy": 0.969200050830841, |
| "llm_top_1_test_accuracy": 0.631, |
| "llm_top_2_test_accuracy": 0.7223999999999999, |
| "llm_top_5_test_accuracy": 0.8019999999999999, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9676000475883484, |
| "sae_top_1_test_accuracy": 0.6763999999999999, |
| "sae_top_2_test_accuracy": 0.7544000000000001, |
| "sae_top_5_test_accuracy": 0.8506, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "fancyzhx/ag_news_results", |
| "llm_test_accuracy": 0.9477500468492508, |
| "llm_top_1_test_accuracy": 0.7222500000000001, |
| "llm_top_2_test_accuracy": 0.7675000000000001, |
| "llm_top_5_test_accuracy": 0.838, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9432500451803207, |
| "sae_top_1_test_accuracy": 0.7815, |
| "sae_top_2_test_accuracy": 0.82125, |
| "sae_top_5_test_accuracy": 0.8632500000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| }, |
| { |
| "dataset_name": "Helsinki-NLP/europarl_results", |
| "llm_test_accuracy": 0.9998000144958497, |
| "llm_top_1_test_accuracy": 0.8266, |
| "llm_top_2_test_accuracy": 0.9036, |
| "llm_top_5_test_accuracy": 0.961, |
| "llm_top_10_test_accuracy": null, |
| "llm_top_20_test_accuracy": null, |
| "llm_top_50_test_accuracy": null, |
| "llm_top_100_test_accuracy": null, |
| "sae_test_accuracy": 0.9996000289916992, |
| "sae_top_1_test_accuracy": 0.9262, |
| "sae_top_2_test_accuracy": 0.959, |
| "sae_top_5_test_accuracy": 0.9960000000000001, |
| "sae_top_10_test_accuracy": null, |
| "sae_top_20_test_accuracy": null, |
| "sae_top_50_test_accuracy": null, |
| "sae_top_100_test_accuracy": null |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.4.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.4.hook_resid_post", |
| "hook_layer": 4, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1_results": { |
| "sae_test_accuracy": { |
| "0": 0.940000057220459, |
| "1": 0.9550000429153442, |
| "2": 0.9390000700950623, |
| "6": 0.9860000610351562, |
| "9": 0.9740000367164612 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9440000653266907, |
| "1": 0.9540000557899475, |
| "2": 0.9460000395774841, |
| "6": 0.9940000176429749, |
| "9": 0.9780000448226929 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.617, |
| "1": 0.648, |
| "2": 0.723, |
| "6": 0.787, |
| "9": 0.68 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.745, |
| "1": 0.644, |
| "2": 0.762, |
| "6": 0.785, |
| "9": 0.78 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.795, |
| "1": 0.688, |
| "2": 0.752, |
| "6": 0.905, |
| "9": 0.804 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.634, |
| "1": 0.589, |
| "2": 0.889, |
| "6": 0.941, |
| "9": 0.95 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.826, |
| "1": 0.855, |
| "2": 0.896, |
| "6": 0.975, |
| "9": 0.949 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.849, |
| "1": 0.895, |
| "2": 0.915, |
| "6": 0.974, |
| "9": 0.96 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set2_results": { |
| "sae_test_accuracy": { |
| "11": 0.9500000476837158, |
| "13": 0.9430000185966492, |
| "14": 0.9480000734329224, |
| "18": 0.9240000247955322, |
| "19": 0.9610000252723694 |
| }, |
| "llm_test_accuracy": { |
| "11": 0.9490000605583191, |
| "13": 0.9470000267028809, |
| "14": 0.9550000429153442, |
| "18": 0.9230000376701355, |
| "19": 0.9650000333786011 |
| }, |
| "llm_top_1_test_accuracy": { |
| "11": 0.71, |
| "13": 0.704, |
| "14": 0.651, |
| "18": 0.687, |
| "19": 0.773 |
| }, |
| "llm_top_2_test_accuracy": { |
| "11": 0.789, |
| "13": 0.715, |
| "14": 0.684, |
| "18": 0.715, |
| "19": 0.789 |
| }, |
| "llm_top_5_test_accuracy": { |
| "11": 0.831, |
| "13": 0.795, |
| "14": 0.778, |
| "18": 0.728, |
| "19": 0.836 |
| }, |
| "sae_top_1_test_accuracy": { |
| "11": 0.569, |
| "13": 0.707, |
| "14": 0.89, |
| "18": 0.601, |
| "19": 0.797 |
| }, |
| "sae_top_2_test_accuracy": { |
| "11": 0.753, |
| "13": 0.761, |
| "14": 0.89, |
| "18": 0.725, |
| "19": 0.815 |
| }, |
| "sae_top_5_test_accuracy": { |
| "11": 0.933, |
| "13": 0.773, |
| "14": 0.888, |
| "18": 0.826, |
| "19": 0.871 |
| } |
| }, |
| "LabHC/bias_in_bios_class_set3_results": { |
| "sae_test_accuracy": { |
| "20": 0.9510000348091125, |
| "21": 0.8960000276565552, |
| "22": 0.9130000472068787, |
| "25": 0.9490000605583191, |
| "26": 0.8720000386238098 |
| }, |
| "llm_test_accuracy": { |
| "20": 0.9470000267028809, |
| "21": 0.906000018119812, |
| "22": 0.9160000681877136, |
| "25": 0.956000030040741, |
| "26": 0.8810000419616699 |
| }, |
| "llm_top_1_test_accuracy": { |
| "20": 0.739, |
| "21": 0.596, |
| "22": 0.614, |
| "25": 0.706, |
| "26": 0.649 |
| }, |
| "llm_top_2_test_accuracy": { |
| "20": 0.82, |
| "21": 0.728, |
| "22": 0.604, |
| "25": 0.713, |
| "26": 0.657 |
| }, |
| "llm_top_5_test_accuracy": { |
| "20": 0.856, |
| "21": 0.776, |
| "22": 0.693, |
| "25": 0.769, |
| "26": 0.685 |
| }, |
| "sae_top_1_test_accuracy": { |
| "20": 0.736, |
| "21": 0.755, |
| "22": 0.882, |
| "25": 0.879, |
| "26": 0.676 |
| }, |
| "sae_top_2_test_accuracy": { |
| "20": 0.905, |
| "21": 0.767, |
| "22": 0.879, |
| "25": 0.894, |
| "26": 0.743 |
| }, |
| "sae_top_5_test_accuracy": { |
| "20": 0.909, |
| "21": 0.769, |
| "22": 0.883, |
| "25": 0.916, |
| "26": 0.79 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_results": { |
| "sae_test_accuracy": { |
| "1": 0.9360000491142273, |
| "2": 0.9230000376701355, |
| "3": 0.9150000214576721, |
| "5": 0.906000018119812, |
| "6": 0.862000048160553 |
| }, |
| "llm_test_accuracy": { |
| "1": 0.9440000653266907, |
| "2": 0.9320000410079956, |
| "3": 0.906000018119812, |
| "5": 0.9150000214576721, |
| "6": 0.8410000205039978 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1": 0.707, |
| "2": 0.698, |
| "3": 0.608, |
| "5": 0.709, |
| "6": 0.582 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1": 0.743, |
| "2": 0.773, |
| "3": 0.59, |
| "5": 0.725, |
| "6": 0.616 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1": 0.774, |
| "2": 0.81, |
| "3": 0.652, |
| "5": 0.718, |
| "6": 0.664 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1": 0.658, |
| "2": 0.804, |
| "3": 0.583, |
| "5": 0.85, |
| "6": 0.647 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1": 0.873, |
| "2": 0.815, |
| "3": 0.608, |
| "5": 0.842, |
| "6": 0.662 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1": 0.895, |
| "2": 0.838, |
| "3": 0.76, |
| "5": 0.866, |
| "6": 0.717 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5_sentiment_results": { |
| "sae_test_accuracy": { |
| "1.0": 0.909000039100647, |
| "5.0": 0.906000018119812 |
| }, |
| "llm_test_accuracy": { |
| "1.0": 0.9170000553131104, |
| "5.0": 0.9170000553131104 |
| }, |
| "llm_top_1_test_accuracy": { |
| "1.0": 0.63, |
| "5.0": 0.63 |
| }, |
| "llm_top_2_test_accuracy": { |
| "1.0": 0.654, |
| "5.0": 0.654 |
| }, |
| "llm_top_5_test_accuracy": { |
| "1.0": 0.668, |
| "5.0": 0.668 |
| }, |
| "sae_top_1_test_accuracy": { |
| "1.0": 0.691, |
| "5.0": 0.691 |
| }, |
| "sae_top_2_test_accuracy": { |
| "1.0": 0.735, |
| "5.0": 0.735 |
| }, |
| "sae_top_5_test_accuracy": { |
| "1.0": 0.819, |
| "5.0": 0.819 |
| } |
| }, |
| "codeparrot/github-code_results": { |
| "sae_test_accuracy": { |
| "C": 0.9490000605583191, |
| "Python": 0.984000027179718, |
| "HTML": 0.9850000739097595, |
| "Java": 0.9660000205039978, |
| "PHP": 0.9540000557899475 |
| }, |
| "llm_test_accuracy": { |
| "C": 0.9580000638961792, |
| "Python": 0.9850000739097595, |
| "HTML": 0.9890000224113464, |
| "Java": 0.9650000333786011, |
| "PHP": 0.9490000605583191 |
| }, |
| "llm_top_1_test_accuracy": { |
| "C": 0.611, |
| "Python": 0.635, |
| "HTML": 0.724, |
| "Java": 0.605, |
| "PHP": 0.58 |
| }, |
| "llm_top_2_test_accuracy": { |
| "C": 0.723, |
| "Python": 0.695, |
| "HTML": 0.94, |
| "Java": 0.605, |
| "PHP": 0.649 |
| }, |
| "llm_top_5_test_accuracy": { |
| "C": 0.816, |
| "Python": 0.746, |
| "HTML": 0.946, |
| "Java": 0.79, |
| "PHP": 0.712 |
| }, |
| "sae_top_1_test_accuracy": { |
| "C": 0.639, |
| "Python": 0.625, |
| "HTML": 0.881, |
| "Java": 0.638, |
| "PHP": 0.599 |
| }, |
| "sae_top_2_test_accuracy": { |
| "C": 0.656, |
| "Python": 0.675, |
| "HTML": 0.893, |
| "Java": 0.645, |
| "PHP": 0.903 |
| }, |
| "sae_top_5_test_accuracy": { |
| "C": 0.684, |
| "Python": 0.943, |
| "HTML": 0.942, |
| "Java": 0.78, |
| "PHP": 0.904 |
| } |
| }, |
| "fancyzhx/ag_news_results": { |
| "sae_test_accuracy": { |
| "0": 0.9310000538825989, |
| "1": 0.9790000319480896, |
| "2": 0.9160000681877136, |
| "3": 0.9470000267028809 |
| }, |
| "llm_test_accuracy": { |
| "0": 0.9430000185966492, |
| "1": 0.9850000739097595, |
| "2": 0.9300000667572021, |
| "3": 0.9330000281333923 |
| }, |
| "llm_top_1_test_accuracy": { |
| "0": 0.775, |
| "1": 0.681, |
| "2": 0.661, |
| "3": 0.772 |
| }, |
| "llm_top_2_test_accuracy": { |
| "0": 0.779, |
| "1": 0.794, |
| "2": 0.693, |
| "3": 0.804 |
| }, |
| "llm_top_5_test_accuracy": { |
| "0": 0.867, |
| "1": 0.875, |
| "2": 0.762, |
| "3": 0.848 |
| }, |
| "sae_top_1_test_accuracy": { |
| "0": 0.767, |
| "1": 0.952, |
| "2": 0.589, |
| "3": 0.818 |
| }, |
| "sae_top_2_test_accuracy": { |
| "0": 0.773, |
| "1": 0.95, |
| "2": 0.774, |
| "3": 0.788 |
| }, |
| "sae_top_5_test_accuracy": { |
| "0": 0.822, |
| "1": 0.964, |
| "2": 0.79, |
| "3": 0.877 |
| } |
| }, |
| "Helsinki-NLP/europarl_results": { |
| "sae_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 0.999000072479248, |
| "nl": 1.0 |
| }, |
| "llm_test_accuracy": { |
| "en": 0.999000072479248, |
| "fr": 1.0, |
| "de": 1.0, |
| "es": 1.0, |
| "nl": 1.0 |
| }, |
| "llm_top_1_test_accuracy": { |
| "en": 0.943, |
| "fr": 0.64, |
| "de": 0.813, |
| "es": 0.881, |
| "nl": 0.856 |
| }, |
| "llm_top_2_test_accuracy": { |
| "en": 0.969, |
| "fr": 0.904, |
| "de": 0.841, |
| "es": 0.914, |
| "nl": 0.89 |
| }, |
| "llm_top_5_test_accuracy": { |
| "en": 0.995, |
| "fr": 0.977, |
| "de": 0.892, |
| "es": 0.98, |
| "nl": 0.961 |
| }, |
| "sae_top_1_test_accuracy": { |
| "en": 1.0, |
| "fr": 0.865, |
| "de": 0.921, |
| "es": 0.994, |
| "nl": 0.851 |
| }, |
| "sae_top_2_test_accuracy": { |
| "en": 0.999, |
| "fr": 0.997, |
| "de": 0.94, |
| "es": 0.995, |
| "nl": 0.864 |
| }, |
| "sae_top_5_test_accuracy": { |
| "en": 0.999, |
| "fr": 0.996, |
| "de": 0.991, |
| "es": 0.997, |
| "nl": 0.997 |
| } |
| } |
| } |
| } |