File size: 1,245 Bytes
{
"architecture": "cross_model_crosscoder",
"variant": "BatchTopK with JumpReLU inference threshold",
"reference": "Lindsey 2024 + Anthropic Jan-2025 update + Minder NeurIPS 2025",
"base_model": "google/gemma-2-2b",
"chat_model": "google/gemma-2-2b-it",
"layer": 13,
"d_model": 2304,
"expansion": 32,
"k_batchtopk": 100,
"k_warmup_init": 1000,
"k_warmup_steps": 5000,
"dec_init_norm": 1.0,
"token_budget": 100000000,
"seq_len": 512,
"fwd_batch": 4,
"cc_batch": 4096,
"lr": 0.0001,
"lambda_l1": 0.041,
"warmup_steps": 1000,
"grad_clip": 1.0,
"lr_decay_frac": 0.2,
"checkpoint_every_tokens": 5000000,
"data_web_frac": 0.5,
"data_chat_frac": 0.5,
"hf_user": "caiovicentino1",
"hf_repo_name": "gemma2-2b-crosscoder-model-diff-papergrade",
"n_features": 73728,
"hf_repo": "caiovicentino1/gemma2-2b-crosscoder-model-diff-papergrade",
"norm_scale_A": 0.2677689692703698,
"norm_scale_B": 0.23634909321590494,
"val_metrics": {
"ve_A": 0.8773096382617951,
"ve_B": 0.8665769904851913,
"L0": 100.53505859375,
"dead_frac": 0.4289415180683136
},
"taxonomy_counts": {
"shared": 39711,
"dead": 31625,
"unclassified": 2385,
"base_only": 4,
"chat_only": 3
}
}