caiovicentino1's picture
Upload folder using huggingface_hub
144fd82 verified
{
"architecture": "cross_model_crosscoder",
"variant": "BatchTopK with JumpReLU inference threshold",
"reference": "Lindsey 2024 + Anthropic Jan-2025 update + Minder NeurIPS 2025",
"base_model": "google/gemma-2-2b",
"chat_model": "google/gemma-2-2b-it",
"layer": 13,
"d_model": 2304,
"expansion": 32,
"k_batchtopk": 100,
"k_warmup_init": 1000,
"k_warmup_steps": 5000,
"dec_init_norm": 1.0,
"token_budget": 100000000,
"seq_len": 512,
"fwd_batch": 4,
"cc_batch": 4096,
"lr": 0.0001,
"lambda_l1": 0.041,
"warmup_steps": 1000,
"grad_clip": 1.0,
"lr_decay_frac": 0.2,
"checkpoint_every_tokens": 5000000,
"data_web_frac": 0.5,
"data_chat_frac": 0.5,
"hf_user": "caiovicentino1",
"hf_repo_name": "gemma2-2b-crosscoder-model-diff-papergrade",
"n_features": 73728,
"hf_repo": "caiovicentino1/gemma2-2b-crosscoder-model-diff-papergrade",
"norm_scale_A": 0.2677689692703698,
"norm_scale_B": 0.23634909321590494,
"val_metrics": {
"ve_A": 0.8773096382617951,
"ve_B": 0.8665769904851913,
"L0": 100.53505859375,
"dead_frac": 0.4289415180683136
},
"taxonomy_counts": {
"shared": 39711,
"dead": 31625,
"unclassified": 2385,
"base_only": 4,
"chat_only": 3
}
}