{ "architecture": "cross_model_crosscoder", "variant": "BatchTopK with JumpReLU inference threshold", "reference": "Lindsey 2024 + Anthropic Jan-2025 update + Minder NeurIPS 2025", "base_model": "google/gemma-2-2b", "chat_model": "google/gemma-2-2b-it", "layer": 13, "d_model": 2304, "expansion": 32, "k_batchtopk": 100, "k_warmup_init": 1000, "k_warmup_steps": 5000, "dec_init_norm": 1.0, "token_budget": 100000000, "seq_len": 512, "fwd_batch": 4, "cc_batch": 4096, "lr": 0.0001, "lambda_l1": 0.041, "warmup_steps": 1000, "grad_clip": 1.0, "lr_decay_frac": 0.2, "checkpoint_every_tokens": 5000000, "data_web_frac": 0.5, "data_chat_frac": 0.5, "hf_user": "caiovicentino1", "hf_repo_name": "gemma2-2b-crosscoder-model-diff-papergrade", "n_features": 73728, "hf_repo": "caiovicentino1/gemma2-2b-crosscoder-model-diff-papergrade", "norm_scale_A": 0.2677689692703698, "norm_scale_B": 0.23634909321590494, "val_metrics": { "ve_A": 0.8773096382617951, "ve_B": 0.8665769904851913, "L0": 100.53505859375, "dead_frac": 0.4289415180683136 }, "taxonomy_counts": { "shared": 39711, "dead": 31625, "unclassified": 2385, "base_only": 4, "chat_only": 3 } }