{
  "architecture": "cross_model_crosscoder",
  "variant": "BatchTopK with JumpReLU inference threshold",
  "reference": "Lindsey 2024 + Anthropic Jan-2025 update + Minder NeurIPS 2025",
  "base_model": "google/gemma-2-2b",
  "chat_model": "google/gemma-2-2b-it",
  "layer": 13,
  "d_model": 2304,
  "expansion": 32,
  "k_batchtopk": 100,
  "k_warmup_init": 1000,
  "k_warmup_steps": 5000,
  "dec_init_norm": 1.0,
  "token_budget": 100000000,
  "seq_len": 512,
  "fwd_batch": 4,
  "cc_batch": 4096,
  "lr": 0.0001,
  "lambda_l1": 0.041,
  "warmup_steps": 1000,
  "grad_clip": 1.0,
  "lr_decay_frac": 0.2,
  "checkpoint_every_tokens": 5000000,
  "data_web_frac": 0.5,
  "data_chat_frac": 0.5,
  "hf_user": "caiovicentino1",
  "hf_repo_name": "gemma2-2b-crosscoder-model-diff-papergrade",
  "n_features": 73728,
  "hf_repo": "caiovicentino1/gemma2-2b-crosscoder-model-diff-papergrade",
  "norm_scale_A": 0.2677689692703698,
  "norm_scale_B": 0.23634909321590494,
  "val_metrics": {
    "ve_A": 0.8773096382617951,
    "ve_B": 0.8665769904851913,
    "L0": 100.53505859375,
    "dead_frac": 0.4289415180683136
  },
  "taxonomy_counts": {
    "shared": 39711,
    "dead": 31625,
    "unclassified": 2385,
    "base_only": 4,
    "chat_only": 3
  }
}