| { | |
| "act_cache_key": "46ba7c59419d60fb", | |
| "agent": "agent_hammer", | |
| "arch": "tsae_paper", | |
| "arch_version": "1.0.0", | |
| "datasource": "toy_hierarchical_Kg10_Kl30_d256_sigma2p0", | |
| "saved_ts": "2026-05-07T01:18:50Z", | |
| "seed": 2, | |
| "train_key": "03d0a8d22d0b8a87", | |
| "training_cfg": { | |
| "arch_hparams_override": { | |
| "d_sae": 40, | |
| "k_pos": 3 | |
| }, | |
| "batch_size": 1024, | |
| "bricken_enabled": false, | |
| "bricken_max_resample_fraction": 0.5, | |
| "bricken_min_fires": 1, | |
| "bricken_n_check": 2048, | |
| "bricken_resample_every": 500, | |
| "dead_threshold_tokens": 10000000, | |
| "ema_auxk_alpha": 0.03125, | |
| "learning_rate": 0.0003, | |
| "n_steps": 8000, | |
| "optimizer": "adam", | |
| "plateau_early_stop": false, | |
| "plateau_min_delta": 0.0001, | |
| "plateau_patience": 5000, | |
| "precision": "bf16", | |
| "train_window_size": 2, | |
| "warmup_steps": 1000 | |
| } | |
| } |