txcdr-base / training_logs /mlc__seed2.json
han1823123123's picture
Upload training_logs/mlc__seed2.json with huggingface_hub
9a3c8f1 verified
{
"loss": [
108364.0234375,
4026.425048828125,
3392.5234375,
3163.514892578125,
3035.2373046875,
2914.15185546875,
2829.30126953125,
2792.417724609375,
2742.762451171875,
2776.29541015625,
2653.72314453125,
2669.077880859375,
2629.986572265625,
2664.68994140625,
2601.24609375,
2580.51806640625,
2639.58642578125,
2563.909912109375,
2561.493408203125,
2594.917724609375,
2565.618408203125
],
"l0": [
500.0,
492.274658203125,
494.14306640625,
492.90283203125,
493.268310546875,
492.36572265625,
493.102294921875,
493.1220703125,
491.611083984375,
493.525146484375,
490.8408203125,
490.330810546875,
488.69091796875,
491.362548828125,
491.734375,
490.404052734375,
489.929443359375,
489.762451171875,
491.277099609375,
489.604736328125,
489.1552734375
],
"steps_logged": [
0,
200,
400,
600,
800,
1000,
1200,
1400,
1600,
1800,
2000,
2200,
2400,
2600,
2800,
3000,
3200,
3400,
3600,
3800,
4000
],
"final_step": 4000,
"converged": true,
"plateau_last": 0.016735184303764403,
"elapsed_s": 768.920223236084,
"row": 4,
"arch_id": "mlc",
"arch": "mlc",
"group": 1,
"src_class": "MultiLayerCrosscoder",
"src_module": "src.architectures.mlc",
"T": null,
"T_max": null,
"t_sample": null,
"n_layers": 5,
"k_win": 500,
"k_pos": 100,
"shifts": null,
"alpha": null,
"gamma": null,
"n_scales": null,
"seed": 2,
"d_in": 2304,
"d_sae": 18432,
"subject_model": "google/gemma-2-2b",
"anchor_layer": 12,
"mlc_layers": [
10,
11,
12,
13,
14
],
"phase": "phase7_unification",
"run_id": "mlc__seed2"
}