txcdr-base / training_logs /phase5b_subseq_track2__seed2.json
han1823123123's picture
Upload training_logs/phase5b_subseq_track2__seed2.json with huggingface_hub
573dcfe verified
{
"loss": [
16269.009765625,
6297.28369140625,
5702.6279296875,
5396.48388671875,
5268.8984375,
5168.9423828125,
5113.98583984375,
5084.68798828125,
5014.6572265625,
5003.88134765625,
4960.1376953125,
4923.68505859375,
4949.88916015625,
4905.4580078125,
4894.71142578125,
4865.13916015625,
4836.7626953125,
4881.08349609375
],
"l0": [
500.0,
497.01904296875,
496.993896484375,
496.538330078125,
496.166748046875,
497.13623046875,
496.052001953125,
496.489501953125,
495.185546875,
495.975341796875,
496.177734375,
494.714599609375,
495.918701171875,
495.106201171875,
495.29248046875,
495.2666015625,
494.379150390625,
496.396484375
],
"steps_logged": [
0,
200,
400,
600,
800,
1000,
1200,
1400,
1600,
1800,
2000,
2200,
2400,
2600,
2800,
3000,
3200,
3400
],
"final_step": 3400,
"converged": true,
"plateau_last": 0.0188753812595845,
"elapsed_s": 2169.8539748191833,
"T_max": 10,
"t_sample": 5,
"row": 12,
"arch_id": "phase5b_subseq_track2",
"arch": "phase5b_subseq_track2",
"group": 2,
"src_class": "SubseqTXCBareAntidead",
"src_module": "src.architectures.phase5b_subseq_sampling_txcdr",
"T": null,
"n_layers": null,
"k_win": 500,
"k_pos": 100,
"shifts": null,
"alpha": null,
"gamma": null,
"n_scales": null,
"seed": 2,
"d_in": 2304,
"d_sae": 18432,
"subject_model": "google/gemma-2-2b",
"anchor_layer": 12,
"mlc_layers": [
10,
11,
12,
13,
14
],
"phase": "phase7_unification",
"run_id": "phase5b_subseq_track2__seed2"
}