dancinlife commited on
Commit
e6b94d4
·
verified ·
1 Parent(s): fed1e6b

feat(hexad): v4-py-hexad-tension-d768x12L-cycle1-2026-05-17 — fire.log

Browse files
Files changed (1) hide show
  1. fire.log +50 -0
fire.log ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Warning: Permanently added '[ssh3.vast.ai]:12374' (ED25519) to the list of known hosts.
2
+ Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.
3
+ Have fun!
4
+ /workspace/anima/train_d768x12l_tension.py:130: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
5
+ scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
6
+ {"step": 1, "ce": 5.640663, "gn2": 30.418763, "tension": 5.515321, "tension_ema": 5.515321, "hybrid_mult": 1.0, "ppl": 281.6494, "base_lr": 2.4e-06, "lr": 2.4e-06, "wall_s": 0.41, "gpu_mem_gb": 7.455}
7
+ {"step": 62, "ce": 2.32522, "gn2": 4.459097, "tension": 2.111657, "tension_ema": 4.051757, "hybrid_mult": 0.5187, "ppl": 10.2289, "base_lr": 0.0001488, "lr": 7.718e-05, "wall_s": 8.24, "gpu_mem_gb": 9.685}
8
+ {"step": 124, "ce": 1.425178, "gn2": 3.213177, "tension": 1.792534, "tension_ema": 3.156928, "hybrid_mult": 0.5653, "ppl": 4.1586, "base_lr": 0.0002976, "lr": 0.00016825, "wall_s": 16.2, "gpu_mem_gb": 9.685}
9
+ {"step": 186, "ce": 0.367892, "gn2": 2.70603, "tension": 1.645002, "tension_ema": 2.595913, "hybrid_mult": 0.6314, "ppl": 1.4447, "base_lr": 0.00029958, "lr": 0.00018914, "wall_s": 24.15, "gpu_mem_gb": 9.685}
10
+ {"step": 248, "ce": 0.092343, "gn2": 0.343442, "tension": 0.586039, "tension_ema": 1.84945, "hybrid_mult": 0.5, "ppl": 1.0967, "base_lr": 0.00029825, "lr": 0.00014912, "wall_s": 32.11, "gpu_mem_gb": 9.685}
11
+ {"step": 310, "ce": 0.051104, "gn2": 0.155324, "tension": 0.394111, "tension_ema": 1.720276, "hybrid_mult": 0.5, "ppl": 1.0524, "base_lr": 0.00029602, "lr": 0.00014801, "wall_s": 40.07, "gpu_mem_gb": 9.685}
12
+ {"step": 372, "ce": 0.032159, "gn2": 0.096184, "tension": 0.310136, "tension_ema": 1.107521, "hybrid_mult": 0.5, "ppl": 1.0327, "base_lr": 0.00029292, "lr": 0.00014646, "wall_s": 48.03, "gpu_mem_gb": 9.685}
13
+ {"step": 434, "ce": 0.073192, "gn2": 0.189307, "tension": 0.435094, "tension_ema": 1.509156, "hybrid_mult": 0.5, "ppl": 1.0759, "base_lr": 0.00028895, "lr": 0.00014448, "wall_s": 55.99, "gpu_mem_gb": 9.685}
14
+ {"step": 496, "ce": 0.025241, "gn2": 0.03459, "tension": 0.185985, "tension_ema": 0.9298, "hybrid_mult": 0.5, "ppl": 1.0256, "base_lr": 0.00028415, "lr": 0.00014208, "wall_s": 64.0, "gpu_mem_gb": 9.685}
15
+ {"step": 558, "ce": 0.026985, "gn2": 0.053764, "tension": 0.23187, "tension_ema": 0.637487, "hybrid_mult": 0.5, "ppl": 1.0274, "base_lr": 0.00027855, "lr": 0.00013928, "wall_s": 71.95, "gpu_mem_gb": 9.685}
16
+ {"step": 620, "ce": 0.044962, "gn2": 0.082591, "tension": 0.287387, "tension_ema": 0.93649, "hybrid_mult": 0.5, "ppl": 1.046, "base_lr": 0.00027219, "lr": 0.00013609, "wall_s": 79.91, "gpu_mem_gb": 9.685}
17
+ {"step": 682, "ce": 0.020919, "gn2": 0.055819, "tension": 0.236261, "tension_ema": 0.587089, "hybrid_mult": 0.5, "ppl": 1.0211, "base_lr": 0.00026511, "lr": 0.00013255, "wall_s": 87.87, "gpu_mem_gb": 9.685}
18
+ {"step": 744, "ce": 0.019956, "gn2": 0.032276, "tension": 0.179656, "tension_ema": 0.40721, "hybrid_mult": 0.5, "ppl": 1.0202, "base_lr": 0.00025735, "lr": 0.00012867, "wall_s": 95.83, "gpu_mem_gb": 9.685}
19
+ {"step": 806, "ce": 0.023102, "gn2": 0.010866, "tension": 0.10424, "tension_ema": 0.508826, "hybrid_mult": 0.5, "ppl": 1.0234, "base_lr": 0.00024897, "lr": 0.00012449, "wall_s": 103.79, "gpu_mem_gb": 9.685}
20
+ {"step": 868, "ce": 0.019404, "gn2": 0.214611, "tension": 0.463261, "tension_ema": 0.333574, "hybrid_mult": 1.3943, "ppl": 1.0196, "base_lr": 0.00024003, "lr": 0.00033466, "wall_s": 111.75, "gpu_mem_gb": 9.685}
21
+ {"step": 930, "ce": 0.01847, "gn2": 0.006393, "tension": 0.079957, "tension_ema": 0.384131, "hybrid_mult": 0.5, "ppl": 1.0186, "base_lr": 0.00023058, "lr": 0.00011529, "wall_s": 119.7, "gpu_mem_gb": 9.685}
22
+ {"step": 992, "ce": 0.019321, "gn2": 0.016822, "tension": 0.1297, "tension_ema": 0.261188, "hybrid_mult": 0.5, "ppl": 1.0195, "base_lr": 0.0002207, "lr": 0.00011035, "wall_s": 127.66, "gpu_mem_gb": 9.685}
23
+ {"step": 1054, "ce": 0.133722, "gn2": 2.679645, "tension": 1.636962, "tension_ema": 0.254753, "hybrid_mult": 2.0, "ppl": 1.1431, "base_lr": 0.00021044, "lr": 0.00042087, "wall_s": 135.62, "gpu_mem_gb": 9.685}
24
+ {"step": 1116, "ce": 0.016094, "gn2": 0.019727, "tension": 0.140452, "tension_ema": 0.278774, "hybrid_mult": 0.5013, "ppl": 1.0162, "base_lr": 0.00019987, "lr": 0.0001002, "wall_s": 143.58, "gpu_mem_gb": 9.685}
25
+ {"step": 1178, "ce": 0.014073, "gn2": 0.005173, "tension": 0.071925, "tension_ema": 0.187478, "hybrid_mult": 0.5, "ppl": 1.0142, "base_lr": 0.00018907, "lr": 9.453e-05, "wall_s": 151.54, "gpu_mem_gb": 9.685}
26
+ {"step": 1240, "ce": 0.02907, "gn2": 0.039467, "tension": 0.198663, "tension_ema": 0.157182, "hybrid_mult": 1.2673, "ppl": 1.0295, "base_lr": 0.0001781, "lr": 0.00022571, "wall_s": 159.49, "gpu_mem_gb": 9.685}
27
+ {"step": 1302, "ce": 0.014461, "gn2": 0.008681, "tension": 0.093172, "tension_ema": 0.224678, "hybrid_mult": 0.5, "ppl": 1.0146, "base_lr": 0.00016705, "lr": 8.353e-05, "wall_s": 167.44, "gpu_mem_gb": 9.685}
28
+ {"step": 1364, "ce": 0.015863, "gn2": 0.007843, "tension": 0.088562, "tension_ema": 0.176058, "hybrid_mult": 0.5005, "ppl": 1.016, "base_lr": 0.00015599, "lr": 7.807e-05, "wall_s": 175.4, "gpu_mem_gb": 9.685}
29
+ {"step": 1426, "ce": 0.016053, "gn2": 0.272274, "tension": 0.521799, "tension_ema": 0.287108, "hybrid_mult": 1.8326, "ppl": 1.0162, "base_lr": 0.00014498, "lr": 0.00026569, "wall_s": 183.36, "gpu_mem_gb": 9.685}
30
+ {"step": 1488, "ce": 0.011513, "gn2": 0.003074, "tension": 0.055445, "tension_ema": 0.1871, "hybrid_mult": 0.5, "ppl": 1.0116, "base_lr": 0.00013411, "lr": 6.706e-05, "wall_s": 191.38, "gpu_mem_gb": 9.685}
31
+ {"step": 1550, "ce": 0.014431, "gn2": 0.007639, "tension": 0.087404, "tension_ema": 0.131829, "hybrid_mult": 0.6608, "ppl": 1.0145, "base_lr": 0.00012345, "lr": 8.157e-05, "wall_s": 199.34, "gpu_mem_gb": 9.685}
32
+ {"step": 1612, "ce": 0.015038, "gn2": 0.069711, "tension": 0.264028, "tension_ema": 0.11471, "hybrid_mult": 2.0, "ppl": 1.0152, "base_lr": 0.00011307, "lr": 0.00022614, "wall_s": 207.31, "gpu_mem_gb": 9.685}
33
+ {"step": 1674, "ce": 0.011891, "gn2": 0.003348, "tension": 0.057862, "tension_ema": 0.101351, "hybrid_mult": 0.5684, "ppl": 1.012, "base_lr": 0.00010304, "lr": 5.857e-05, "wall_s": 215.26, "gpu_mem_gb": 9.685}
34
+ {"step": 1736, "ce": 0.010649, "gn2": 0.00499, "tension": 0.070638, "tension_ema": 0.083665, "hybrid_mult": 0.843, "ppl": 1.0107, "base_lr": 9.342e-05, "lr": 7.875e-05, "wall_s": 223.22, "gpu_mem_gb": 9.685}
35
+ {"step": 1798, "ce": 0.006095, "gn2": 0.001609, "tension": 0.040115, "tension_ema": 0.072206, "hybrid_mult": 0.5531, "ppl": 1.0061, "base_lr": 8.428e-05, "lr": 4.662e-05, "wall_s": 231.17, "gpu_mem_gb": 9.685}
36
+ {"step": 1860, "ce": 0.011065, "gn2": 0.006058, "tension": 0.07783, "tension_ema": 0.077525, "hybrid_mult": 1.004, "ppl": 1.0111, "base_lr": 7.569e-05, "lr": 7.599e-05, "wall_s": 239.13, "gpu_mem_gb": 9.685}
37
+ {"step": 1922, "ce": 0.009454, "gn2": 0.007259, "tension": 0.0852, "tension_ema": 0.07025, "hybrid_mult": 1.2154, "ppl": 1.0095, "base_lr": 6.77e-05, "lr": 8.228e-05, "wall_s": 247.09, "gpu_mem_gb": 9.685}
38
+ {"step": 1984, "ce": 0.008228, "gn2": 0.001878, "tension": 0.04334, "tension_ema": 0.063311, "hybrid_mult": 0.6824, "ppl": 1.0083, "base_lr": 6.036e-05, "lr": 4.119e-05, "wall_s": 255.05, "gpu_mem_gb": 9.685}
39
+ {"step": 2046, "ce": 0.008526, "gn2": 0.001148, "tension": 0.033876, "tension_ema": 0.057639, "hybrid_mult": 0.5853, "ppl": 1.0086, "base_lr": 5.372e-05, "lr": 3.144e-05, "wall_s": 263.01, "gpu_mem_gb": 9.685}
40
+ {"step": 2108, "ce": 0.010443, "gn2": 0.00357, "tension": 0.059749, "tension_ema": 0.054624, "hybrid_mult": 1.0949, "ppl": 1.0105, "base_lr": 4.783e-05, "lr": 5.237e-05, "wall_s": 270.96, "gpu_mem_gb": 9.685}
41
+ {"step": 2170, "ce": 0.009984, "gn2": 0.004114, "tension": 0.064138, "tension_ema": 0.052662, "hybrid_mult": 1.2206, "ppl": 1.01, "base_lr": 4.273e-05, "lr": 5.216e-05, "wall_s": 278.92, "gpu_mem_gb": 9.685}
42
+ {"step": 2232, "ce": 0.008678, "gn2": 0.001758, "tension": 0.041926, "tension_ema": 0.049852, "hybrid_mult": 0.8397, "ppl": 1.0087, "base_lr": 3.846e-05, "lr": 3.229e-05, "wall_s": 286.84, "gpu_mem_gb": 9.685}
43
+ {"step": 2294, "ce": 0.008668, "gn2": 0.001581, "tension": 0.039767, "tension_ema": 0.048133, "hybrid_mult": 0.8247, "ppl": 1.0087, "base_lr": 3.503e-05, "lr": 2.889e-05, "wall_s": 294.8, "gpu_mem_gb": 9.685}
44
+ {"step": 2356, "ce": 0.008016, "gn2": 0.00296, "tension": 0.054409, "tension_ema": 0.047827, "hybrid_mult": 1.1392, "ppl": 1.008, "base_lr": 3.248e-05, "lr": 3.7e-05, "wall_s": 302.76, "gpu_mem_gb": 9.685}
45
+ {"step": 2418, "ce": 0.012541, "gn2": 0.002424, "tension": 0.049238, "tension_ema": 0.047811, "hybrid_mult": 1.0302, "ppl": 1.0126, "base_lr": 3.081e-05, "lr": 3.174e-05, "wall_s": 310.72, "gpu_mem_gb": 9.685}
46
+ {"step": 2480, "ce": 0.009382, "gn2": 0.002626, "tension": 0.051242, "tension_ema": 0.04711, "hybrid_mult": 1.0887, "ppl": 1.0094, "base_lr": 3.005e-05, "lr": 3.272e-05, "wall_s": 318.73, "gpu_mem_gb": 9.685}
47
+ {"step": 2500, "ce": 0.007762, "gn2": 0.001495, "tension": 0.038659, "tension_ema": 0.046574, "hybrid_mult": 0.8286, "ppl": 1.0078, "base_lr": 3e-05, "lr": 2.486e-05, "wall_s": 321.3, "gpu_mem_gb": 9.685}
48
+ RESULT_JSON_WRITTEN
49
+ {"init_ce": 5.640663, "final_ce": 0.007762, "ce_descent": 5.632901, "wall_s": 321.3, "n_params_M": 283.72, "final_tension_ema": 0.046574, "mult_distribution": {"lt_0_75": 1599, "0_75_to_1_25": 686, "gt_1_25": 215}}
50
+ DONE_MARKER rc=0