Text Generation
Transformers
Safetensors
abstract-cot
latent-reasoning
math-reasoning
qwen3
leapeto's picture
Add files using upload-large-folder tool
e68e718 verified
{
"losses": [
0.2751981415989576,
0.22893165266577853,
0.24834042781731114,
0.2638388895960816,
0.23417620072723366,
0.24215587118524126,
0.2851439667691011,
0.2613255390126142,
0.29196599322604017,
0.27143954291314004,
0.2879517031600699,
0.28859320177070913,
0.28058985095995015,
0.32684289703611286,
0.2814401665353216,
0.316986553161405,
0.29619569072965535,
0.31013335563475264,
0.3364840148482472,
0.3246558667509817,
0.3450019852258265,
0.36042039859457875,
0.359003718016902,
0.33632316675502805,
0.34056199092883616,
0.4061928411683766,
0.3674649982713163,
0.3189254213997629,
0.3698235720337834,
0.3649632065091282,
0.34908437901176514
],
"lrs": [
6.666666666666667e-05,
9.993008576227247e-05,
9.937194443381972e-05,
9.826190093588563e-05,
9.661236384224129e-05,
9.444177243274618e-05,
9.177439057064683e-05,
8.864003547001915e-05,
8.507374438531607e-05,
8.111538294891684e-05,
7.680919953486048e-05,
7.220333063028872e-05,
6.734926274378312e-05,
6.230125686563068e-05,
5.7115741913664264e-05,
5.185068394501791e-05,
4.6564938185035956e-05,
4.131759111665349e-05,
3.616729998467365e-05,
3.1171637098265064e-05,
2.638644626136587e-05,
2.1865218525109495e-05,
1.7658494240397126e-05,
1.3813298094746491e-05,
1.037261344883343e-05,
7.374901848832683e-06,
4.853673085668947e-06,
2.8371106072518195e-06,
1.3477564710088098e-06,
4.02259358460233e-07,
1.1188468644907079e-08
],
"wallclock_s": 10572,
"n_examples": 5000,
"epochs": 1,
"mode": "bottleneck",
"lora_rank": 32,
"total_opt_steps": 156,
"num_processes": 2
}