{ "best_global_step": 1200, "best_metric": 0.2331986129283905, "best_model_checkpoint": "/scratch/hk4488/SVG-Generation/outputs/svg_sft_v2/checkpoint-1200", "epoch": 1.315919374742904, "eval_steps": 200, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.021938845468257234, "grad_norm": 0.14227350056171417, "learning_rate": 1.9e-05, "loss": 0.6225212097167969, "step": 20 }, { "epoch": 0.04387769093651447, "grad_norm": 0.10197694599628448, "learning_rate": 3.9000000000000006e-05, "loss": 0.445261812210083, "step": 40 }, { "epoch": 0.0658165364047717, "grad_norm": 0.0512363500893116, "learning_rate": 5.9e-05, "loss": 0.3574248790740967, "step": 60 }, { "epoch": 0.08775538187302893, "grad_norm": 0.0563778318464756, "learning_rate": 7.900000000000001e-05, "loss": 0.3272172212600708, "step": 80 }, { "epoch": 0.10969422734128617, "grad_norm": 0.08214055001735687, "learning_rate": 9.900000000000001e-05, "loss": 0.31246020793914797, "step": 100 }, { "epoch": 0.1316330728095434, "grad_norm": 0.09033175557851791, "learning_rate": 9.998718148881854e-05, "loss": 0.30087225437164306, "step": 120 }, { "epoch": 0.15357191827780062, "grad_norm": 0.09569519013166428, "learning_rate": 9.994599922823862e-05, "loss": 0.28826444149017333, "step": 140 }, { "epoch": 0.17551076374605787, "grad_norm": 0.0767015665769577, "learning_rate": 9.987644110531813e-05, "loss": 0.30547878742218015, "step": 160 }, { "epoch": 0.1974496092143151, "grad_norm": 0.07112172991037369, "learning_rate": 9.977854663817161e-05, "loss": 0.28853645324707033, "step": 180 }, { "epoch": 0.21938845468257234, "grad_norm": 0.09182991087436676, "learning_rate": 9.965237144366463e-05, "loss": 0.28967244625091554, "step": 200 }, { "epoch": 0.21938845468257234, "eval_loss": 0.27366939187049866, "eval_runtime": 78.78, "eval_samples_per_second": 4.227, "eval_steps_per_second": 1.066, "step": 200 }, { "epoch": 0.24132730015082957, "grad_norm": 0.07705853879451752, "learning_rate": 9.949798720581605e-05, "loss": 0.29224610328674316, "step": 220 }, { "epoch": 0.2632661456190868, "grad_norm": 0.07098541408777237, "learning_rate": 9.931548163507207e-05, "loss": 0.276334285736084, "step": 240 }, { "epoch": 0.28520499108734404, "grad_norm": 0.07781675457954407, "learning_rate": 9.910495841847541e-05, "loss": 0.2759185075759888, "step": 260 }, { "epoch": 0.30714383655560124, "grad_norm": 0.08888023346662521, "learning_rate": 9.886653716075723e-05, "loss": 0.27445273399353026, "step": 280 }, { "epoch": 0.3290826820238585, "grad_norm": 0.06509074568748474, "learning_rate": 9.860035331638622e-05, "loss": 0.2757601737976074, "step": 300 }, { "epoch": 0.35102152749211574, "grad_norm": 0.09211590141057968, "learning_rate": 9.830655811261269e-05, "loss": 0.27563354969024656, "step": 320 }, { "epoch": 0.372960372960373, "grad_norm": 0.07276469469070435, "learning_rate": 9.79853184635516e-05, "loss": 0.27464828491210935, "step": 340 }, { "epoch": 0.3948992184286302, "grad_norm": 0.08323675394058228, "learning_rate": 9.763681687535361e-05, "loss": 0.27996742725372314, "step": 360 }, { "epoch": 0.41683806389688743, "grad_norm": 0.06468376517295837, "learning_rate": 9.726125134251768e-05, "loss": 0.26534523963928225, "step": 380 }, { "epoch": 0.4387769093651447, "grad_norm": 0.07151263952255249, "learning_rate": 9.68588352354043e-05, "loss": 0.2829176664352417, "step": 400 }, { "epoch": 0.4387769093651447, "eval_loss": 0.258375346660614, "eval_runtime": 77.8053, "eval_samples_per_second": 4.28, "eval_steps_per_second": 1.08, "step": 400 }, { "epoch": 0.4607157548334019, "grad_norm": 0.08009252697229385, "learning_rate": 9.642979717901318e-05, "loss": 0.26982746124267576, "step": 420 }, { "epoch": 0.48265460030165913, "grad_norm": 0.06844024360179901, "learning_rate": 9.597438092309434e-05, "loss": 0.2733712911605835, "step": 440 }, { "epoch": 0.5045934457699164, "grad_norm": 0.0659785270690918, "learning_rate": 9.549284520366639e-05, "loss": 0.2664241552352905, "step": 460 }, { "epoch": 0.5265322912381736, "grad_norm": 0.07495546340942383, "learning_rate": 9.498546359602058e-05, "loss": 0.25587546825408936, "step": 480 }, { "epoch": 0.5484711367064308, "grad_norm": 0.06304614990949631, "learning_rate": 9.445252435929433e-05, "loss": 0.2597985744476318, "step": 500 }, { "epoch": 0.5704099821746881, "grad_norm": 0.061547983437776566, "learning_rate": 9.389433027270216e-05, "loss": 0.2667828559875488, "step": 520 }, { "epoch": 0.5923488276429453, "grad_norm": 0.121893972158432, "learning_rate": 9.331119846351766e-05, "loss": 0.2628504514694214, "step": 540 }, { "epoch": 0.6142876731112025, "grad_norm": 0.06609015166759491, "learning_rate": 9.27034602269036e-05, "loss": 0.2580559492111206, "step": 560 }, { "epoch": 0.6362265185794598, "grad_norm": 0.06037086248397827, "learning_rate": 9.207146083769294e-05, "loss": 0.257999324798584, "step": 580 }, { "epoch": 0.658165364047717, "grad_norm": 0.06207006052136421, "learning_rate": 9.141555935422756e-05, "loss": 0.25744664669036865, "step": 600 }, { "epoch": 0.658165364047717, "eval_loss": 0.24755167961120605, "eval_runtime": 77.8144, "eval_samples_per_second": 4.279, "eval_steps_per_second": 1.079, "step": 600 }, { "epoch": 0.6801042095159742, "grad_norm": 0.06428562849760056, "learning_rate": 9.073612841436621e-05, "loss": 0.26201529502868653, "step": 620 }, { "epoch": 0.7020430549842315, "grad_norm": 0.06118078902363777, "learning_rate": 9.003355402377736e-05, "loss": 0.2533463954925537, "step": 640 }, { "epoch": 0.7239819004524887, "grad_norm": 0.07041861861944199, "learning_rate": 8.93082353366375e-05, "loss": 0.26145546436309813, "step": 660 }, { "epoch": 0.745920745920746, "grad_norm": 0.07119100540876389, "learning_rate": 8.856058442885934e-05, "loss": 0.24657819271087647, "step": 680 }, { "epoch": 0.7678595913890032, "grad_norm": 0.06876585632562637, "learning_rate": 8.779102606397887e-05, "loss": 0.2610438823699951, "step": 700 }, { "epoch": 0.7897984368572604, "grad_norm": 0.07696516066789627, "learning_rate": 8.699999745183391e-05, "loss": 0.2537281274795532, "step": 720 }, { "epoch": 0.8117372823255177, "grad_norm": 0.06649173051118851, "learning_rate": 8.618794800017189e-05, "loss": 0.25040783882141116, "step": 740 }, { "epoch": 0.8336761277937749, "grad_norm": 0.06621742248535156, "learning_rate": 8.535533905932738e-05, "loss": 0.2587742805480957, "step": 760 }, { "epoch": 0.8556149732620321, "grad_norm": 0.06762684881687164, "learning_rate": 8.450264366011475e-05, "loss": 0.2528057336807251, "step": 780 }, { "epoch": 0.8775538187302894, "grad_norm": 0.07194508612155914, "learning_rate": 8.363034624508475e-05, "loss": 0.25876269340515134, "step": 800 }, { "epoch": 0.8775538187302894, "eval_loss": 0.2410527616739273, "eval_runtime": 78.1022, "eval_samples_per_second": 4.264, "eval_steps_per_second": 1.076, "step": 800 }, { "epoch": 0.8994926641985466, "grad_norm": 0.06729619204998016, "learning_rate": 8.273894239329778e-05, "loss": 0.24897451400756837, "step": 820 }, { "epoch": 0.9214315096668038, "grad_norm": 0.061502765864133835, "learning_rate": 8.182893853877018e-05, "loss": 0.25374341011047363, "step": 840 }, { "epoch": 0.9433703551350611, "grad_norm": 0.058828603476285934, "learning_rate": 8.09008516827534e-05, "loss": 0.25022833347320556, "step": 860 }, { "epoch": 0.9653092006033183, "grad_norm": 0.07917571067810059, "learning_rate": 7.995520910000965e-05, "loss": 0.24162437915802001, "step": 880 }, { "epoch": 0.9872480460715755, "grad_norm": 0.06906663626432419, "learning_rate": 7.899254803925081e-05, "loss": 0.2524584770202637, "step": 900 }, { "epoch": 1.008775538187303, "grad_norm": 0.06018976867198944, "learning_rate": 7.801341541791102e-05, "loss": 0.24264614582061766, "step": 920 }, { "epoch": 1.0307143836555601, "grad_norm": 0.06674794107675552, "learning_rate": 7.701836751142583e-05, "loss": 0.23144776821136476, "step": 940 }, { "epoch": 1.0526532291238173, "grad_norm": 0.06158677116036415, "learning_rate": 7.600796963719511e-05, "loss": 0.24413840770721434, "step": 960 }, { "epoch": 1.0745920745920745, "grad_norm": 0.06630289554595947, "learning_rate": 7.498279583340874e-05, "loss": 0.23648512363433838, "step": 980 }, { "epoch": 1.096530920060332, "grad_norm": 0.061364974826574326, "learning_rate": 7.39434285329178e-05, "loss": 0.25753345489501955, "step": 1000 }, { "epoch": 1.096530920060332, "eval_loss": 0.2369629442691803, "eval_runtime": 78.0044, "eval_samples_per_second": 4.269, "eval_steps_per_second": 1.077, "step": 1000 }, { "epoch": 1.1184697655285891, "grad_norm": 0.06297387927770615, "learning_rate": 7.289045823233657e-05, "loss": 0.24016919136047363, "step": 1020 }, { "epoch": 1.1404086109968463, "grad_norm": 0.07347658276557922, "learning_rate": 7.182448315656311e-05, "loss": 0.2379443407058716, "step": 1040 }, { "epoch": 1.1623474564651035, "grad_norm": 0.059618670493364334, "learning_rate": 7.074610891890934e-05, "loss": 0.23564295768737792, "step": 1060 }, { "epoch": 1.1842863019333607, "grad_norm": 0.07202780246734619, "learning_rate": 6.965594817703333e-05, "loss": 0.23296499252319336, "step": 1080 }, { "epoch": 1.206225147401618, "grad_norm": 0.06817714124917984, "learning_rate": 6.855462028486975e-05, "loss": 0.252462100982666, "step": 1100 }, { "epoch": 1.228163992869875, "grad_norm": 0.06484023481607437, "learning_rate": 6.744275094075565e-05, "loss": 0.24110569953918456, "step": 1120 }, { "epoch": 1.2501028383381325, "grad_norm": 0.05871213600039482, "learning_rate": 6.6320971831952e-05, "loss": 0.24410395622253417, "step": 1140 }, { "epoch": 1.2720416838063897, "grad_norm": 0.06409748643636703, "learning_rate": 6.518992027576267e-05, "loss": 0.2396394729614258, "step": 1160 }, { "epoch": 1.293980529274647, "grad_norm": 0.06916816532611847, "learning_rate": 6.405023885745488e-05, "loss": 0.25106277465820315, "step": 1180 }, { "epoch": 1.315919374742904, "grad_norm": 0.06698701530694962, "learning_rate": 6.290257506518655e-05, "loss": 0.23651955127716065, "step": 1200 }, { "epoch": 1.315919374742904, "eval_loss": 0.2331986129283905, "eval_runtime": 78.0786, "eval_samples_per_second": 4.265, "eval_steps_per_second": 1.076, "step": 1200 } ], "logging_steps": 20, "max_steps": 2736, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.789060984339809e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }