diff --git "a/checkpoints-v2.6-c/checkpoint-96209/trainer_state.json" "b/checkpoints-v2.6-c/checkpoint-96209/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoints-v2.6-c/checkpoint-96209/trainer_state.json" @@ -0,0 +1,3010 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 1024, + "global_step": 96209, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010643494891330332, + "grad_norm": 0.13342437148094177, + "learning_rate": 0.0003330078125, + "loss": 2.2998437881469727, + "step": 1024 + }, + { + "epoch": 0.010643494891330332, + "eval_cos_loss": 0.5988449528813362, + "eval_loss": 1.9600126259028912, + "eval_mse_loss": 1.6605901420116425, + "flow/cos_sim": 0.4011551085859537, + "flow/improvement_ratio": 0.942170824855566, + "flow/mag_ratio_mean": 0.37856073677539825, + "flow/mag_ratio_std": 0.14085532305762172, + "step": 1024 + }, + { + "epoch": 0.010643494891330332, + "eval_cos_loss": 0.5988449528813362, + "eval_loss": 1.9600126259028912, + "eval_mse_loss": 1.6605901420116425, + "eval_runtime": 2.6584, + "eval_samples_per_second": 752.329, + "eval_steps_per_second": 12.037, + "flow/cos_sim": 0.4011551085859537, + "flow/improvement_ratio": 0.942170824855566, + "flow/mag_ratio_mean": 0.37856073677539825, + "flow/mag_ratio_std": 0.14085532305762172, + "step": 1024 + }, + { + "epoch": 0.021286989782660665, + "grad_norm": 0.25054192543029785, + "learning_rate": 0.0006663411458333333, + "loss": 1.8492329120635986, + "step": 2048 + }, + { + "epoch": 0.021286989782660665, + "eval_cos_loss": 0.5117531130090356, + "eval_loss": 1.7429817728698254, + "eval_mse_loss": 1.4871052131056786, + "flow/cos_sim": 0.4882468534633517, + "flow/improvement_ratio": 0.9563530795276165, + "flow/mag_ratio_mean": 0.47669631242752075, + "flow/mag_ratio_std": 0.17675806442275643, + "step": 2048 + }, + { + "epoch": 0.021286989782660665, + "eval_cos_loss": 0.5117531130090356, + "eval_loss": 1.7429817728698254, + "eval_mse_loss": 1.4871052131056786, + "eval_runtime": 2.511, + "eval_samples_per_second": 796.509, + "eval_steps_per_second": 12.744, + "flow/cos_sim": 0.4882468534633517, + "flow/improvement_ratio": 0.9563530795276165, + "flow/mag_ratio_mean": 0.47669631242752075, + "flow/mag_ratio_std": 0.17675806442275643, + "step": 2048 + }, + { + "epoch": 0.031930484673991, + "grad_norm": 0.30941224098205566, + "learning_rate": 0.0009996744791666667, + "loss": 1.730944037437439, + "step": 3072 + }, + { + "epoch": 0.031930484673991, + "eval_cos_loss": 0.4815286351367831, + "eval_loss": 1.6586528308689594, + "eval_mse_loss": 1.4178885221481323, + "flow/cos_sim": 0.5184714393690228, + "flow/improvement_ratio": 0.9605911839753389, + "flow/mag_ratio_mean": 0.49818364903330803, + "flow/mag_ratio_std": 0.1928270636126399, + "step": 3072 + }, + { + "epoch": 0.031930484673991, + "eval_cos_loss": 0.4815286351367831, + "eval_loss": 1.6586528308689594, + "eval_mse_loss": 1.4178885221481323, + "eval_runtime": 3.1033, + "eval_samples_per_second": 644.485, + "eval_steps_per_second": 10.312, + "flow/cos_sim": 0.5184714393690228, + "flow/improvement_ratio": 0.9605911839753389, + "flow/mag_ratio_mean": 0.49818364903330803, + "flow/mag_ratio_std": 0.1928270636126399, + "step": 3072 + }, + { + "epoch": 0.04257397956532133, + "grad_norm": 0.22964967787265778, + "learning_rate": 0.0009997023516784352, + "loss": 1.6850833892822266, + "step": 4096 + }, + { + "epoch": 0.04257397956532133, + "eval_cos_loss": 0.476364528760314, + "eval_loss": 1.6391540355980396, + "eval_mse_loss": 1.4009717665612698, + "flow/cos_sim": 0.5236354488879442, + "flow/improvement_ratio": 0.9618693646043539, + "flow/mag_ratio_mean": 0.5105963433161378, + "flow/mag_ratio_std": 0.20592432795092463, + "step": 4096 + }, + { + "epoch": 0.04257397956532133, + "eval_cos_loss": 0.476364528760314, + "eval_loss": 1.6391540355980396, + "eval_mse_loss": 1.4009717665612698, + "eval_runtime": 2.5129, + "eval_samples_per_second": 795.895, + "eval_steps_per_second": 12.734, + "flow/cos_sim": 0.5236354488879442, + "flow/improvement_ratio": 0.9618693646043539, + "flow/mag_ratio_mean": 0.5105963433161378, + "flow/mag_ratio_std": 0.20592432795092463, + "step": 4096 + }, + { + "epoch": 0.05321747445665166, + "grad_norm": 0.2645546495914459, + "learning_rate": 0.0009988085977910004, + "loss": 1.6617510318756104, + "step": 5120 + }, + { + "epoch": 0.05321747445665166, + "eval_cos_loss": 0.4789119102060795, + "eval_loss": 1.645260013639927, + "eval_mse_loss": 1.405804067850113, + "flow/cos_sim": 0.5210880534723401, + "flow/improvement_ratio": 0.9588682930916548, + "flow/mag_ratio_mean": 0.504288200289011, + "flow/mag_ratio_std": 0.20718340016901493, + "step": 5120 + }, + { + "epoch": 0.05321747445665166, + "eval_cos_loss": 0.4789119102060795, + "eval_loss": 1.645260013639927, + "eval_mse_loss": 1.405804067850113, + "eval_runtime": 3.0976, + "eval_samples_per_second": 645.654, + "eval_steps_per_second": 10.33, + "flow/cos_sim": 0.5210880534723401, + "flow/improvement_ratio": 0.9588682930916548, + "flow/mag_ratio_mean": 0.504288200289011, + "flow/mag_ratio_std": 0.20718340016901493, + "step": 5120 + }, + { + "epoch": 0.063860969347982, + "grad_norm": 0.2762889862060547, + "learning_rate": 0.0009973198042317873, + "loss": 1.645796775817871, + "step": 6144 + }, + { + "epoch": 0.063860969347982, + "eval_cos_loss": 0.4598818449303508, + "eval_loss": 1.5948525853455067, + "eval_mse_loss": 1.3649116680026054, + "flow/cos_sim": 0.5401182025671005, + "flow/improvement_ratio": 0.9647715575993061, + "flow/mag_ratio_mean": 0.5178880272433162, + "flow/mag_ratio_std": 0.21153279254212976, + "step": 6144 + }, + { + "epoch": 0.063860969347982, + "eval_cos_loss": 0.4598818449303508, + "eval_loss": 1.5948525853455067, + "eval_mse_loss": 1.3649116680026054, + "eval_runtime": 3.0831, + "eval_samples_per_second": 648.695, + "eval_steps_per_second": 10.379, + "flow/cos_sim": 0.5401182025671005, + "flow/improvement_ratio": 0.9647715575993061, + "flow/mag_ratio_mean": 0.5178880272433162, + "flow/mag_ratio_std": 0.21153279254212976, + "step": 6144 + }, + { + "epoch": 0.07450446423931233, + "grad_norm": 0.17679959535598755, + "learning_rate": 0.0009952377470151526, + "loss": 1.6353809833526611, + "step": 7168 + }, + { + "epoch": 0.07450446423931233, + "eval_cos_loss": 0.4634226718917489, + "eval_loss": 1.6022505089640617, + "eval_mse_loss": 1.3705391697585583, + "flow/cos_sim": 0.5365773290395737, + "flow/improvement_ratio": 0.9635819494724274, + "flow/mag_ratio_mean": 0.5194354858249426, + "flow/mag_ratio_std": 0.21515046246349812, + "step": 7168 + }, + { + "epoch": 0.07450446423931233, + "eval_cos_loss": 0.4634226718917489, + "eval_loss": 1.6022505089640617, + "eval_mse_loss": 1.3705391697585583, + "eval_runtime": 2.8419, + "eval_samples_per_second": 703.759, + "eval_steps_per_second": 11.26, + "flow/cos_sim": 0.5365773290395737, + "flow/improvement_ratio": 0.9635819494724274, + "flow/mag_ratio_mean": 0.5194354858249426, + "flow/mag_ratio_std": 0.21515046246349812, + "step": 7168 + }, + { + "epoch": 0.08514795913064266, + "grad_norm": 0.14975515007972717, + "learning_rate": 0.000992564909872628, + "loss": 1.6262034177780151, + "step": 8192 + }, + { + "epoch": 0.08514795913064266, + "eval_cos_loss": 0.45912545546889305, + "eval_loss": 1.5890175811946392, + "eval_mse_loss": 1.3594548553228378, + "flow/cos_sim": 0.5408745482563972, + "flow/improvement_ratio": 0.9590303134173155, + "flow/mag_ratio_mean": 0.5143361240625381, + "flow/mag_ratio_std": 0.21537457825616002, + "step": 8192 + }, + { + "epoch": 0.08514795913064266, + "eval_cos_loss": 0.45912545546889305, + "eval_loss": 1.5890175811946392, + "eval_mse_loss": 1.3594548553228378, + "eval_runtime": 2.9302, + "eval_samples_per_second": 682.537, + "eval_steps_per_second": 10.921, + "flow/cos_sim": 0.5408745482563972, + "flow/improvement_ratio": 0.9590303134173155, + "flow/mag_ratio_mean": 0.5143361240625381, + "flow/mag_ratio_std": 0.21537457825616002, + "step": 8192 + }, + { + "epoch": 0.09579145402197299, + "grad_norm": 0.19106586277484894, + "learning_rate": 0.000989307950724573, + "loss": 1.6214015483856201, + "step": 9216 + }, + { + "epoch": 0.09579145402197299, + "eval_cos_loss": 0.4567577252164483, + "eval_loss": 1.5844898335635662, + "eval_mse_loss": 1.356110967695713, + "flow/cos_sim": 0.5432424321770668, + "flow/improvement_ratio": 0.9650511220097542, + "flow/mag_ratio_mean": 0.5244949720799923, + "flow/mag_ratio_std": 0.21130397450178862, + "step": 9216 + }, + { + "epoch": 0.09579145402197299, + "eval_cos_loss": 0.4567577252164483, + "eval_loss": 1.5844898335635662, + "eval_mse_loss": 1.356110967695713, + "eval_runtime": 3.0508, + "eval_samples_per_second": 655.562, + "eval_steps_per_second": 10.489, + "flow/cos_sim": 0.5432424321770668, + "flow/improvement_ratio": 0.9650511220097542, + "flow/mag_ratio_mean": 0.5244949720799923, + "flow/mag_ratio_std": 0.21130397450178862, + "step": 9216 + }, + { + "epoch": 0.10643494891330332, + "grad_norm": 0.22245089709758759, + "learning_rate": 0.000985464388035817, + "loss": 1.6132733821868896, + "step": 10240 + }, + { + "epoch": 0.10643494891330332, + "eval_cos_loss": 0.4598613306879997, + "eval_loss": 1.589576181024313, + "eval_mse_loss": 1.359645515680313, + "flow/cos_sim": 0.5401386898010969, + "flow/improvement_ratio": 0.9610863700509071, + "flow/mag_ratio_mean": 0.5160716716200113, + "flow/mag_ratio_std": 0.21545762522146106, + "step": 10240 + }, + { + "epoch": 0.10643494891330332, + "eval_cos_loss": 0.4598613306879997, + "eval_loss": 1.589576181024313, + "eval_mse_loss": 1.359645515680313, + "eval_runtime": 3.1847, + "eval_samples_per_second": 627.993, + "eval_steps_per_second": 10.048, + "flow/cos_sim": 0.5401386898010969, + "flow/improvement_ratio": 0.9610863700509071, + "flow/mag_ratio_mean": 0.5160716716200113, + "flow/mag_ratio_std": 0.21545762522146106, + "step": 10240 + }, + { + "epoch": 0.11707844380463366, + "grad_norm": 0.1567550003528595, + "learning_rate": 0.0009810417042745768, + "loss": 1.6070518493652344, + "step": 11264 + }, + { + "epoch": 0.11707844380463366, + "eval_cos_loss": 0.4550258554518223, + "eval_loss": 1.577816877514124, + "eval_mse_loss": 1.350303951650858, + "flow/cos_sim": 0.5449741557240486, + "flow/improvement_ratio": 0.9648044053465128, + "flow/mag_ratio_mean": 0.5290831215679646, + "flow/mag_ratio_std": 0.21279342425987124, + "step": 11264 + }, + { + "epoch": 0.11707844380463366, + "eval_cos_loss": 0.4550258554518223, + "eval_loss": 1.577816877514124, + "eval_mse_loss": 1.350303951650858, + "eval_runtime": 2.6352, + "eval_samples_per_second": 758.945, + "eval_steps_per_second": 12.143, + "flow/cos_sim": 0.5449741557240486, + "flow/improvement_ratio": 0.9648044053465128, + "flow/mag_ratio_mean": 0.5290831215679646, + "flow/mag_ratio_std": 0.21279342425987124, + "step": 11264 + }, + { + "epoch": 0.127721938695964, + "grad_norm": 0.19316641986370087, + "learning_rate": 0.0009760451753569162, + "loss": 1.6028146743774414, + "step": 12288 + }, + { + "epoch": 0.127721938695964, + "eval_cos_loss": 0.45873888209462166, + "eval_loss": 1.5860362015664577, + "eval_mse_loss": 1.3566667586565018, + "flow/cos_sim": 0.5412612538784742, + "flow/improvement_ratio": 0.9610528890043497, + "flow/mag_ratio_mean": 0.5225661229342222, + "flow/mag_ratio_std": 0.2149493475444615, + "step": 12288 + }, + { + "epoch": 0.127721938695964, + "eval_cos_loss": 0.45873888209462166, + "eval_loss": 1.5860362015664577, + "eval_mse_loss": 1.3566667586565018, + "eval_runtime": 2.6091, + "eval_samples_per_second": 766.55, + "eval_steps_per_second": 12.265, + "flow/cos_sim": 0.5412612538784742, + "flow/improvement_ratio": 0.9610528890043497, + "flow/mag_ratio_mean": 0.5225661229342222, + "flow/mag_ratio_std": 0.2149493475444615, + "step": 12288 + }, + { + "epoch": 0.13836543358729433, + "grad_norm": 0.17066629230976105, + "learning_rate": 0.000970486470662755, + "loss": 1.5989067554473877, + "step": 13312 + }, + { + "epoch": 0.13836543358729433, + "eval_cos_loss": 0.4526587063446641, + "eval_loss": 1.5703520886600018, + "eval_mse_loss": 1.3440227322280407, + "flow/cos_sim": 0.547341376543045, + "flow/improvement_ratio": 0.9634687285870314, + "flow/mag_ratio_mean": 0.5251006819307804, + "flow/mag_ratio_std": 0.2169443154707551, + "step": 13312 + }, + { + "epoch": 0.13836543358729433, + "eval_cos_loss": 0.4526587063446641, + "eval_loss": 1.5703520886600018, + "eval_mse_loss": 1.3440227322280407, + "eval_runtime": 2.6502, + "eval_samples_per_second": 754.652, + "eval_steps_per_second": 12.074, + "flow/cos_sim": 0.547341376543045, + "flow/improvement_ratio": 0.9634687285870314, + "flow/mag_ratio_mean": 0.5251006819307804, + "flow/mag_ratio_std": 0.2169443154707551, + "step": 13312 + }, + { + "epoch": 0.14900892847862465, + "grad_norm": 0.19086262583732605, + "learning_rate": 0.0009643613549160033, + "loss": 1.5941526889801025, + "step": 14336 + }, + { + "epoch": 0.14900892847862465, + "eval_cos_loss": 0.45674111880362034, + "eval_loss": 1.5803881026804447, + "eval_mse_loss": 1.3520175516605377, + "flow/cos_sim": 0.5432589612901211, + "flow/improvement_ratio": 0.9569191709160805, + "flow/mag_ratio_mean": 0.5241195531561971, + "flow/mag_ratio_std": 0.2207528604194522, + "step": 14336 + }, + { + "epoch": 0.14900892847862465, + "eval_cos_loss": 0.45674111880362034, + "eval_loss": 1.5803881026804447, + "eval_mse_loss": 1.3520175516605377, + "eval_runtime": 2.84, + "eval_samples_per_second": 704.229, + "eval_steps_per_second": 11.268, + "flow/cos_sim": 0.5432589612901211, + "flow/improvement_ratio": 0.9569191709160805, + "flow/mag_ratio_mean": 0.5241195531561971, + "flow/mag_ratio_std": 0.2207528604194522, + "step": 14336 + }, + { + "epoch": 0.159652423369955, + "grad_norm": 0.20660291612148285, + "learning_rate": 0.0009576890825691249, + "loss": 1.5903245210647583, + "step": 15360 + }, + { + "epoch": 0.159652423369955, + "eval_cos_loss": 0.4470532648265362, + "eval_loss": 1.5533855073153973, + "eval_mse_loss": 1.329858873039484, + "flow/cos_sim": 0.5529466420412064, + "flow/improvement_ratio": 0.9680595081299543, + "flow/mag_ratio_mean": 0.5352848172187805, + "flow/mag_ratio_std": 0.22097993176430464, + "step": 15360 + }, + { + "epoch": 0.159652423369955, + "eval_cos_loss": 0.4470532648265362, + "eval_loss": 1.5533855073153973, + "eval_mse_loss": 1.329858873039484, + "eval_runtime": 2.6578, + "eval_samples_per_second": 752.494, + "eval_steps_per_second": 12.04, + "flow/cos_sim": 0.5529466420412064, + "flow/improvement_ratio": 0.9680595081299543, + "flow/mag_ratio_mean": 0.5352848172187805, + "flow/mag_ratio_std": 0.22097993176430464, + "step": 15360 + }, + { + "epoch": 0.17029591826128532, + "grad_norm": 0.23885692656040192, + "learning_rate": 0.0009504645698990064, + "loss": 1.589218020439148, + "step": 16384 + }, + { + "epoch": 0.17029591826128532, + "eval_cos_loss": 0.44447089545428753, + "eval_loss": 1.5484142042696476, + "eval_mse_loss": 1.3261787556111813, + "flow/cos_sim": 0.5555290877819061, + "flow/improvement_ratio": 0.9635521955788136, + "flow/mag_ratio_mean": 0.5299641713500023, + "flow/mag_ratio_std": 0.215805409476161, + "step": 16384 + }, + { + "epoch": 0.17029591826128532, + "eval_cos_loss": 0.44447089545428753, + "eval_loss": 1.5484142042696476, + "eval_mse_loss": 1.3261787556111813, + "eval_runtime": 2.6371, + "eval_samples_per_second": 758.402, + "eval_steps_per_second": 12.134, + "flow/cos_sim": 0.5555290877819061, + "flow/improvement_ratio": 0.9635521955788136, + "flow/mag_ratio_mean": 0.5299641713500023, + "flow/mag_ratio_std": 0.215805409476161, + "step": 16384 + }, + { + "epoch": 0.18093941315261566, + "grad_norm": 0.2062983363866806, + "learning_rate": 0.0009427105273394636, + "loss": 1.585401177406311, + "step": 17408 + }, + { + "epoch": 0.18093941315261566, + "eval_cos_loss": 0.45094432309269905, + "eval_loss": 1.5633347816765308, + "eval_mse_loss": 1.3378626182675362, + "flow/cos_sim": 0.5490557141602039, + "flow/improvement_ratio": 0.9632246606051922, + "flow/mag_ratio_mean": 0.5234426287934184, + "flow/mag_ratio_std": 0.22044725203886628, + "step": 17408 + }, + { + "epoch": 0.18093941315261566, + "eval_cos_loss": 0.45094432309269905, + "eval_loss": 1.5633347816765308, + "eval_mse_loss": 1.3378626182675362, + "eval_runtime": 2.5802, + "eval_samples_per_second": 775.125, + "eval_steps_per_second": 12.402, + "flow/cos_sim": 0.5490557141602039, + "flow/improvement_ratio": 0.9632246606051922, + "flow/mag_ratio_mean": 0.5234426287934184, + "flow/mag_ratio_std": 0.22044725203886628, + "step": 17408 + }, + { + "epoch": 0.19158290804394598, + "grad_norm": 0.15858766436576843, + "learning_rate": 0.0009344210469473947, + "loss": 1.5826770067214966, + "step": 18432 + }, + { + "epoch": 0.19158290804394598, + "eval_cos_loss": 0.44898632261902094, + "eval_loss": 1.5564597770571709, + "eval_mse_loss": 1.331966608762741, + "flow/cos_sim": 0.5510137844830751, + "flow/improvement_ratio": 0.9625816307961941, + "flow/mag_ratio_mean": 0.5298811597749591, + "flow/mag_ratio_std": 0.22253544814884663, + "step": 18432 + }, + { + "epoch": 0.19158290804394598, + "eval_cos_loss": 0.44898632261902094, + "eval_loss": 1.5564597770571709, + "eval_mse_loss": 1.331966608762741, + "eval_runtime": 2.5531, + "eval_samples_per_second": 783.347, + "eval_steps_per_second": 12.534, + "flow/cos_sim": 0.5510137844830751, + "flow/improvement_ratio": 0.9625816307961941, + "flow/mag_ratio_mean": 0.5298811597749591, + "flow/mag_ratio_std": 0.22253544814884663, + "step": 18432 + }, + { + "epoch": 0.20222640293527633, + "grad_norm": 0.2525703012943268, + "learning_rate": 0.0009256133361993658, + "loss": 1.5798900127410889, + "step": 19456 + }, + { + "epoch": 0.20222640293527633, + "eval_cos_loss": 0.45141084399074316, + "eval_loss": 1.567859135568142, + "eval_mse_loss": 1.3421537093818188, + "flow/cos_sim": 0.5485891196876764, + "flow/improvement_ratio": 0.96523248963058, + "flow/mag_ratio_mean": 0.5181732634082437, + "flow/mag_ratio_std": 0.22030179109424353, + "step": 19456 + }, + { + "epoch": 0.20222640293527633, + "eval_cos_loss": 0.45141084399074316, + "eval_loss": 1.567859135568142, + "eval_mse_loss": 1.3421537093818188, + "eval_runtime": 2.5625, + "eval_samples_per_second": 780.491, + "eval_steps_per_second": 12.488, + "flow/cos_sim": 0.5485891196876764, + "flow/improvement_ratio": 0.96523248963058, + "flow/mag_ratio_mean": 0.5181732634082437, + "flow/mag_ratio_std": 0.22030179109424353, + "step": 19456 + }, + { + "epoch": 0.21286989782660665, + "grad_norm": 0.18312996625900269, + "learning_rate": 0.0009163072432159066, + "loss": 1.579535961151123, + "step": 20480 + }, + { + "epoch": 0.21286989782660665, + "eval_cos_loss": 0.45327545143663883, + "eval_loss": 1.5679056644439697, + "eval_mse_loss": 1.3412679433822632, + "flow/cos_sim": 0.5467245355248451, + "flow/improvement_ratio": 0.9616729654371738, + "flow/mag_ratio_mean": 0.5273217614740133, + "flow/mag_ratio_std": 0.2235504975542426, + "step": 20480 + }, + { + "epoch": 0.21286989782660665, + "eval_cos_loss": 0.45327545143663883, + "eval_loss": 1.5679056644439697, + "eval_mse_loss": 1.3412679433822632, + "eval_runtime": 2.5697, + "eval_samples_per_second": 778.293, + "eval_steps_per_second": 12.453, + "flow/cos_sim": 0.5467245355248451, + "flow/improvement_ratio": 0.9616729654371738, + "flow/mag_ratio_mean": 0.5273217614740133, + "flow/mag_ratio_std": 0.2235504975542426, + "step": 20480 + }, + { + "epoch": 0.223513392717937, + "grad_norm": 0.21262691915035248, + "learning_rate": 0.0009064956775190607, + "loss": 1.577104926109314, + "step": 21504 + }, + { + "epoch": 0.223513392717937, + "eval_cos_loss": 0.4483450762927532, + "eval_loss": 1.553330171853304, + "eval_mse_loss": 1.329157643020153, + "flow/cos_sim": 0.5516549795866013, + "flow/improvement_ratio": 0.9627660047262907, + "flow/mag_ratio_mean": 0.5387043142691255, + "flow/mag_ratio_std": 0.2251730626448989, + "step": 21504 + }, + { + "epoch": 0.223513392717937, + "eval_cos_loss": 0.4483450762927532, + "eval_loss": 1.553330171853304, + "eval_mse_loss": 1.329157643020153, + "eval_runtime": 2.5071, + "eval_samples_per_second": 797.724, + "eval_steps_per_second": 12.764, + "flow/cos_sim": 0.5516549795866013, + "flow/improvement_ratio": 0.9627660047262907, + "flow/mag_ratio_mean": 0.5387043142691255, + "flow/mag_ratio_std": 0.2251730626448989, + "step": 21504 + }, + { + "epoch": 0.2341568876092673, + "grad_norm": 0.17988671362400055, + "learning_rate": 0.0008961991942494195, + "loss": 1.574266791343689, + "step": 22528 + }, + { + "epoch": 0.2341568876092673, + "eval_cos_loss": 0.44411917496472597, + "eval_loss": 1.543789055198431, + "eval_mse_loss": 1.3217294700443745, + "flow/cos_sim": 0.5558808352798223, + "flow/improvement_ratio": 0.9671246875077486, + "flow/mag_ratio_mean": 0.5348946927115321, + "flow/mag_ratio_std": 0.22368196118623018, + "step": 22528 + }, + { + "epoch": 0.2341568876092673, + "eval_cos_loss": 0.44411917496472597, + "eval_loss": 1.543789055198431, + "eval_mse_loss": 1.3217294700443745, + "eval_runtime": 2.5967, + "eval_samples_per_second": 770.215, + "eval_steps_per_second": 12.323, + "flow/cos_sim": 0.5558808352798223, + "flow/improvement_ratio": 0.9671246875077486, + "flow/mag_ratio_mean": 0.5348946927115321, + "flow/mag_ratio_std": 0.22368196118623018, + "step": 22528 + }, + { + "epoch": 0.24480038250059766, + "grad_norm": 0.22547593712806702, + "learning_rate": 0.0008854408194461756, + "loss": 1.5733323097229004, + "step": 23552 + }, + { + "epoch": 0.24480038250059766, + "eval_cos_loss": 0.44172694999724627, + "eval_loss": 1.5377833917737007, + "eval_mse_loss": 1.316919919103384, + "flow/cos_sim": 0.5582730043679476, + "flow/improvement_ratio": 0.9642701335251331, + "flow/mag_ratio_mean": 0.5346939843147993, + "flow/mag_ratio_std": 0.22327208751812577, + "step": 23552 + }, + { + "epoch": 0.24480038250059766, + "eval_cos_loss": 0.44172694999724627, + "eval_loss": 1.5377833917737007, + "eval_mse_loss": 1.316919919103384, + "eval_runtime": 3.1395, + "eval_samples_per_second": 637.04, + "eval_steps_per_second": 10.193, + "flow/cos_sim": 0.5582730043679476, + "flow/improvement_ratio": 0.9642701335251331, + "flow/mag_ratio_mean": 0.5346939843147993, + "flow/mag_ratio_std": 0.22327208751812577, + "step": 23552 + }, + { + "epoch": 0.255443877391928, + "grad_norm": 0.2300369143486023, + "learning_rate": 0.0008742123561119935, + "loss": 1.569944143295288, + "step": 24576 + }, + { + "epoch": 0.255443877391928, + "eval_cos_loss": 0.447942478582263, + "eval_loss": 1.553868442773819, + "eval_mse_loss": 1.3298972100019455, + "flow/cos_sim": 0.5520575055852532, + "flow/improvement_ratio": 0.9638102632015944, + "flow/mag_ratio_mean": 0.5306164929643273, + "flow/mag_ratio_std": 0.22182104969397187, + "step": 24576 + }, + { + "epoch": 0.255443877391928, + "eval_cos_loss": 0.447942478582263, + "eval_loss": 1.553868442773819, + "eval_mse_loss": 1.3298972100019455, + "eval_runtime": 2.5886, + "eval_samples_per_second": 772.612, + "eval_steps_per_second": 12.362, + "flow/cos_sim": 0.5520575055852532, + "flow/improvement_ratio": 0.9638102632015944, + "flow/mag_ratio_mean": 0.5306164929643273, + "flow/mag_ratio_std": 0.22182104969397187, + "step": 24576 + }, + { + "epoch": 0.26608737228325835, + "grad_norm": 0.2177908569574356, + "learning_rate": 0.0008625491011983832, + "loss": 1.5683772563934326, + "step": 25600 + }, + { + "epoch": 0.26608737228325835, + "eval_cos_loss": 0.45118876080960035, + "eval_loss": 1.5609249621629715, + "eval_mse_loss": 1.3353305757045746, + "flow/cos_sim": 0.5488111022859812, + "flow/improvement_ratio": 0.9652206618338823, + "flow/mag_ratio_mean": 0.5250881398096681, + "flow/mag_ratio_std": 0.22340481635183096, + "step": 25600 + }, + { + "epoch": 0.26608737228325835, + "eval_cos_loss": 0.45118876080960035, + "eval_loss": 1.5609249621629715, + "eval_mse_loss": 1.3353305757045746, + "eval_runtime": 2.5832, + "eval_samples_per_second": 774.241, + "eval_steps_per_second": 12.388, + "flow/cos_sim": 0.5488111022859812, + "flow/improvement_ratio": 0.9652206618338823, + "flow/mag_ratio_mean": 0.5250881398096681, + "flow/mag_ratio_std": 0.22340481635183096, + "step": 25600 + }, + { + "epoch": 0.27673086717458867, + "grad_norm": 0.13252000510692596, + "learning_rate": 0.0008504421682637403, + "loss": 1.5673582553863525, + "step": 26624 + }, + { + "epoch": 0.27673086717458867, + "eval_cos_loss": 0.44563145097345114, + "eval_loss": 1.5506689585745335, + "eval_mse_loss": 1.3278532326221466, + "flow/cos_sim": 0.5543686226010323, + "flow/improvement_ratio": 0.9666622839868069, + "flow/mag_ratio_mean": 0.5269411941990256, + "flow/mag_ratio_std": 0.21877468656748533, + "step": 26624 + }, + { + "epoch": 0.27673086717458867, + "eval_cos_loss": 0.44563145097345114, + "eval_loss": 1.5506689585745335, + "eval_mse_loss": 1.3278532326221466, + "eval_runtime": 2.617, + "eval_samples_per_second": 764.244, + "eval_steps_per_second": 12.228, + "flow/cos_sim": 0.5543686226010323, + "flow/improvement_ratio": 0.9666622839868069, + "flow/mag_ratio_mean": 0.5269411941990256, + "flow/mag_ratio_std": 0.21877468656748533, + "step": 26624 + }, + { + "epoch": 0.287374362065919, + "grad_norm": 0.2598721981048584, + "learning_rate": 0.0008379296157504366, + "loss": 1.564971923828125, + "step": 27648 + }, + { + "epoch": 0.287374362065919, + "eval_cos_loss": 0.4419550793245435, + "eval_loss": 1.5396056547760963, + "eval_mse_loss": 1.3186281062662601, + "flow/cos_sim": 0.5580449867993593, + "flow/improvement_ratio": 0.9673310127109289, + "flow/mag_ratio_mean": 0.5319117670878768, + "flow/mag_ratio_std": 0.22065124148502946, + "step": 27648 + }, + { + "epoch": 0.287374362065919, + "eval_cos_loss": 0.4419550793245435, + "eval_loss": 1.5396056547760963, + "eval_mse_loss": 1.3186281062662601, + "eval_runtime": 3.0156, + "eval_samples_per_second": 663.219, + "eval_steps_per_second": 10.612, + "flow/cos_sim": 0.5580449867993593, + "flow/improvement_ratio": 0.9673310127109289, + "flow/mag_ratio_mean": 0.5319117670878768, + "flow/mag_ratio_std": 0.22065124148502946, + "step": 27648 + }, + { + "epoch": 0.2980178569572493, + "grad_norm": 0.274239718914032, + "learning_rate": 0.0008250147265053921, + "loss": 1.564111590385437, + "step": 28672 + }, + { + "epoch": 0.2980178569572493, + "eval_cos_loss": 0.44205061066895723, + "eval_loss": 1.5401594452559948, + "eval_mse_loss": 1.319134145975113, + "flow/cos_sim": 0.5579493436962366, + "flow/improvement_ratio": 0.9671094436198473, + "flow/mag_ratio_mean": 0.5367719177156687, + "flow/mag_ratio_std": 0.2205441533587873, + "step": 28672 + }, + { + "epoch": 0.2980178569572493, + "eval_cos_loss": 0.44205061066895723, + "eval_loss": 1.5401594452559948, + "eval_mse_loss": 1.319134145975113, + "eval_runtime": 2.6525, + "eval_samples_per_second": 754.016, + "eval_steps_per_second": 12.064, + "flow/cos_sim": 0.5579493436962366, + "flow/improvement_ratio": 0.9671094436198473, + "flow/mag_ratio_mean": 0.5367719177156687, + "flow/mag_ratio_std": 0.2205441533587873, + "step": 28672 + }, + { + "epoch": 0.3086613518485797, + "grad_norm": 0.2095516473054886, + "learning_rate": 0.000811699689939724, + "loss": 1.5625946521759033, + "step": 29696 + }, + { + "epoch": 0.3086613518485797, + "eval_cos_loss": 0.44621053244918585, + "eval_loss": 1.5496392995119095, + "eval_mse_loss": 1.3265340402722359, + "flow/cos_sim": 0.5537894666194916, + "flow/improvement_ratio": 0.9614951889961958, + "flow/mag_ratio_mean": 0.5277672996744514, + "flow/mag_ratio_std": 0.22011788561940193, + "step": 29696 + }, + { + "epoch": 0.3086613518485797, + "eval_cos_loss": 0.44621053244918585, + "eval_loss": 1.5496392995119095, + "eval_mse_loss": 1.3265340402722359, + "eval_runtime": 2.5404, + "eval_samples_per_second": 787.269, + "eval_steps_per_second": 12.596, + "flow/cos_sim": 0.5537894666194916, + "flow/improvement_ratio": 0.9614951889961958, + "flow/mag_ratio_mean": 0.5277672996744514, + "flow/mag_ratio_std": 0.22011788561940193, + "step": 29696 + }, + { + "epoch": 0.31930484673991, + "grad_norm": 0.15808935463428497, + "learning_rate": 0.0007980128200054721, + "loss": 1.5646651983261108, + "step": 30720 + }, + { + "epoch": 0.31930484673991, + "eval_cos_loss": 0.44148214533925056, + "eval_loss": 1.5364415682852268, + "eval_mse_loss": 1.3157004974782467, + "flow/cos_sim": 0.5585179291665554, + "flow/improvement_ratio": 0.9648495689034462, + "flow/mag_ratio_mean": 0.537518884986639, + "flow/mag_ratio_std": 0.22350562876090407, + "step": 30720 + }, + { + "epoch": 0.31930484673991, + "eval_cos_loss": 0.44148214533925056, + "eval_loss": 1.5364415682852268, + "eval_mse_loss": 1.3157004974782467, + "eval_runtime": 2.5821, + "eval_samples_per_second": 774.564, + "eval_steps_per_second": 12.393, + "flow/cos_sim": 0.5585179291665554, + "flow/improvement_ratio": 0.9648495689034462, + "flow/mag_ratio_mean": 0.537518884986639, + "flow/mag_ratio_std": 0.22350562876090407, + "step": 30720 + }, + { + "epoch": 0.3299483416312403, + "grad_norm": 0.1921176314353943, + "learning_rate": 0.0007839843253324567, + "loss": 1.563474416732788, + "step": 31744 + }, + { + "epoch": 0.3299483416312403, + "eval_cos_loss": 0.4404078619554639, + "eval_loss": 1.534349039196968, + "eval_mse_loss": 1.3141451105475426, + "flow/cos_sim": 0.5595921669155359, + "flow/improvement_ratio": 0.9649890139698982, + "flow/mag_ratio_mean": 0.5285989735275507, + "flow/mag_ratio_std": 0.22230371600016952, + "step": 31744 + }, + { + "epoch": 0.3299483416312403, + "eval_cos_loss": 0.4404078619554639, + "eval_loss": 1.534349039196968, + "eval_mse_loss": 1.3141451105475426, + "eval_runtime": 2.5419, + "eval_samples_per_second": 786.808, + "eval_steps_per_second": 12.589, + "flow/cos_sim": 0.5595921669155359, + "flow/improvement_ratio": 0.9649890139698982, + "flow/mag_ratio_mean": 0.5285989735275507, + "flow/mag_ratio_std": 0.22230371600016952, + "step": 31744 + }, + { + "epoch": 0.34059183652257063, + "grad_norm": 0.19485324621200562, + "learning_rate": 0.0007696035173607825, + "loss": 1.5625982284545898, + "step": 32768 + }, + { + "epoch": 0.34059183652257063, + "eval_cos_loss": 0.4499282343313098, + "eval_loss": 1.5567349456250668, + "eval_mse_loss": 1.3317708261311054, + "flow/cos_sim": 0.5500718057155609, + "flow/improvement_ratio": 0.9645203202962875, + "flow/mag_ratio_mean": 0.5316947773098946, + "flow/mag_ratio_std": 0.22561145247891545, + "step": 32768 + }, + { + "epoch": 0.34059183652257063, + "eval_cos_loss": 0.4499282343313098, + "eval_loss": 1.5567349456250668, + "eval_mse_loss": 1.3317708261311054, + "eval_runtime": 2.5397, + "eval_samples_per_second": 787.504, + "eval_steps_per_second": 12.6, + "flow/cos_sim": 0.5500718057155609, + "flow/improvement_ratio": 0.9645203202962875, + "flow/mag_ratio_mean": 0.5316947773098946, + "flow/mag_ratio_std": 0.22561145247891545, + "step": 32768 + }, + { + "epoch": 0.351235331413901, + "grad_norm": 0.18854600191116333, + "learning_rate": 0.0007549156025151, + "loss": 1.5604270696640015, + "step": 33792 + }, + { + "epoch": 0.351235331413901, + "eval_cos_loss": 0.44613189715892076, + "eval_loss": 1.5455855540931225, + "eval_mse_loss": 1.322519600391388, + "flow/cos_sim": 0.5538681279867887, + "flow/improvement_ratio": 0.9590773209929466, + "flow/mag_ratio_mean": 0.5333189619705081, + "flow/mag_ratio_std": 0.22768286149948835, + "step": 33792 + }, + { + "epoch": 0.351235331413901, + "eval_cos_loss": 0.44613189715892076, + "eval_loss": 1.5455855540931225, + "eval_mse_loss": 1.322519600391388, + "eval_runtime": 2.5511, + "eval_samples_per_second": 783.987, + "eval_steps_per_second": 12.544, + "flow/cos_sim": 0.5538681279867887, + "flow/improvement_ratio": 0.9590773209929466, + "flow/mag_ratio_mean": 0.5333189619705081, + "flow/mag_ratio_std": 0.22768286149948835, + "step": 33792 + }, + { + "epoch": 0.3618788263052313, + "grad_norm": 0.1909618228673935, + "learning_rate": 0.0007399093898115421, + "loss": 1.559531807899475, + "step": 34816 + }, + { + "epoch": 0.3618788263052313, + "eval_cos_loss": 0.4420803328976035, + "eval_loss": 1.5389960557222366, + "eval_mse_loss": 1.3179558925330639, + "flow/cos_sim": 0.5579197406768799, + "flow/improvement_ratio": 0.959646550938487, + "flow/mag_ratio_mean": 0.5326429791748524, + "flow/mag_ratio_std": 0.22111017350107431, + "step": 34816 + }, + { + "epoch": 0.3618788263052313, + "eval_cos_loss": 0.4420803328976035, + "eval_loss": 1.5389960557222366, + "eval_mse_loss": 1.3179558925330639, + "eval_runtime": 2.5395, + "eval_samples_per_second": 787.558, + "eval_steps_per_second": 12.601, + "flow/cos_sim": 0.5579197406768799, + "flow/improvement_ratio": 0.959646550938487, + "flow/mag_ratio_mean": 0.5326429791748524, + "flow/mag_ratio_std": 0.22111017350107431, + "step": 34816 + }, + { + "epoch": 0.37252232119656165, + "grad_norm": 0.1629696786403656, + "learning_rate": 0.0007246320516499633, + "loss": 1.5580956935882568, + "step": 35840 + }, + { + "epoch": 0.37252232119656165, + "eval_cos_loss": 0.44205798115581274, + "eval_loss": 1.5348509810864925, + "eval_mse_loss": 1.3138219900429249, + "flow/cos_sim": 0.5579419694840908, + "flow/improvement_ratio": 0.9621348176151514, + "flow/mag_ratio_mean": 0.5318824276328087, + "flow/mag_ratio_std": 0.226469362154603, + "step": 35840 + }, + { + "epoch": 0.37252232119656165, + "eval_cos_loss": 0.44205798115581274, + "eval_loss": 1.5348509810864925, + "eval_mse_loss": 1.3138219900429249, + "eval_runtime": 2.557, + "eval_samples_per_second": 782.177, + "eval_steps_per_second": 12.515, + "flow/cos_sim": 0.5579419694840908, + "flow/improvement_ratio": 0.9621348176151514, + "flow/mag_ratio_mean": 0.5318824276328087, + "flow/mag_ratio_std": 0.226469362154603, + "step": 35840 + }, + { + "epoch": 0.38316581608789196, + "grad_norm": 0.2269536405801773, + "learning_rate": 0.0007090719479543767, + "loss": 1.557045578956604, + "step": 36864 + }, + { + "epoch": 0.38316581608789196, + "eval_cos_loss": 0.4411419341340661, + "eval_loss": 1.5364714972674847, + "eval_mse_loss": 1.315900530666113, + "flow/cos_sim": 0.5588581711053848, + "flow/improvement_ratio": 0.965403363108635, + "flow/mag_ratio_mean": 0.5407936815172434, + "flow/mag_ratio_std": 0.22326642088592052, + "step": 36864 + }, + { + "epoch": 0.38316581608789196, + "eval_cos_loss": 0.4411419341340661, + "eval_loss": 1.5364714972674847, + "eval_mse_loss": 1.315900530666113, + "eval_runtime": 2.5319, + "eval_samples_per_second": 789.932, + "eval_steps_per_second": 12.639, + "flow/cos_sim": 0.5588581711053848, + "flow/improvement_ratio": 0.965403363108635, + "flow/mag_ratio_mean": 0.5407936815172434, + "flow/mag_ratio_std": 0.22326642088592052, + "step": 36864 + }, + { + "epoch": 0.39380931097922234, + "grad_norm": 0.18450024724006653, + "learning_rate": 0.0006932779922946351, + "loss": 1.5578693151474, + "step": 37888 + }, + { + "epoch": 0.39380931097922234, + "eval_cos_loss": 0.44131703954190016, + "eval_loss": 1.5340029932558537, + "eval_mse_loss": 1.3133444860577583, + "flow/cos_sim": 0.5586829409003258, + "flow/improvement_ratio": 0.9628860391676426, + "flow/mag_ratio_mean": 0.537332147359848, + "flow/mag_ratio_std": 0.22678001504391432, + "step": 37888 + }, + { + "epoch": 0.39380931097922234, + "eval_cos_loss": 0.44131703954190016, + "eval_loss": 1.5340029932558537, + "eval_mse_loss": 1.3133444860577583, + "eval_runtime": 2.555, + "eval_samples_per_second": 782.783, + "eval_steps_per_second": 12.525, + "flow/cos_sim": 0.5586829409003258, + "flow/improvement_ratio": 0.9628860391676426, + "flow/mag_ratio_mean": 0.537332147359848, + "flow/mag_ratio_std": 0.22678001504391432, + "step": 37888 + }, + { + "epoch": 0.40445280587055266, + "grad_norm": 0.20654521882534027, + "learning_rate": 0.0006772381509746807, + "loss": 1.5568833351135254, + "step": 38912 + }, + { + "epoch": 0.40445280587055266, + "eval_cos_loss": 0.4440508605912328, + "eval_loss": 1.540926594287157, + "eval_mse_loss": 1.3189011700451374, + "flow/cos_sim": 0.5559491030871868, + "flow/improvement_ratio": 0.964597575366497, + "flow/mag_ratio_mean": 0.5335862170904875, + "flow/mag_ratio_std": 0.22821834543719888, + "step": 38912 + }, + { + "epoch": 0.40445280587055266, + "eval_cos_loss": 0.4440508605912328, + "eval_loss": 1.540926594287157, + "eval_mse_loss": 1.3189011700451374, + "eval_runtime": 2.9265, + "eval_samples_per_second": 683.411, + "eval_steps_per_second": 10.935, + "flow/cos_sim": 0.5559491030871868, + "flow/improvement_ratio": 0.964597575366497, + "flow/mag_ratio_mean": 0.5335862170904875, + "flow/mag_ratio_std": 0.22821834543719888, + "step": 38912 + }, + { + "epoch": 0.415096300761883, + "grad_norm": 0.20580987632274628, + "learning_rate": 0.0006609868783930164, + "loss": 1.5565650463104248, + "step": 39936 + }, + { + "epoch": 0.415096300761883, + "eval_cos_loss": 0.4446534486487508, + "eval_loss": 1.5423276983201504, + "eval_mse_loss": 1.3200009688735008, + "flow/cos_sim": 0.5553465932607651, + "flow/improvement_ratio": 0.9601697400212288, + "flow/mag_ratio_mean": 0.5308061949908733, + "flow/mag_ratio_std": 0.22724535362794995, + "step": 39936 + }, + { + "epoch": 0.415096300761883, + "eval_cos_loss": 0.4446534486487508, + "eval_loss": 1.5423276983201504, + "eval_mse_loss": 1.3200009688735008, + "eval_runtime": 2.5315, + "eval_samples_per_second": 790.058, + "eval_steps_per_second": 12.641, + "flow/cos_sim": 0.5553465932607651, + "flow/improvement_ratio": 0.9601697400212288, + "flow/mag_ratio_mean": 0.5308061949908733, + "flow/mag_ratio_std": 0.22724535362794995, + "step": 39936 + }, + { + "epoch": 0.4257397956532133, + "grad_norm": 0.17006264626979828, + "learning_rate": 0.0006445597062966236, + "loss": 1.5565887689590454, + "step": 40960 + }, + { + "epoch": 0.4257397956532133, + "eval_cos_loss": 0.4380533881485462, + "eval_loss": 1.5283529199659824, + "eval_mse_loss": 1.3093262203037739, + "flow/cos_sim": 0.5619466044008732, + "flow/improvement_ratio": 0.9661596808582544, + "flow/mag_ratio_mean": 0.5353248585015535, + "flow/mag_ratio_std": 0.22140436619520187, + "step": 40960 + }, + { + "epoch": 0.4257397956532133, + "eval_cos_loss": 0.4380533881485462, + "eval_loss": 1.5283529199659824, + "eval_mse_loss": 1.3093262203037739, + "eval_runtime": 2.6667, + "eval_samples_per_second": 749.988, + "eval_steps_per_second": 12.0, + "flow/cos_sim": 0.5619466044008732, + "flow/improvement_ratio": 0.9661596808582544, + "flow/mag_ratio_mean": 0.5353248585015535, + "flow/mag_ratio_std": 0.22140436619520187, + "step": 40960 + }, + { + "epoch": 0.43638329054454367, + "grad_norm": 0.2264794260263443, + "learning_rate": 0.0006279604223844502, + "loss": 1.5556617975234985, + "step": 41984 + }, + { + "epoch": 0.43638329054454367, + "eval_cos_loss": 0.4411089513450861, + "eval_loss": 1.5351563543081284, + "eval_mse_loss": 1.3146018758416176, + "flow/cos_sim": 0.5588910467922688, + "flow/improvement_ratio": 0.9700996112078428, + "flow/mag_ratio_mean": 0.535472328774631, + "flow/mag_ratio_std": 0.2249652906320989, + "step": 41984 + }, + { + "epoch": 0.43638329054454367, + "eval_cos_loss": 0.4411089513450861, + "eval_loss": 1.5351563543081284, + "eval_mse_loss": 1.3146018758416176, + "eval_runtime": 2.6645, + "eval_samples_per_second": 750.607, + "eval_steps_per_second": 12.01, + "flow/cos_sim": 0.5588910467922688, + "flow/improvement_ratio": 0.9700996112078428, + "flow/mag_ratio_mean": 0.535472328774631, + "flow/mag_ratio_std": 0.2249652906320989, + "step": 41984 + }, + { + "epoch": 0.447026785435874, + "grad_norm": 0.27407148480415344, + "learning_rate": 0.0006111923466049098, + "loss": 1.5525274276733398, + "step": 43008 + }, + { + "epoch": 0.447026785435874, + "eval_cos_loss": 0.43889701180160046, + "eval_loss": 1.5268253944814205, + "eval_mse_loss": 1.3073768950998783, + "flow/cos_sim": 0.561102925799787, + "flow/improvement_ratio": 0.9647987205535173, + "flow/mag_ratio_mean": 0.5332341426983476, + "flow/mag_ratio_std": 0.22829985432326794, + "step": 43008 + }, + { + "epoch": 0.447026785435874, + "eval_cos_loss": 0.43889701180160046, + "eval_loss": 1.5268253944814205, + "eval_mse_loss": 1.3073768950998783, + "eval_runtime": 2.7939, + "eval_samples_per_second": 715.846, + "eval_steps_per_second": 11.454, + "flow/cos_sim": 0.561102925799787, + "flow/improvement_ratio": 0.9647987205535173, + "flow/mag_ratio_mean": 0.5332341426983476, + "flow/mag_ratio_std": 0.22829985432326794, + "step": 43008 + }, + { + "epoch": 0.4576702803272043, + "grad_norm": 0.1399686485528946, + "learning_rate": 0.0005942916270463306, + "loss": 1.5545454025268555, + "step": 44032 + }, + { + "epoch": 0.4576702803272043, + "eval_cos_loss": 0.44924431946128607, + "eval_loss": 1.553151711821556, + "eval_mse_loss": 1.3285295516252518, + "flow/cos_sim": 0.550755743868649, + "flow/improvement_ratio": 0.9646315854042768, + "flow/mag_ratio_mean": 0.5285015730187297, + "flow/mag_ratio_std": 0.22717531491070986, + "step": 44032 + }, + { + "epoch": 0.4576702803272043, + "eval_cos_loss": 0.44924431946128607, + "eval_loss": 1.553151711821556, + "eval_mse_loss": 1.3285295516252518, + "eval_runtime": 2.6015, + "eval_samples_per_second": 768.789, + "eval_steps_per_second": 12.301, + "flow/cos_sim": 0.550755743868649, + "flow/improvement_ratio": 0.9646315854042768, + "flow/mag_ratio_mean": 0.5285015730187297, + "flow/mag_ratio_std": 0.22717531491070986, + "step": 44032 + }, + { + "epoch": 0.4683137752185346, + "grad_norm": 0.17331954836845398, + "learning_rate": 0.0005772784249462365, + "loss": 1.5531715154647827, + "step": 45056 + }, + { + "epoch": 0.4683137752185346, + "eval_cos_loss": 0.44831305276602507, + "eval_loss": 1.55314514413476, + "eval_mse_loss": 1.328988615423441, + "flow/cos_sim": 0.5516869705170393, + "flow/improvement_ratio": 0.9640028644353151, + "flow/mag_ratio_mean": 0.53109060972929, + "flow/mag_ratio_std": 0.22524388320744038, + "step": 45056 + }, + { + "epoch": 0.4683137752185346, + "eval_cos_loss": 0.44831305276602507, + "eval_loss": 1.55314514413476, + "eval_mse_loss": 1.328988615423441, + "eval_runtime": 2.6383, + "eval_samples_per_second": 758.058, + "eval_steps_per_second": 12.129, + "flow/cos_sim": 0.5516869705170393, + "flow/improvement_ratio": 0.9640028644353151, + "flow/mag_ratio_mean": 0.53109060972929, + "flow/mag_ratio_std": 0.22524388320744038, + "step": 45056 + }, + { + "epoch": 0.478957270109865, + "grad_norm": 0.20901691913604736, + "learning_rate": 0.0005601730357250316, + "loss": 1.5534908771514893, + "step": 46080 + }, + { + "epoch": 0.478957270109865, + "eval_cos_loss": 0.4380967328324914, + "eval_loss": 1.5263510905206203, + "eval_mse_loss": 1.3073027282953262, + "flow/cos_sim": 0.5619033649563789, + "flow/improvement_ratio": 0.9646317362785339, + "flow/mag_ratio_mean": 0.5367344031110406, + "flow/mag_ratio_std": 0.22356789046898484, + "step": 46080 + }, + { + "epoch": 0.478957270109865, + "eval_cos_loss": 0.4380967328324914, + "eval_loss": 1.5263510905206203, + "eval_mse_loss": 1.3073027282953262, + "eval_runtime": 3.1077, + "eval_samples_per_second": 643.555, + "eval_steps_per_second": 10.297, + "flow/cos_sim": 0.5619033649563789, + "flow/improvement_ratio": 0.9646317362785339, + "flow/mag_ratio_mean": 0.5367344031110406, + "flow/mag_ratio_std": 0.22356789046898484, + "step": 46080 + }, + { + "epoch": 0.4896007650011953, + "grad_norm": 0.18614411354064941, + "learning_rate": 0.0005430126677168879, + "loss": 1.5517550706863403, + "step": 47104 + }, + { + "epoch": 0.4896007650011953, + "eval_cos_loss": 0.4381159236654639, + "eval_loss": 1.5279735252261162, + "eval_mse_loss": 1.308915562927723, + "flow/cos_sim": 0.5618840865790844, + "flow/improvement_ratio": 0.9676383044570684, + "flow/mag_ratio_mean": 0.5347359916195273, + "flow/mag_ratio_std": 0.22025129199028015, + "step": 47104 + }, + { + "epoch": 0.4896007650011953, + "eval_cos_loss": 0.4381159236654639, + "eval_loss": 1.5279735252261162, + "eval_mse_loss": 1.308915562927723, + "eval_runtime": 2.5963, + "eval_samples_per_second": 770.338, + "eval_steps_per_second": 12.325, + "flow/cos_sim": 0.5618840865790844, + "flow/improvement_ratio": 0.9676383044570684, + "flow/mag_ratio_mean": 0.5347359916195273, + "flow/mag_ratio_std": 0.22025129199028015, + "step": 47104 + }, + { + "epoch": 0.5002442598925256, + "grad_norm": 0.20864352583885193, + "learning_rate": 0.0005257842461318475, + "loss": 1.5500738620758057, + "step": 48128 + }, + { + "epoch": 0.5002442598925256, + "eval_cos_loss": 0.4427802488207817, + "eval_loss": 1.5378683991730213, + "eval_mse_loss": 1.3164782784879208, + "flow/cos_sim": 0.5572197437286377, + "flow/improvement_ratio": 0.9660468604415655, + "flow/mag_ratio_mean": 0.5358876623213291, + "flow/mag_ratio_std": 0.22517190361395478, + "step": 48128 + }, + { + "epoch": 0.5002442598925256, + "eval_cos_loss": 0.4427802488207817, + "eval_loss": 1.5378683991730213, + "eval_mse_loss": 1.3164782784879208, + "eval_runtime": 2.7741, + "eval_samples_per_second": 720.965, + "eval_steps_per_second": 11.535, + "flow/cos_sim": 0.5572197437286377, + "flow/improvement_ratio": 0.9660468604415655, + "flow/mag_ratio_mean": 0.5358876623213291, + "flow/mag_ratio_std": 0.22517190361395478, + "step": 48128 + }, + { + "epoch": 0.510887754783856, + "grad_norm": 0.18199937045574188, + "learning_rate": 0.0005085250659563913, + "loss": 1.5504491329193115, + "step": 49152 + }, + { + "epoch": 0.510887754783856, + "eval_cos_loss": 0.4387433025985956, + "eval_loss": 1.5289665646851063, + "eval_mse_loss": 1.309594914317131, + "flow/cos_sim": 0.561256805434823, + "flow/improvement_ratio": 0.9640381913632154, + "flow/mag_ratio_mean": 0.5361321400851011, + "flow/mag_ratio_std": 0.22544911736622453, + "step": 49152 + }, + { + "epoch": 0.510887754783856, + "eval_cos_loss": 0.4387433025985956, + "eval_loss": 1.5289665646851063, + "eval_mse_loss": 1.309594914317131, + "eval_runtime": 2.5843, + "eval_samples_per_second": 773.896, + "eval_steps_per_second": 12.382, + "flow/cos_sim": 0.561256805434823, + "flow/improvement_ratio": 0.9640381913632154, + "flow/mag_ratio_mean": 0.5361321400851011, + "flow/mag_ratio_std": 0.22544911736622453, + "step": 49152 + }, + { + "epoch": 0.5215312496751863, + "grad_norm": 0.21871572732925415, + "learning_rate": 0.0004912557160435426, + "loss": 1.551537036895752, + "step": 50176 + }, + { + "epoch": 0.5215312496751863, + "eval_cos_loss": 0.4424938661977649, + "eval_loss": 1.5380571633577347, + "eval_mse_loss": 1.316810242831707, + "flow/cos_sim": 0.5575060974806547, + "flow/improvement_ratio": 0.9636888317763805, + "flow/mag_ratio_mean": 0.5369405504316092, + "flow/mag_ratio_std": 0.22526462702080607, + "step": 50176 + }, + { + "epoch": 0.5215312496751863, + "eval_cos_loss": 0.4424938661977649, + "eval_loss": 1.5380571633577347, + "eval_mse_loss": 1.316810242831707, + "eval_runtime": 2.6421, + "eval_samples_per_second": 756.983, + "eval_steps_per_second": 12.112, + "flow/cos_sim": 0.5575060974806547, + "flow/improvement_ratio": 0.9636888317763805, + "flow/mag_ratio_mean": 0.5369405504316092, + "flow/mag_ratio_std": 0.22526462702080607, + "step": 50176 + }, + { + "epoch": 0.5321747445665167, + "grad_norm": 0.19360916316509247, + "learning_rate": 0.000474013640007982, + "loss": 1.550221562385559, + "step": 51200 + }, + { + "epoch": 0.5321747445665167, + "eval_cos_loss": 0.44469246733933687, + "eval_loss": 1.5433855392038822, + "eval_mse_loss": 1.3210392966866493, + "flow/cos_sim": 0.5553075838834047, + "flow/improvement_ratio": 0.9643128626048565, + "flow/mag_ratio_mean": 0.5328638143837452, + "flow/mag_ratio_std": 0.2267028819769621, + "step": 51200 + }, + { + "epoch": 0.5321747445665167, + "eval_cos_loss": 0.44469246733933687, + "eval_loss": 1.5433855392038822, + "eval_mse_loss": 1.3210392966866493, + "eval_runtime": 2.5901, + "eval_samples_per_second": 772.182, + "eval_steps_per_second": 12.355, + "flow/cos_sim": 0.5553075838834047, + "flow/improvement_ratio": 0.9643128626048565, + "flow/mag_ratio_mean": 0.5328638143837452, + "flow/mag_ratio_std": 0.2267028819769621, + "step": 51200 + }, + { + "epoch": 0.542818239457847, + "grad_norm": 0.25140267610549927, + "learning_rate": 0.0004567857008049507, + "loss": 1.5501980781555176, + "step": 52224 + }, + { + "epoch": 0.542818239457847, + "eval_cos_loss": 0.44003486074507236, + "eval_loss": 1.5340061485767365, + "eval_mse_loss": 1.3139887191355228, + "flow/cos_sim": 0.5599651224911213, + "flow/improvement_ratio": 0.9662698730826378, + "flow/mag_ratio_mean": 0.5311327101662755, + "flow/mag_ratio_std": 0.21956392657011747, + "step": 52224 + }, + { + "epoch": 0.542818239457847, + "eval_cos_loss": 0.44003486074507236, + "eval_loss": 1.5340061485767365, + "eval_mse_loss": 1.3139887191355228, + "eval_runtime": 2.5514, + "eval_samples_per_second": 783.875, + "eval_steps_per_second": 12.542, + "flow/cos_sim": 0.5599651224911213, + "flow/improvement_ratio": 0.9662698730826378, + "flow/mag_ratio_mean": 0.5311327101662755, + "flow/mag_ratio_std": 0.21956392657011747, + "step": 52224 + }, + { + "epoch": 0.5534617343491773, + "grad_norm": 0.1874593198299408, + "learning_rate": 0.0004396260548863663, + "loss": 1.5494704246520996, + "step": 53248 + }, + { + "epoch": 0.5534617343491773, + "eval_cos_loss": 0.4375583464279771, + "eval_loss": 1.5273119732737541, + "eval_mse_loss": 1.3085327930748463, + "flow/cos_sim": 0.5624416321516037, + "flow/improvement_ratio": 0.9654307011514902, + "flow/mag_ratio_mean": 0.5390398278832436, + "flow/mag_ratio_std": 0.2204155451618135, + "step": 53248 + }, + { + "epoch": 0.5534617343491773, + "eval_cos_loss": 0.4375583464279771, + "eval_loss": 1.5273119732737541, + "eval_mse_loss": 1.3085327930748463, + "eval_runtime": 2.9751, + "eval_samples_per_second": 672.236, + "eval_steps_per_second": 10.756, + "flow/cos_sim": 0.5624416321516037, + "flow/improvement_ratio": 0.9654307011514902, + "flow/mag_ratio_mean": 0.5390398278832436, + "flow/mag_ratio_std": 0.2204155451618135, + "step": 53248 + }, + { + "epoch": 0.5641052292405077, + "grad_norm": 0.260960191488266, + "learning_rate": 0.000422521628012444, + "loss": 1.5488193035125732, + "step": 54272 + }, + { + "epoch": 0.5641052292405077, + "eval_cos_loss": 0.4423528155311942, + "eval_loss": 1.5401365533471107, + "eval_mse_loss": 1.318960152566433, + "flow/cos_sim": 0.5576471537351608, + "flow/improvement_ratio": 0.9645018931478262, + "flow/mag_ratio_mean": 0.5321623589843512, + "flow/mag_ratio_std": 0.22070467984303832, + "step": 54272 + }, + { + "epoch": 0.5641052292405077, + "eval_cos_loss": 0.4423528155311942, + "eval_loss": 1.5401365533471107, + "eval_mse_loss": 1.318960152566433, + "eval_runtime": 2.7006, + "eval_samples_per_second": 740.582, + "eval_steps_per_second": 11.849, + "flow/cos_sim": 0.5576471537351608, + "flow/improvement_ratio": 0.9645018931478262, + "flow/mag_ratio_mean": 0.5321623589843512, + "flow/mag_ratio_std": 0.22070467984303832, + "step": 54272 + }, + { + "epoch": 0.574748724131838, + "grad_norm": 0.18275974690914154, + "learning_rate": 0.00040552618837104806, + "loss": 1.5481247901916504, + "step": 55296 + }, + { + "epoch": 0.574748724131838, + "eval_cos_loss": 0.4375903755426407, + "eval_loss": 1.52578229829669, + "eval_mse_loss": 1.3069871068000793, + "flow/cos_sim": 0.5624096170067787, + "flow/improvement_ratio": 0.9644604399800301, + "flow/mag_ratio_mean": 0.5396961104124784, + "flow/mag_ratio_std": 0.22439152654260397, + "step": 55296 + }, + { + "epoch": 0.574748724131838, + "eval_cos_loss": 0.4375903755426407, + "eval_loss": 1.52578229829669, + "eval_mse_loss": 1.3069871068000793, + "eval_runtime": 2.5932, + "eval_samples_per_second": 771.259, + "eval_steps_per_second": 12.34, + "flow/cos_sim": 0.5624096170067787, + "flow/improvement_ratio": 0.9644604399800301, + "flow/mag_ratio_mean": 0.5396961104124784, + "flow/mag_ratio_std": 0.22439152654260397, + "step": 55296 + }, + { + "epoch": 0.5853922190231683, + "grad_norm": 0.19556212425231934, + "learning_rate": 0.00038862678683408014, + "loss": 1.548845648765564, + "step": 56320 + }, + { + "epoch": 0.5853922190231683, + "eval_cos_loss": 0.44385355431586504, + "eval_loss": 1.5408159419894218, + "eval_mse_loss": 1.3188891597092152, + "flow/cos_sim": 0.5561465304344893, + "flow/improvement_ratio": 0.9655895195901394, + "flow/mag_ratio_mean": 0.5379520216956735, + "flow/mag_ratio_std": 0.22702415706589818, + "step": 56320 + }, + { + "epoch": 0.5853922190231683, + "eval_cos_loss": 0.44385355431586504, + "eval_loss": 1.5408159419894218, + "eval_mse_loss": 1.3188891597092152, + "eval_runtime": 2.811, + "eval_samples_per_second": 711.502, + "eval_steps_per_second": 11.384, + "flow/cos_sim": 0.5561465304344893, + "flow/improvement_ratio": 0.9655895195901394, + "flow/mag_ratio_mean": 0.5379520216956735, + "flow/mag_ratio_std": 0.22702415706589818, + "step": 56320 + }, + { + "epoch": 0.5960357139144986, + "grad_norm": 0.154473677277565, + "learning_rate": 0.00037187654708719937, + "loss": 1.5476142168045044, + "step": 57344 + }, + { + "epoch": 0.5960357139144986, + "eval_cos_loss": 0.44051590468734503, + "eval_loss": 1.5316696986556053, + "eval_mse_loss": 1.3114117458462715, + "flow/cos_sim": 0.5594841903075576, + "flow/improvement_ratio": 0.9659801628440619, + "flow/mag_ratio_mean": 0.5374981416389346, + "flow/mag_ratio_std": 0.2266282932832837, + "step": 57344 + }, + { + "epoch": 0.5960357139144986, + "eval_cos_loss": 0.44051590468734503, + "eval_loss": 1.5316696986556053, + "eval_mse_loss": 1.3114117458462715, + "eval_runtime": 2.9501, + "eval_samples_per_second": 677.94, + "eval_steps_per_second": 10.847, + "flow/cos_sim": 0.5594841903075576, + "flow/improvement_ratio": 0.9659801628440619, + "flow/mag_ratio_mean": 0.5374981416389346, + "flow/mag_ratio_std": 0.2266282932832837, + "step": 57344 + }, + { + "epoch": 0.6066792088058289, + "grad_norm": 0.19920258224010468, + "learning_rate": 0.00035526270682447326, + "loss": 1.546189785003662, + "step": 58368 + }, + { + "epoch": 0.6066792088058289, + "eval_cos_loss": 0.45052160415798426, + "eval_loss": 1.5585628859698772, + "eval_mse_loss": 1.333302080631256, + "flow/cos_sim": 0.5494783949106932, + "flow/improvement_ratio": 0.9626638870686293, + "flow/mag_ratio_mean": 0.5275079058483243, + "flow/mag_ratio_std": 0.22685475973412395, + "step": 58368 + }, + { + "epoch": 0.6066792088058289, + "eval_cos_loss": 0.45052160415798426, + "eval_loss": 1.5585628859698772, + "eval_mse_loss": 1.333302080631256, + "eval_runtime": 2.5748, + "eval_samples_per_second": 776.758, + "eval_steps_per_second": 12.428, + "flow/cos_sim": 0.5494783949106932, + "flow/improvement_ratio": 0.9626638870686293, + "flow/mag_ratio_mean": 0.5275079058483243, + "flow/mag_ratio_std": 0.22685475973412395, + "step": 58368 + }, + { + "epoch": 0.6173227036971594, + "grad_norm": 0.15236690640449524, + "learning_rate": 0.0003388374920626505, + "loss": 1.546614408493042, + "step": 59392 + }, + { + "epoch": 0.6173227036971594, + "eval_cos_loss": 0.44333774503320456, + "eval_loss": 1.5386833399534225, + "eval_mse_loss": 1.3170144706964493, + "flow/cos_sim": 0.5566621888428926, + "flow/improvement_ratio": 0.9644087161868811, + "flow/mag_ratio_mean": 0.536553805693984, + "flow/mag_ratio_std": 0.22319983318448067, + "step": 59392 + }, + { + "epoch": 0.6173227036971594, + "eval_cos_loss": 0.44333774503320456, + "eval_loss": 1.5386833399534225, + "eval_mse_loss": 1.3170144706964493, + "eval_runtime": 2.5638, + "eval_samples_per_second": 780.079, + "eval_steps_per_second": 12.481, + "flow/cos_sim": 0.5566621888428926, + "flow/improvement_ratio": 0.9644087161868811, + "flow/mag_ratio_mean": 0.536553805693984, + "flow/mag_ratio_std": 0.22319983318448067, + "step": 59392 + }, + { + "epoch": 0.6279661985884897, + "grad_norm": 0.20959317684173584, + "learning_rate": 0.0003225883881381223, + "loss": 1.5455403327941895, + "step": 60416 + }, + { + "epoch": 0.6279661985884897, + "eval_cos_loss": 0.4418268231675029, + "eval_loss": 1.5369167998433113, + "eval_mse_loss": 1.3160033896565437, + "flow/cos_sim": 0.5581732373684645, + "flow/improvement_ratio": 0.9641634412109852, + "flow/mag_ratio_mean": 0.5371082350611687, + "flow/mag_ratio_std": 0.22345659835264087, + "step": 60416 + }, + { + "epoch": 0.6279661985884897, + "eval_cos_loss": 0.4418268231675029, + "eval_loss": 1.5369167998433113, + "eval_mse_loss": 1.3160033896565437, + "eval_runtime": 3.0215, + "eval_samples_per_second": 661.926, + "eval_steps_per_second": 10.591, + "flow/cos_sim": 0.5581732373684645, + "flow/improvement_ratio": 0.9641634412109852, + "flow/mag_ratio_mean": 0.5371082350611687, + "flow/mag_ratio_std": 0.22345659835264087, + "step": 60416 + }, + { + "epoch": 0.63860969347982, + "grad_norm": 0.14656518399715424, + "learning_rate": 0.00030656647450995957, + "loss": 1.5472609996795654, + "step": 61440 + }, + { + "epoch": 0.63860969347982, + "eval_cos_loss": 0.4388773338869214, + "eval_loss": 1.5277933366596699, + "eval_mse_loss": 1.308354664593935, + "flow/cos_sim": 0.5611227098852396, + "flow/improvement_ratio": 0.9615725018084049, + "flow/mag_ratio_mean": 0.5404465068131685, + "flow/mag_ratio_std": 0.22536235908046365, + "step": 61440 + }, + { + "epoch": 0.63860969347982, + "eval_cos_loss": 0.4388773338869214, + "eval_loss": 1.5277933366596699, + "eval_mse_loss": 1.308354664593935, + "eval_runtime": 2.725, + "eval_samples_per_second": 733.933, + "eval_steps_per_second": 11.743, + "flow/cos_sim": 0.5611227098852396, + "flow/improvement_ratio": 0.9615725018084049, + "flow/mag_ratio_mean": 0.5404465068131685, + "flow/mag_ratio_std": 0.22536235908046365, + "step": 61440 + }, + { + "epoch": 0.6492531883711503, + "grad_norm": 0.16984502971172333, + "learning_rate": 0.0002907595437968689, + "loss": 1.546900987625122, + "step": 62464 + }, + { + "epoch": 0.6492531883711503, + "eval_cos_loss": 0.44708416890352964, + "eval_loss": 1.550130132585764, + "eval_mse_loss": 1.3265880458056927, + "flow/cos_sim": 0.5529158180579543, + "flow/improvement_ratio": 0.964706290513277, + "flow/mag_ratio_mean": 0.5327735636383295, + "flow/mag_ratio_std": 0.22531110560521483, + "step": 62464 + }, + { + "epoch": 0.6492531883711503, + "eval_cos_loss": 0.44708416890352964, + "eval_loss": 1.550130132585764, + "eval_mse_loss": 1.3265880458056927, + "eval_runtime": 2.9226, + "eval_samples_per_second": 684.317, + "eval_steps_per_second": 10.949, + "flow/cos_sim": 0.5529158180579543, + "flow/improvement_ratio": 0.964706290513277, + "flow/mag_ratio_mean": 0.5327735636383295, + "flow/mag_ratio_std": 0.22531110560521483, + "step": 62464 + }, + { + "epoch": 0.6598966832624806, + "grad_norm": 0.16193066537380219, + "learning_rate": 0.00027521728547552225, + "loss": 1.5464342832565308, + "step": 63488 + }, + { + "epoch": 0.6598966832624806, + "eval_cos_loss": 0.44685271102935076, + "eval_loss": 1.5440139174461365, + "eval_mse_loss": 1.3205875717103481, + "flow/cos_sim": 0.5531473197042942, + "flow/improvement_ratio": 0.9620692692697048, + "flow/mag_ratio_mean": 0.5350175518542528, + "flow/mag_ratio_std": 0.23371326178312302, + "step": 63488 + }, + { + "epoch": 0.6598966832624806, + "eval_cos_loss": 0.44685271102935076, + "eval_loss": 1.5440139174461365, + "eval_mse_loss": 1.3205875717103481, + "eval_runtime": 2.588, + "eval_samples_per_second": 772.809, + "eval_steps_per_second": 12.365, + "flow/cos_sim": 0.5531473197042942, + "flow/improvement_ratio": 0.9620692692697048, + "flow/mag_ratio_mean": 0.5350175518542528, + "flow/mag_ratio_std": 0.23371326178312302, + "step": 63488 + }, + { + "epoch": 0.670540178153811, + "grad_norm": 0.15779773890972137, + "learning_rate": 0.00025992785762254565, + "loss": 1.545114278793335, + "step": 64512 + }, + { + "epoch": 0.670540178153811, + "eval_cos_loss": 0.4412845829501748, + "eval_loss": 1.5324720852077007, + "eval_mse_loss": 1.311829797923565, + "flow/cos_sim": 0.5587154757231474, + "flow/improvement_ratio": 0.9654579609632492, + "flow/mag_ratio_mean": 0.5386016722768545, + "flow/mag_ratio_std": 0.22591237584128976, + "step": 64512 + }, + { + "epoch": 0.670540178153811, + "eval_cos_loss": 0.4412845829501748, + "eval_loss": 1.5324720852077007, + "eval_mse_loss": 1.311829797923565, + "eval_runtime": 2.5359, + "eval_samples_per_second": 788.675, + "eval_steps_per_second": 12.619, + "flow/cos_sim": 0.5587154757231474, + "flow/improvement_ratio": 0.9654579609632492, + "flow/mag_ratio_mean": 0.5386016722768545, + "flow/mag_ratio_std": 0.22591237584128976, + "step": 64512 + }, + { + "epoch": 0.6811836730451413, + "grad_norm": 0.13823935389518738, + "learning_rate": 0.00024493932293160253, + "loss": 1.5480190515518188, + "step": 65536 + }, + { + "epoch": 0.6811836730451413, + "eval_cos_loss": 0.441369004547596, + "eval_loss": 1.5341729335486889, + "eval_mse_loss": 1.3134884350001812, + "flow/cos_sim": 0.5586309880018234, + "flow/improvement_ratio": 0.9630204811692238, + "flow/mag_ratio_mean": 0.5338549390435219, + "flow/mag_ratio_std": 0.2280183294788003, + "step": 65536 + }, + { + "epoch": 0.6811836730451413, + "eval_cos_loss": 0.441369004547596, + "eval_loss": 1.5341729335486889, + "eval_mse_loss": 1.3134884350001812, + "eval_runtime": 2.5158, + "eval_samples_per_second": 794.978, + "eval_steps_per_second": 12.72, + "flow/cos_sim": 0.5586309880018234, + "flow/improvement_ratio": 0.9630204811692238, + "flow/mag_ratio_mean": 0.5338549390435219, + "flow/mag_ratio_std": 0.2280183294788003, + "step": 65536 + }, + { + "epoch": 0.6918271679364716, + "grad_norm": 0.15880072116851807, + "learning_rate": 0.00023024026137121335, + "loss": 1.5464882850646973, + "step": 66560 + }, + { + "epoch": 0.6918271679364716, + "eval_cos_loss": 0.4457838609814644, + "eval_loss": 1.543992355465889, + "eval_mse_loss": 1.3211004212498665, + "flow/cos_sim": 0.554216225631535, + "flow/improvement_ratio": 0.9604913741350174, + "flow/mag_ratio_mean": 0.5307466676458716, + "flow/mag_ratio_std": 0.22845542011782527, + "step": 66560 + }, + { + "epoch": 0.6918271679364716, + "eval_cos_loss": 0.4457838609814644, + "eval_loss": 1.543992355465889, + "eval_mse_loss": 1.3211004212498665, + "eval_runtime": 2.821, + "eval_samples_per_second": 708.957, + "eval_steps_per_second": 11.343, + "flow/cos_sim": 0.554216225631535, + "flow/improvement_ratio": 0.9604913741350174, + "flow/mag_ratio_mean": 0.5307466676458716, + "flow/mag_ratio_std": 0.22845542011782527, + "step": 66560 + }, + { + "epoch": 0.702470662827802, + "grad_norm": 0.16780522465705872, + "learning_rate": 0.0002158768798040357, + "loss": 1.5465317964553833, + "step": 67584 + }, + { + "epoch": 0.702470662827802, + "eval_cos_loss": 0.4425074281170964, + "eval_loss": 1.5348311252892017, + "eval_mse_loss": 1.3135774172842503, + "flow/cos_sim": 0.557492695748806, + "flow/improvement_ratio": 0.9596080742776394, + "flow/mag_ratio_mean": 0.5359784308820963, + "flow/mag_ratio_std": 0.22894051391631365, + "step": 67584 + }, + { + "epoch": 0.702470662827802, + "eval_cos_loss": 0.4425074281170964, + "eval_loss": 1.5348311252892017, + "eval_mse_loss": 1.3135774172842503, + "eval_runtime": 3.1093, + "eval_samples_per_second": 643.239, + "eval_steps_per_second": 10.292, + "flow/cos_sim": 0.557492695748806, + "flow/improvement_ratio": 0.9596080742776394, + "flow/mag_ratio_mean": 0.5359784308820963, + "flow/mag_ratio_std": 0.22894051391631365, + "step": 67584 + }, + { + "epoch": 0.7131141577191323, + "grad_norm": 0.14971283078193665, + "learning_rate": 0.0002018382345138871, + "loss": 1.5448497533798218, + "step": 68608 + }, + { + "epoch": 0.7131141577191323, + "eval_cos_loss": 0.4460556413978338, + "eval_loss": 1.544582311064005, + "eval_mse_loss": 1.3215544857084751, + "flow/cos_sim": 0.5539443735033274, + "flow/improvement_ratio": 0.9611221943050623, + "flow/mag_ratio_mean": 0.5354085844010115, + "flow/mag_ratio_std": 0.23012477485463023, + "step": 68608 + }, + { + "epoch": 0.7131141577191323, + "eval_cos_loss": 0.4460556413978338, + "eval_loss": 1.544582311064005, + "eval_mse_loss": 1.3215544857084751, + "eval_runtime": 2.5817, + "eval_samples_per_second": 774.689, + "eval_steps_per_second": 12.395, + "flow/cos_sim": 0.5539443735033274, + "flow/improvement_ratio": 0.9611221943050623, + "flow/mag_ratio_mean": 0.5354085844010115, + "flow/mag_ratio_std": 0.23012477485463023, + "step": 68608 + }, + { + "epoch": 0.7237576526104627, + "grad_norm": 0.14716410636901855, + "learning_rate": 0.00018816845632858647, + "loss": 1.5454319715499878, + "step": 69632 + }, + { + "epoch": 0.7237576526104627, + "eval_cos_loss": 0.4387012077495456, + "eval_loss": 1.527622751891613, + "eval_mse_loss": 1.3082721531391144, + "flow/cos_sim": 0.5612988974899054, + "flow/improvement_ratio": 0.9638887159526348, + "flow/mag_ratio_mean": 0.5365499863401055, + "flow/mag_ratio_std": 0.2257627071812749, + "step": 69632 + }, + { + "epoch": 0.7237576526104627, + "eval_cos_loss": 0.4387012077495456, + "eval_loss": 1.527622751891613, + "eval_mse_loss": 1.3082721531391144, + "eval_runtime": 2.5421, + "eval_samples_per_second": 786.736, + "eval_steps_per_second": 12.588, + "flow/cos_sim": 0.5612988974899054, + "flow/improvement_ratio": 0.9638887159526348, + "flow/mag_ratio_mean": 0.5365499863401055, + "flow/mag_ratio_std": 0.2257627071812749, + "step": 69632 + }, + { + "epoch": 0.734401147501793, + "grad_norm": 0.1667480319738388, + "learning_rate": 0.00017485713000071836, + "loss": 1.5442575216293335, + "step": 70656 + }, + { + "epoch": 0.734401147501793, + "eval_cos_loss": 0.44433039985597134, + "eval_loss": 1.540200136601925, + "eval_mse_loss": 1.3180349357426167, + "flow/cos_sim": 0.5556696448475122, + "flow/improvement_ratio": 0.9625885803252459, + "flow/mag_ratio_mean": 0.5337581913918257, + "flow/mag_ratio_std": 0.22786249266937375, + "step": 70656 + }, + { + "epoch": 0.734401147501793, + "eval_cos_loss": 0.44433039985597134, + "eval_loss": 1.540200136601925, + "eval_mse_loss": 1.3180349357426167, + "eval_runtime": 2.5514, + "eval_samples_per_second": 783.877, + "eval_steps_per_second": 12.542, + "flow/cos_sim": 0.5556696448475122, + "flow/improvement_ratio": 0.9625885803252459, + "flow/mag_ratio_mean": 0.5337581913918257, + "flow/mag_ratio_std": 0.22786249266937375, + "step": 70656 + }, + { + "epoch": 0.7450446423931233, + "grad_norm": 0.21780510246753693, + "learning_rate": 0.00016194610001300043, + "loss": 1.5446763038635254, + "step": 71680 + }, + { + "epoch": 0.7450446423931233, + "eval_cos_loss": 0.44072897639125586, + "eval_loss": 1.53380636125803, + "eval_mse_loss": 1.3134418688714504, + "flow/cos_sim": 0.559271028265357, + "flow/improvement_ratio": 0.9642956424504519, + "flow/mag_ratio_mean": 0.5349069200456142, + "flow/mag_ratio_std": 0.22263477742671967, + "step": 71680 + }, + { + "epoch": 0.7450446423931233, + "eval_cos_loss": 0.44072897639125586, + "eval_loss": 1.53380636125803, + "eval_mse_loss": 1.3134418688714504, + "eval_runtime": 2.544, + "eval_samples_per_second": 786.162, + "eval_steps_per_second": 12.579, + "flow/cos_sim": 0.559271028265357, + "flow/improvement_ratio": 0.9642956424504519, + "flow/mag_ratio_mean": 0.5349069200456142, + "flow/mag_ratio_std": 0.22263477742671967, + "step": 71680 + }, + { + "epoch": 0.7556881372844536, + "grad_norm": 0.21069595217704773, + "learning_rate": 0.0001494255292217801, + "loss": 1.544022560119629, + "step": 72704 + }, + { + "epoch": 0.7556881372844536, + "eval_cos_loss": 0.44525754544883966, + "eval_loss": 1.5455412901937962, + "eval_mse_loss": 1.322912521660328, + "flow/cos_sim": 0.5547424238175154, + "flow/improvement_ratio": 0.9653947055339813, + "flow/mag_ratio_mean": 0.5336724892258644, + "flow/mag_ratio_std": 0.22635432425886393, + "step": 72704 + }, + { + "epoch": 0.7556881372844536, + "eval_cos_loss": 0.44525754544883966, + "eval_loss": 1.5455412901937962, + "eval_mse_loss": 1.322912521660328, + "eval_runtime": 2.8381, + "eval_samples_per_second": 704.708, + "eval_steps_per_second": 11.275, + "flow/cos_sim": 0.5547424238175154, + "flow/improvement_ratio": 0.9653947055339813, + "flow/mag_ratio_mean": 0.5336724892258644, + "flow/mag_ratio_std": 0.22635432425886393, + "step": 72704 + }, + { + "epoch": 0.7663316321757839, + "grad_norm": 0.13283193111419678, + "learning_rate": 0.0001373347763502365, + "loss": 1.545114517211914, + "step": 73728 + }, + { + "epoch": 0.7663316321757839, + "eval_cos_loss": 0.44415116030722857, + "eval_loss": 1.5426055118441582, + "eval_mse_loss": 1.320529941469431, + "flow/cos_sim": 0.5558489598333836, + "flow/improvement_ratio": 0.960325175896287, + "flow/mag_ratio_mean": 0.5346721205860376, + "flow/mag_ratio_std": 0.2243386204354465, + "step": 73728 + }, + { + "epoch": 0.7663316321757839, + "eval_cos_loss": 0.44415116030722857, + "eval_loss": 1.5426055118441582, + "eval_mse_loss": 1.320529941469431, + "eval_runtime": 2.5354, + "eval_samples_per_second": 788.834, + "eval_steps_per_second": 12.621, + "flow/cos_sim": 0.5558489598333836, + "flow/improvement_ratio": 0.960325175896287, + "flow/mag_ratio_mean": 0.5346721205860376, + "flow/mag_ratio_std": 0.2243386204354465, + "step": 73728 + }, + { + "epoch": 0.7769751270671144, + "grad_norm": 0.13968615233898163, + "learning_rate": 0.00012566462923845807, + "loss": 1.5433732271194458, + "step": 74752 + }, + { + "epoch": 0.7769751270671144, + "eval_cos_loss": 0.44200514908879995, + "eval_loss": 1.5366779565811157, + "eval_mse_loss": 1.3156753852963448, + "flow/cos_sim": 0.5579948425292969, + "flow/improvement_ratio": 0.9636132828891277, + "flow/mag_ratio_mean": 0.5333766378462315, + "flow/mag_ratio_std": 0.22553266119211912, + "step": 74752 + }, + { + "epoch": 0.7769751270671144, + "eval_cos_loss": 0.44200514908879995, + "eval_loss": 1.5366779565811157, + "eval_mse_loss": 1.3156753852963448, + "eval_runtime": 2.4816, + "eval_samples_per_second": 805.947, + "eval_steps_per_second": 12.895, + "flow/cos_sim": 0.5579948425292969, + "flow/improvement_ratio": 0.9636132828891277, + "flow/mag_ratio_mean": 0.5333766378462315, + "flow/mag_ratio_std": 0.22553266119211912, + "step": 74752 + }, + { + "epoch": 0.7876186219584447, + "grad_norm": 0.22951993346214294, + "learning_rate": 0.00011445177328180711, + "loss": 1.5437688827514648, + "step": 75776 + }, + { + "epoch": 0.7876186219584447, + "eval_cos_loss": 0.44082553684711456, + "eval_loss": 1.5322747267782688, + "eval_mse_loss": 1.3118619658052921, + "flow/cos_sim": 0.5591745115816593, + "flow/improvement_ratio": 0.9597889501601458, + "flow/mag_ratio_mean": 0.5374172441661358, + "flow/mag_ratio_std": 0.22841465286910534, + "step": 75776 + }, + { + "epoch": 0.7876186219584447, + "eval_cos_loss": 0.44082553684711456, + "eval_loss": 1.5322747267782688, + "eval_mse_loss": 1.3118619658052921, + "eval_runtime": 2.5567, + "eval_samples_per_second": 782.259, + "eval_steps_per_second": 12.516, + "flow/cos_sim": 0.5591745115816593, + "flow/improvement_ratio": 0.9597889501601458, + "flow/mag_ratio_mean": 0.5374172441661358, + "flow/mag_ratio_std": 0.22841465286910534, + "step": 75776 + }, + { + "epoch": 0.798262116849775, + "grad_norm": 0.15128082036972046, + "learning_rate": 0.00010369794845541591, + "loss": 1.545216679573059, + "step": 76800 + }, + { + "epoch": 0.798262116849775, + "eval_cos_loss": 0.4396012471988797, + "eval_loss": 1.5277335830032825, + "eval_mse_loss": 1.3079329580068588, + "flow/cos_sim": 0.5603987323120236, + "flow/improvement_ratio": 0.9618115201592445, + "flow/mag_ratio_mean": 0.5371274519711733, + "flow/mag_ratio_std": 0.22797544300556183, + "step": 76800 + }, + { + "epoch": 0.798262116849775, + "eval_cos_loss": 0.4396012471988797, + "eval_loss": 1.5277335830032825, + "eval_mse_loss": 1.3079329580068588, + "eval_runtime": 2.4926, + "eval_samples_per_second": 802.376, + "eval_steps_per_second": 12.838, + "flow/cos_sim": 0.5603987323120236, + "flow/improvement_ratio": 0.9618115201592445, + "flow/mag_ratio_mean": 0.5371274519711733, + "flow/mag_ratio_std": 0.22797544300556183, + "step": 76800 + }, + { + "epoch": 0.8089056117411053, + "grad_norm": 0.15258397161960602, + "learning_rate": 9.340614224932947e-05, + "loss": 1.5437705516815186, + "step": 77824 + }, + { + "epoch": 0.8089056117411053, + "eval_cos_loss": 0.4406488761305809, + "eval_loss": 1.5305031947791576, + "eval_mse_loss": 1.3101787567138672, + "flow/cos_sim": 0.5593511275947094, + "flow/improvement_ratio": 0.9616372548043728, + "flow/mag_ratio_mean": 0.5346422707661986, + "flow/mag_ratio_std": 0.22846621181815863, + "step": 77824 + }, + { + "epoch": 0.8089056117411053, + "eval_cos_loss": 0.4406488761305809, + "eval_loss": 1.5305031947791576, + "eval_mse_loss": 1.3101787567138672, + "eval_runtime": 2.5281, + "eval_samples_per_second": 791.111, + "eval_steps_per_second": 12.658, + "flow/cos_sim": 0.5593511275947094, + "flow/improvement_ratio": 0.9616372548043728, + "flow/mag_ratio_mean": 0.5346422707661986, + "flow/mag_ratio_std": 0.22846621181815863, + "step": 77824 + }, + { + "epoch": 0.8195491066324356, + "grad_norm": 0.15012474358081818, + "learning_rate": 8.359937073738122e-05, + "loss": 1.5427945852279663, + "step": 78848 + }, + { + "epoch": 0.8195491066324356, + "eval_cos_loss": 0.4398349104449153, + "eval_loss": 1.5277978368103504, + "eval_mse_loss": 1.3078803904354572, + "flow/cos_sim": 0.5601652916520834, + "flow/improvement_ratio": 0.9623467661440372, + "flow/mag_ratio_mean": 0.5370206441730261, + "flow/mag_ratio_std": 0.2306741690263152, + "step": 78848 + }, + { + "epoch": 0.8195491066324356, + "eval_cos_loss": 0.4398349104449153, + "eval_loss": 1.5277978368103504, + "eval_mse_loss": 1.3078803904354572, + "eval_runtime": 2.9061, + "eval_samples_per_second": 688.21, + "eval_steps_per_second": 11.011, + "flow/cos_sim": 0.5601652916520834, + "flow/improvement_ratio": 0.9623467661440372, + "flow/mag_ratio_mean": 0.5370206441730261, + "flow/mag_ratio_std": 0.2306741690263152, + "step": 78848 + }, + { + "epoch": 0.830192601523766, + "grad_norm": 0.14434155821800232, + "learning_rate": 7.429817849801124e-05, + "loss": 1.5424107313156128, + "step": 79872 + }, + { + "epoch": 0.830192601523766, + "eval_cos_loss": 0.440770055167377, + "eval_loss": 1.5335130989551544, + "eval_mse_loss": 1.31312807649374, + "flow/cos_sim": 0.5592298936098814, + "flow/improvement_ratio": 0.9679525289684534, + "flow/mag_ratio_mean": 0.5366982752457261, + "flow/mag_ratio_std": 0.22420579148456454, + "step": 79872 + }, + { + "epoch": 0.830192601523766, + "eval_cos_loss": 0.440770055167377, + "eval_loss": 1.5335130989551544, + "eval_mse_loss": 1.31312807649374, + "eval_runtime": 2.4978, + "eval_samples_per_second": 800.693, + "eval_steps_per_second": 12.811, + "flow/cos_sim": 0.5592298936098814, + "flow/improvement_ratio": 0.9679525289684534, + "flow/mag_ratio_mean": 0.5366982752457261, + "flow/mag_ratio_std": 0.22420579148456454, + "step": 79872 + }, + { + "epoch": 0.8408360964150963, + "grad_norm": 0.14048728346824646, + "learning_rate": 6.54954787872275e-05, + "loss": 1.5442392826080322, + "step": 80896 + }, + { + "epoch": 0.8408360964150963, + "eval_cos_loss": 0.44281749427318573, + "eval_loss": 1.5380274765193462, + "eval_mse_loss": 1.3166187293827534, + "flow/cos_sim": 0.5571825094521046, + "flow/improvement_ratio": 0.9661159794777632, + "flow/mag_ratio_mean": 0.5346010681241751, + "flow/mag_ratio_std": 0.22787420498207211, + "step": 80896 + }, + { + "epoch": 0.8408360964150963, + "eval_cos_loss": 0.44281749427318573, + "eval_loss": 1.5380274765193462, + "eval_mse_loss": 1.3166187293827534, + "eval_runtime": 2.5057, + "eval_samples_per_second": 798.189, + "eval_steps_per_second": 12.771, + "flow/cos_sim": 0.5571825094521046, + "flow/improvement_ratio": 0.9661159794777632, + "flow/mag_ratio_mean": 0.5346010681241751, + "flow/mag_ratio_std": 0.22787420498207211, + "step": 80896 + }, + { + "epoch": 0.8514795913064266, + "grad_norm": 0.15351833403110504, + "learning_rate": 5.7211109010678276e-05, + "loss": 1.5441709756851196, + "step": 81920 + }, + { + "epoch": 0.8514795913064266, + "eval_cos_loss": 0.43972852267324924, + "eval_loss": 1.5298539474606514, + "eval_mse_loss": 1.3099896907806396, + "flow/cos_sim": 0.5602715257555246, + "flow/improvement_ratio": 0.9654016513377428, + "flow/mag_ratio_mean": 0.5356091465801001, + "flow/mag_ratio_std": 0.22713992185890675, + "step": 81920 + }, + { + "epoch": 0.8514795913064266, + "eval_cos_loss": 0.43972852267324924, + "eval_loss": 1.5298539474606514, + "eval_mse_loss": 1.3099896907806396, + "eval_runtime": 3.0577, + "eval_samples_per_second": 654.077, + "eval_steps_per_second": 10.465, + "flow/cos_sim": 0.5602715257555246, + "flow/improvement_ratio": 0.9654016513377428, + "flow/mag_ratio_mean": 0.5356091465801001, + "flow/mag_ratio_std": 0.22713992185890675, + "step": 81920 + }, + { + "epoch": 0.862123086197757, + "grad_norm": 0.14501118659973145, + "learning_rate": 4.946226542264676e-05, + "loss": 1.5440622568130493, + "step": 82944 + }, + { + "epoch": 0.862123086197757, + "eval_cos_loss": 0.4398494055494666, + "eval_loss": 1.5285302698612213, + "eval_mse_loss": 1.3086055591702461, + "flow/cos_sim": 0.5601506568491459, + "flow/improvement_ratio": 0.9636457152664661, + "flow/mag_ratio_mean": 0.5373753281310201, + "flow/mag_ratio_std": 0.22595488466322422, + "step": 82944 + }, + { + "epoch": 0.862123086197757, + "eval_cos_loss": 0.4398494055494666, + "eval_loss": 1.5285302698612213, + "eval_mse_loss": 1.3086055591702461, + "eval_runtime": 2.6735, + "eval_samples_per_second": 748.087, + "eval_steps_per_second": 11.969, + "flow/cos_sim": 0.5601506568491459, + "flow/improvement_ratio": 0.9636457152664661, + "flow/mag_ratio_mean": 0.5373753281310201, + "flow/mag_ratio_std": 0.22595488466322422, + "step": 82944 + }, + { + "epoch": 0.8727665810890873, + "grad_norm": 0.1732260137796402, + "learning_rate": 4.2243044041220535e-05, + "loss": 1.5427820682525635, + "step": 83968 + }, + { + "epoch": 0.8727665810890873, + "eval_cos_loss": 0.43963075149804354, + "eval_loss": 1.5304800160229206, + "eval_mse_loss": 1.310664638876915, + "flow/cos_sim": 0.5603692829608917, + "flow/improvement_ratio": 0.9656369760632515, + "flow/mag_ratio_mean": 0.5373333236202598, + "flow/mag_ratio_std": 0.22598782274872065, + "step": 83968 + }, + { + "epoch": 0.8727665810890873, + "eval_cos_loss": 0.43963075149804354, + "eval_loss": 1.5304800160229206, + "eval_mse_loss": 1.310664638876915, + "eval_runtime": 2.6771, + "eval_samples_per_second": 747.086, + "eval_steps_per_second": 11.953, + "flow/cos_sim": 0.5603692829608917, + "flow/improvement_ratio": 0.9656369760632515, + "flow/mag_ratio_mean": 0.5373333236202598, + "flow/mag_ratio_std": 0.22598782274872065, + "step": 83968 + }, + { + "epoch": 0.8834100759804177, + "grad_norm": 0.1780228167772293, + "learning_rate": 3.556989093134011e-05, + "loss": 1.5418999195098877, + "step": 84992 + }, + { + "epoch": 0.8834100759804177, + "eval_cos_loss": 0.4417334571480751, + "eval_loss": 1.5349605418741703, + "eval_mse_loss": 1.3140938207507133, + "flow/cos_sim": 0.5582665763795376, + "flow/improvement_ratio": 0.9667846951633692, + "flow/mag_ratio_mean": 0.5365295764058828, + "flow/mag_ratio_std": 0.22713460819795728, + "step": 84992 + }, + { + "epoch": 0.8834100759804177, + "eval_cos_loss": 0.4417334571480751, + "eval_loss": 1.5349605418741703, + "eval_mse_loss": 1.3140938207507133, + "eval_runtime": 2.545, + "eval_samples_per_second": 785.844, + "eval_steps_per_second": 12.574, + "flow/cos_sim": 0.5582665763795376, + "flow/improvement_ratio": 0.9667846951633692, + "flow/mag_ratio_mean": 0.5365295764058828, + "flow/mag_ratio_std": 0.22713460819795728, + "step": 84992 + }, + { + "epoch": 0.894053570871748, + "grad_norm": 0.1589423269033432, + "learning_rate": 2.9450766643112025e-05, + "loss": 1.5413941144943237, + "step": 86016 + }, + { + "epoch": 0.894053570871748, + "eval_cos_loss": 0.4417336033657193, + "eval_loss": 1.5350174084305763, + "eval_mse_loss": 1.314150609076023, + "flow/cos_sim": 0.5582663975656033, + "flow/improvement_ratio": 0.9610403534024954, + "flow/mag_ratio_mean": 0.5371948201209307, + "flow/mag_ratio_std": 0.22706548869609833, + "step": 86016 + }, + { + "epoch": 0.894053570871748, + "eval_cos_loss": 0.4417336033657193, + "eval_loss": 1.5350174084305763, + "eval_mse_loss": 1.314150609076023, + "eval_runtime": 2.516, + "eval_samples_per_second": 794.928, + "eval_steps_per_second": 12.719, + "flow/cos_sim": 0.5582663975656033, + "flow/improvement_ratio": 0.9610403534024954, + "flow/mag_ratio_mean": 0.5371948201209307, + "flow/mag_ratio_std": 0.22706548869609833, + "step": 86016 + }, + { + "epoch": 0.9046970657630783, + "grad_norm": 0.13952629268169403, + "learning_rate": 2.3898122320136107e-05, + "loss": 1.5443463325500488, + "step": 87040 + }, + { + "epoch": 0.9046970657630783, + "eval_cos_loss": 0.4454851495102048, + "eval_loss": 1.5462815202772617, + "eval_mse_loss": 1.3235389403998852, + "flow/cos_sim": 0.5545149501413107, + "flow/improvement_ratio": 0.9633045084774494, + "flow/mag_ratio_mean": 0.5327793834730983, + "flow/mag_ratio_std": 0.22570591513067484, + "step": 87040 + }, + { + "epoch": 0.9046970657630783, + "eval_cos_loss": 0.4454851495102048, + "eval_loss": 1.5462815202772617, + "eval_mse_loss": 1.3235389403998852, + "eval_runtime": 3.0237, + "eval_samples_per_second": 661.435, + "eval_steps_per_second": 10.583, + "flow/cos_sim": 0.5545149501413107, + "flow/improvement_ratio": 0.9633045084774494, + "flow/mag_ratio_mean": 0.5327793834730983, + "flow/mag_ratio_std": 0.22570591513067484, + "step": 87040 + }, + { + "epoch": 0.9153405606544086, + "grad_norm": 0.1696067899465561, + "learning_rate": 1.8907727303827394e-05, + "loss": 1.5422346591949463, + "step": 88064 + }, + { + "epoch": 0.9153405606544086, + "eval_cos_loss": 0.4382868492975831, + "eval_loss": 1.5258447527885437, + "eval_mse_loss": 1.3067013174295425, + "flow/cos_sim": 0.561713146045804, + "flow/improvement_ratio": 0.9594295676797628, + "flow/mag_ratio_mean": 0.5373271182179451, + "flow/mag_ratio_std": 0.22556039178743958, + "step": 88064 + }, + { + "epoch": 0.9153405606544086, + "eval_cos_loss": 0.4382868492975831, + "eval_loss": 1.5258447527885437, + "eval_mse_loss": 1.3067013174295425, + "eval_runtime": 2.5211, + "eval_samples_per_second": 793.311, + "eval_steps_per_second": 12.693, + "flow/cos_sim": 0.561713146045804, + "flow/improvement_ratio": 0.9594295676797628, + "flow/mag_ratio_mean": 0.5373271182179451, + "flow/mag_ratio_std": 0.22556039178743958, + "step": 88064 + }, + { + "epoch": 0.9259840555457389, + "grad_norm": 0.15072369575500488, + "learning_rate": 1.4491237768113841e-05, + "loss": 1.542992353439331, + "step": 89088 + }, + { + "epoch": 0.9259840555457389, + "eval_cos_loss": 0.4420606214553118, + "eval_loss": 1.5344204120337963, + "eval_mse_loss": 1.3133900947868824, + "flow/cos_sim": 0.557939387857914, + "flow/improvement_ratio": 0.9637042284011841, + "flow/mag_ratio_mean": 0.5341464914381504, + "flow/mag_ratio_std": 0.22699455870315433, + "step": 89088 + }, + { + "epoch": 0.9259840555457389, + "eval_cos_loss": 0.4420606214553118, + "eval_loss": 1.5344204120337963, + "eval_mse_loss": 1.3133900947868824, + "eval_runtime": 2.613, + "eval_samples_per_second": 765.401, + "eval_steps_per_second": 12.246, + "flow/cos_sim": 0.557939387857914, + "flow/improvement_ratio": 0.9637042284011841, + "flow/mag_ratio_mean": 0.5341464914381504, + "flow/mag_ratio_std": 0.22699455870315433, + "step": 89088 + }, + { + "epoch": 0.9366275504370692, + "grad_norm": 0.15198417007923126, + "learning_rate": 1.065392223983186e-05, + "loss": 1.5422855615615845, + "step": 90112 + }, + { + "epoch": 0.9366275504370692, + "eval_cos_loss": 0.4458601539954543, + "eval_loss": 1.5451230816543102, + "eval_mse_loss": 1.3221930228173733, + "flow/cos_sim": 0.5541399177163839, + "flow/improvement_ratio": 0.958930304273963, + "flow/mag_ratio_mean": 0.5324209975078702, + "flow/mag_ratio_std": 0.22846948402002454, + "step": 90112 + }, + { + "epoch": 0.9366275504370692, + "eval_cos_loss": 0.4458601539954543, + "eval_loss": 1.5451230816543102, + "eval_mse_loss": 1.3221930228173733, + "eval_runtime": 3.0844, + "eval_samples_per_second": 648.42, + "eval_steps_per_second": 10.375, + "flow/cos_sim": 0.5541399177163839, + "flow/improvement_ratio": 0.958930304273963, + "flow/mag_ratio_mean": 0.5324209975078702, + "flow/mag_ratio_std": 0.22846948402002454, + "step": 90112 + }, + { + "epoch": 0.9472710453283997, + "grad_norm": 0.1283935010433197, + "learning_rate": 7.40324956991506e-06, + "loss": 1.5435974597930908, + "step": 91136 + }, + { + "epoch": 0.9472710453283997, + "eval_cos_loss": 0.4411879302933812, + "eval_loss": 1.5341791696846485, + "eval_mse_loss": 1.313585203140974, + "flow/cos_sim": 0.5588121470063925, + "flow/improvement_ratio": 0.9638758208602667, + "flow/mag_ratio_mean": 0.5351903941482306, + "flow/mag_ratio_std": 0.22519566072151065, + "step": 91136 + }, + { + "epoch": 0.9472710453283997, + "eval_cos_loss": 0.4411879302933812, + "eval_loss": 1.5341791696846485, + "eval_mse_loss": 1.313585203140974, + "eval_runtime": 2.6762, + "eval_samples_per_second": 747.335, + "eval_steps_per_second": 11.957, + "flow/cos_sim": 0.5588121470063925, + "flow/improvement_ratio": 0.9638758208602667, + "flow/mag_ratio_mean": 0.5351903941482306, + "flow/mag_ratio_std": 0.22519566072151065, + "step": 91136 + }, + { + "epoch": 0.95791454021973, + "grad_norm": 0.12574529647827148, + "learning_rate": 4.7367430129720004e-06, + "loss": 1.5435535907745361, + "step": 92160 + }, + { + "epoch": 0.95791454021973, + "eval_cos_loss": 0.44530233182013035, + "eval_loss": 1.5450147837400436, + "eval_mse_loss": 1.3223636075854301, + "flow/cos_sim": 0.5546976234763861, + "flow/improvement_ratio": 0.9661824498325586, + "flow/mag_ratio_mean": 0.5322676496580243, + "flow/mag_ratio_std": 0.22808025730773807, + "step": 92160 + }, + { + "epoch": 0.95791454021973, + "eval_cos_loss": 0.44530233182013035, + "eval_loss": 1.5450147837400436, + "eval_mse_loss": 1.3223636075854301, + "eval_runtime": 2.5148, + "eval_samples_per_second": 795.3, + "eval_steps_per_second": 12.725, + "flow/cos_sim": 0.5546976234763861, + "flow/improvement_ratio": 0.9661824498325586, + "flow/mag_ratio_mean": 0.5322676496580243, + "flow/mag_ratio_std": 0.22808025730773807, + "step": 92160 + }, + { + "epoch": 0.9685580351110603, + "grad_norm": 0.1402869075536728, + "learning_rate": 2.662784798150131e-06, + "loss": 1.5427674055099487, + "step": 93184 + }, + { + "epoch": 0.9685580351110603, + "eval_cos_loss": 0.44235736317932606, + "eval_loss": 1.536139328032732, + "eval_mse_loss": 1.3149606361985207, + "flow/cos_sim": 0.5576427038758993, + "flow/improvement_ratio": 0.9603242203593254, + "flow/mag_ratio_mean": 0.5352574419230223, + "flow/mag_ratio_std": 0.2266941787675023, + "step": 93184 + }, + { + "epoch": 0.9685580351110603, + "eval_cos_loss": 0.44235736317932606, + "eval_loss": 1.536139328032732, + "eval_mse_loss": 1.3149606361985207, + "eval_runtime": 2.565, + "eval_samples_per_second": 779.726, + "eval_steps_per_second": 12.476, + "flow/cos_sim": 0.5576427038758993, + "flow/improvement_ratio": 0.9603242203593254, + "flow/mag_ratio_mean": 0.5352574419230223, + "flow/mag_ratio_std": 0.2266941787675023, + "step": 93184 + }, + { + "epoch": 0.9792015300023906, + "grad_norm": 0.1338053047657013, + "learning_rate": 1.1797947397548802e-06, + "loss": 1.5422078371047974, + "step": 94208 + }, + { + "epoch": 0.9792015300023906, + "eval_cos_loss": 0.44181027822196484, + "eval_loss": 1.5355971939861774, + "eval_mse_loss": 1.3146920576691628, + "flow/cos_sim": 0.558189669623971, + "flow/improvement_ratio": 0.9626132287085056, + "flow/mag_ratio_mean": 0.5348945092409849, + "flow/mag_ratio_std": 0.22780301421880722, + "step": 94208 + }, + { + "epoch": 0.9792015300023906, + "eval_cos_loss": 0.44181027822196484, + "eval_loss": 1.5355971939861774, + "eval_mse_loss": 1.3146920576691628, + "eval_runtime": 2.4969, + "eval_samples_per_second": 800.987, + "eval_steps_per_second": 12.816, + "flow/cos_sim": 0.558189669623971, + "flow/improvement_ratio": 0.9626132287085056, + "flow/mag_ratio_mean": 0.5348945092409849, + "flow/mag_ratio_std": 0.22780301421880722, + "step": 94208 + }, + { + "epoch": 0.989845024893721, + "grad_norm": 0.12129372358322144, + "learning_rate": 2.9243465362199797e-07, + "loss": 1.541589379310608, + "step": 95232 + }, + { + "epoch": 0.989845024893721, + "eval_cos_loss": 0.44525294937193394, + "eval_loss": 1.5437418557703495, + "eval_mse_loss": 1.3211153745651245, + "flow/cos_sim": 0.5547471418976784, + "flow/improvement_ratio": 0.9627930391579866, + "flow/mag_ratio_mean": 0.5311954086646438, + "flow/mag_ratio_std": 0.2286191936582327, + "step": 95232 + }, + { + "epoch": 0.989845024893721, + "eval_cos_loss": 0.44525294937193394, + "eval_loss": 1.5437418557703495, + "eval_mse_loss": 1.3211153745651245, + "eval_runtime": 2.5197, + "eval_samples_per_second": 793.751, + "eval_steps_per_second": 12.7, + "flow/cos_sim": 0.5547471418976784, + "flow/improvement_ratio": 0.9627930391579866, + "flow/mag_ratio_mean": 0.5311954086646438, + "flow/mag_ratio_std": 0.2286191936582327, + "step": 95232 + } + ], + "logging_steps": 1024, + "max_steps": 96209, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1024, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +}