{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.996996996996997, "eval_steps": 66, "global_step": 166, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06006006006006006, "grad_norm": 34.52524185180664, "learning_rate": 9.936305732484077e-05, "logits/chosen": -0.5943359136581421, "logits/rejected": -0.6087890863418579, "logps/chosen": -16.725000381469727, "logps/rejected": -27.884374618530273, "loss": 0.6565, "nll_loss": 0.17890624701976776, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.008593750186264515, "rewards/margins": 0.0693359375, "rewards/rejected": -0.07792969048023224, "step": 10 }, { "epoch": 0.12012012012012012, "grad_norm": 39.96049499511719, "learning_rate": 9.299363057324841e-05, "logits/chosen": -0.6298828125, "logits/rejected": -0.665820300579071, "logps/chosen": -21.274999618530273, "logps/rejected": -34.41875076293945, "loss": 0.5964, "nll_loss": 0.22294922173023224, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.4951171875, "rewards/margins": 0.8861328363418579, "rewards/rejected": -1.3815429210662842, "step": 20 }, { "epoch": 0.18018018018018017, "grad_norm": 26.530200958251953, "learning_rate": 8.662420382165606e-05, "logits/chosen": -0.60693359375, "logits/rejected": -0.6392577886581421, "logps/chosen": -20.003124237060547, "logps/rejected": -31.056249618530273, "loss": 0.6189, "nll_loss": 0.20683594048023224, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4129394590854645, "rewards/margins": 0.711865246295929, "rewards/rejected": -1.1256835460662842, "step": 30 }, { "epoch": 0.24024024024024024, "grad_norm": 31.651872634887695, "learning_rate": 8.02547770700637e-05, "logits/chosen": -0.57861328125, "logits/rejected": -0.6119140386581421, "logps/chosen": -19.546875, "logps/rejected": -29.931249618530273, "loss": 0.6779, "nll_loss": 0.21328124403953552, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.4297851622104645, "rewards/margins": 0.8600097894668579, "rewards/rejected": -1.2890136241912842, "step": 40 }, { "epoch": 0.3003003003003003, "grad_norm": 26.210861206054688, "learning_rate": 7.388535031847134e-05, "logits/chosen": -0.5921875238418579, "logits/rejected": -0.62109375, "logps/chosen": -20.362499237060547, "logps/rejected": -33.70624923706055, "loss": 0.7017, "nll_loss": 0.208984375, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.5234375, "rewards/margins": 1.094140648841858, "rewards/rejected": -1.617285132408142, "step": 50 }, { "epoch": 0.36036036036036034, "grad_norm": 25.984054565429688, "learning_rate": 6.751592356687899e-05, "logits/chosen": -0.607226550579071, "logits/rejected": -0.625195324420929, "logps/chosen": -19.943750381469727, "logps/rejected": -33.96562576293945, "loss": 0.5971, "nll_loss": 0.2099609375, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.3338378965854645, "rewards/margins": 0.973828136920929, "rewards/rejected": -1.3087890148162842, "step": 60 }, { "epoch": 0.3963963963963964, "eval_logits/chosen": -0.6258928775787354, "eval_logits/rejected": -0.6188616156578064, "eval_logps/chosen": -21.77321434020996, "eval_logps/rejected": -34.83571243286133, "eval_loss": 0.6648827195167542, "eval_nll_loss": 0.20853795111179352, "eval_rewards/accuracies": 0.6821428537368774, "eval_rewards/chosen": -0.4113839268684387, "eval_rewards/margins": 1.0587611198425293, "eval_rewards/rejected": -1.470479965209961, "eval_runtime": 26.32, "eval_samples_per_second": 10.562, "eval_steps_per_second": 1.33, "step": 66 }, { "epoch": 0.42042042042042044, "grad_norm": 26.273643493652344, "learning_rate": 6.114649681528662e-05, "logits/chosen": -0.618945300579071, "logits/rejected": -0.6214843988418579, "logps/chosen": -18.078125, "logps/rejected": -30.125, "loss": 0.5691, "nll_loss": 0.19150391221046448, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.13154296576976776, "rewards/margins": 1.20947265625, "rewards/rejected": -1.339257836341858, "step": 70 }, { "epoch": 0.4804804804804805, "grad_norm": 21.918045043945312, "learning_rate": 5.477707006369427e-05, "logits/chosen": -0.603515625, "logits/rejected": -0.615527331829071, "logps/chosen": -20.246875762939453, "logps/rejected": -33.287498474121094, "loss": 0.5856, "nll_loss": 0.21635742485523224, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5206054449081421, "rewards/margins": 1.24072265625, "rewards/rejected": -1.760839819908142, "step": 80 }, { "epoch": 0.5405405405405406, "grad_norm": 32.28929901123047, "learning_rate": 4.840764331210191e-05, "logits/chosen": -0.5863281488418579, "logits/rejected": -0.635546863079071, "logps/chosen": -20.846874237060547, "logps/rejected": -33.584373474121094, "loss": 0.6504, "nll_loss": 0.2353515625, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.680957019329071, "rewards/margins": 1.7887694835662842, "rewards/rejected": -2.4681639671325684, "step": 90 }, { "epoch": 0.6006006006006006, "grad_norm": 42.50503921508789, "learning_rate": 4.2038216560509556e-05, "logits/chosen": -0.615429699420929, "logits/rejected": -0.605273425579071, "logps/chosen": -20.068750381469727, "logps/rejected": -31.799999237060547, "loss": 0.6865, "nll_loss": 0.206787109375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.53125, "rewards/margins": 1.4816405773162842, "rewards/rejected": -2.0130858421325684, "step": 100 }, { "epoch": 0.6606606606606606, "grad_norm": 35.92078399658203, "learning_rate": 3.56687898089172e-05, "logits/chosen": -0.614941418170929, "logits/rejected": -0.617382824420929, "logps/chosen": -20.15625, "logps/rejected": -36.556251525878906, "loss": 0.6166, "nll_loss": 0.20083007216453552, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.663525402545929, "rewards/margins": 1.5426757335662842, "rewards/rejected": -2.207714796066284, "step": 110 }, { "epoch": 0.7207207207207207, "grad_norm": 35.85774612426758, "learning_rate": 2.929936305732484e-05, "logits/chosen": -0.6103515625, "logits/rejected": -0.6181640625, "logps/chosen": -21.934375762939453, "logps/rejected": -35.60625076293945, "loss": 0.5358, "nll_loss": 0.21962890028953552, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.369140625, "rewards/margins": 1.749414086341858, "rewards/rejected": -2.1181640625, "step": 120 }, { "epoch": 0.7807807807807807, "grad_norm": 29.95037078857422, "learning_rate": 2.2929936305732484e-05, "logits/chosen": -0.596875011920929, "logits/rejected": -0.6224609613418579, "logps/chosen": -18.59375, "logps/rejected": -35.493751525878906, "loss": 0.4837, "nll_loss": 0.19125977158546448, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.36542969942092896, "rewards/margins": 2.2035155296325684, "rewards/rejected": -2.5687499046325684, "step": 130 }, { "epoch": 0.7927927927927928, "eval_logits/chosen": -0.6104352474212646, "eval_logits/rejected": -0.6017857193946838, "eval_logps/chosen": -22.016071319580078, "eval_logps/rejected": -35.73214340209961, "eval_loss": 0.6540421843528748, "eval_nll_loss": 0.21071428060531616, "eval_rewards/accuracies": 0.6714285612106323, "eval_rewards/chosen": -0.5337053537368774, "eval_rewards/margins": 1.3877789974212646, "eval_rewards/rejected": -1.9204241037368774, "eval_runtime": 26.2394, "eval_samples_per_second": 10.595, "eval_steps_per_second": 1.334, "step": 132 }, { "epoch": 0.8408408408408409, "grad_norm": 17.50779914855957, "learning_rate": 1.6560509554140128e-05, "logits/chosen": -0.621874988079071, "logits/rejected": -0.6468750238418579, "logps/chosen": -18.740625381469727, "logps/rejected": -36.33124923706055, "loss": 0.5761, "nll_loss": 0.20664063096046448, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4644531309604645, "rewards/margins": 1.7863280773162842, "rewards/rejected": -2.2529296875, "step": 140 }, { "epoch": 0.9009009009009009, "grad_norm": 24.695812225341797, "learning_rate": 1.0191082802547772e-05, "logits/chosen": -0.613964855670929, "logits/rejected": -0.6097656488418579, "logps/chosen": -17.393749237060547, "logps/rejected": -33.525001525878906, "loss": 0.4594, "nll_loss": 0.19106444716453552, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20957031846046448, "rewards/margins": 2.38232421875, "rewards/rejected": -2.591796875, "step": 150 }, { "epoch": 0.960960960960961, "grad_norm": 17.522380828857422, "learning_rate": 3.821656050955414e-06, "logits/chosen": -0.612500011920929, "logits/rejected": -0.6226562261581421, "logps/chosen": -19.4375, "logps/rejected": -35.01250076293945, "loss": 0.5153, "nll_loss": 0.20332030951976776, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.4234375059604645, "rewards/margins": 2.30859375, "rewards/rejected": -2.732714891433716, "step": 160 }, { "epoch": 0.996996996996997, "step": 166, "total_flos": 0.0, "train_loss": 0.6033119293580572, "train_runtime": 697.6675, "train_samples_per_second": 3.817, "train_steps_per_second": 0.238 } ], "logging_steps": 10, "max_steps": 166, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 66, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }