| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.996996996996997, |
| "eval_steps": 66, |
| "global_step": 166, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06006006006006006, |
| "grad_norm": 34.52524185180664, |
| "learning_rate": 9.936305732484077e-05, |
| "logits/chosen": -0.5943359136581421, |
| "logits/rejected": -0.6087890863418579, |
| "logps/chosen": -16.725000381469727, |
| "logps/rejected": -27.884374618530273, |
| "loss": 0.6565, |
| "nll_loss": 0.17890624701976776, |
| "rewards/accuracies": 0.44999998807907104, |
| "rewards/chosen": -0.008593750186264515, |
| "rewards/margins": 0.0693359375, |
| "rewards/rejected": -0.07792969048023224, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.12012012012012012, |
| "grad_norm": 39.96049499511719, |
| "learning_rate": 9.299363057324841e-05, |
| "logits/chosen": -0.6298828125, |
| "logits/rejected": -0.665820300579071, |
| "logps/chosen": -21.274999618530273, |
| "logps/rejected": -34.41875076293945, |
| "loss": 0.5964, |
| "nll_loss": 0.22294922173023224, |
| "rewards/accuracies": 0.6312500238418579, |
| "rewards/chosen": -0.4951171875, |
| "rewards/margins": 0.8861328363418579, |
| "rewards/rejected": -1.3815429210662842, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.18018018018018017, |
| "grad_norm": 26.530200958251953, |
| "learning_rate": 8.662420382165606e-05, |
| "logits/chosen": -0.60693359375, |
| "logits/rejected": -0.6392577886581421, |
| "logps/chosen": -20.003124237060547, |
| "logps/rejected": -31.056249618530273, |
| "loss": 0.6189, |
| "nll_loss": 0.20683594048023224, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.4129394590854645, |
| "rewards/margins": 0.711865246295929, |
| "rewards/rejected": -1.1256835460662842, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.24024024024024024, |
| "grad_norm": 31.651872634887695, |
| "learning_rate": 8.02547770700637e-05, |
| "logits/chosen": -0.57861328125, |
| "logits/rejected": -0.6119140386581421, |
| "logps/chosen": -19.546875, |
| "logps/rejected": -29.931249618530273, |
| "loss": 0.6779, |
| "nll_loss": 0.21328124403953552, |
| "rewards/accuracies": 0.6312500238418579, |
| "rewards/chosen": -0.4297851622104645, |
| "rewards/margins": 0.8600097894668579, |
| "rewards/rejected": -1.2890136241912842, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3003003003003003, |
| "grad_norm": 26.210861206054688, |
| "learning_rate": 7.388535031847134e-05, |
| "logits/chosen": -0.5921875238418579, |
| "logits/rejected": -0.62109375, |
| "logps/chosen": -20.362499237060547, |
| "logps/rejected": -33.70624923706055, |
| "loss": 0.7017, |
| "nll_loss": 0.208984375, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.5234375, |
| "rewards/margins": 1.094140648841858, |
| "rewards/rejected": -1.617285132408142, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.36036036036036034, |
| "grad_norm": 25.984054565429688, |
| "learning_rate": 6.751592356687899e-05, |
| "logits/chosen": -0.607226550579071, |
| "logits/rejected": -0.625195324420929, |
| "logps/chosen": -19.943750381469727, |
| "logps/rejected": -33.96562576293945, |
| "loss": 0.5971, |
| "nll_loss": 0.2099609375, |
| "rewards/accuracies": 0.6937500238418579, |
| "rewards/chosen": -0.3338378965854645, |
| "rewards/margins": 0.973828136920929, |
| "rewards/rejected": -1.3087890148162842, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3963963963963964, |
| "eval_logits/chosen": -0.6258928775787354, |
| "eval_logits/rejected": -0.6188616156578064, |
| "eval_logps/chosen": -21.77321434020996, |
| "eval_logps/rejected": -34.83571243286133, |
| "eval_loss": 0.6648827195167542, |
| "eval_nll_loss": 0.20853795111179352, |
| "eval_rewards/accuracies": 0.6821428537368774, |
| "eval_rewards/chosen": -0.4113839268684387, |
| "eval_rewards/margins": 1.0587611198425293, |
| "eval_rewards/rejected": -1.470479965209961, |
| "eval_runtime": 26.32, |
| "eval_samples_per_second": 10.562, |
| "eval_steps_per_second": 1.33, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.42042042042042044, |
| "grad_norm": 26.273643493652344, |
| "learning_rate": 6.114649681528662e-05, |
| "logits/chosen": -0.618945300579071, |
| "logits/rejected": -0.6214843988418579, |
| "logps/chosen": -18.078125, |
| "logps/rejected": -30.125, |
| "loss": 0.5691, |
| "nll_loss": 0.19150391221046448, |
| "rewards/accuracies": 0.6937500238418579, |
| "rewards/chosen": -0.13154296576976776, |
| "rewards/margins": 1.20947265625, |
| "rewards/rejected": -1.339257836341858, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4804804804804805, |
| "grad_norm": 21.918045043945312, |
| "learning_rate": 5.477707006369427e-05, |
| "logits/chosen": -0.603515625, |
| "logits/rejected": -0.615527331829071, |
| "logps/chosen": -20.246875762939453, |
| "logps/rejected": -33.287498474121094, |
| "loss": 0.5856, |
| "nll_loss": 0.21635742485523224, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.5206054449081421, |
| "rewards/margins": 1.24072265625, |
| "rewards/rejected": -1.760839819908142, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.5405405405405406, |
| "grad_norm": 32.28929901123047, |
| "learning_rate": 4.840764331210191e-05, |
| "logits/chosen": -0.5863281488418579, |
| "logits/rejected": -0.635546863079071, |
| "logps/chosen": -20.846874237060547, |
| "logps/rejected": -33.584373474121094, |
| "loss": 0.6504, |
| "nll_loss": 0.2353515625, |
| "rewards/accuracies": 0.6937500238418579, |
| "rewards/chosen": -0.680957019329071, |
| "rewards/margins": 1.7887694835662842, |
| "rewards/rejected": -2.4681639671325684, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6006006006006006, |
| "grad_norm": 42.50503921508789, |
| "learning_rate": 4.2038216560509556e-05, |
| "logits/chosen": -0.615429699420929, |
| "logits/rejected": -0.605273425579071, |
| "logps/chosen": -20.068750381469727, |
| "logps/rejected": -31.799999237060547, |
| "loss": 0.6865, |
| "nll_loss": 0.206787109375, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.53125, |
| "rewards/margins": 1.4816405773162842, |
| "rewards/rejected": -2.0130858421325684, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6606606606606606, |
| "grad_norm": 35.92078399658203, |
| "learning_rate": 3.56687898089172e-05, |
| "logits/chosen": -0.614941418170929, |
| "logits/rejected": -0.617382824420929, |
| "logps/chosen": -20.15625, |
| "logps/rejected": -36.556251525878906, |
| "loss": 0.6166, |
| "nll_loss": 0.20083007216453552, |
| "rewards/accuracies": 0.668749988079071, |
| "rewards/chosen": -0.663525402545929, |
| "rewards/margins": 1.5426757335662842, |
| "rewards/rejected": -2.207714796066284, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.7207207207207207, |
| "grad_norm": 35.85774612426758, |
| "learning_rate": 2.929936305732484e-05, |
| "logits/chosen": -0.6103515625, |
| "logits/rejected": -0.6181640625, |
| "logps/chosen": -21.934375762939453, |
| "logps/rejected": -35.60625076293945, |
| "loss": 0.5358, |
| "nll_loss": 0.21962890028953552, |
| "rewards/accuracies": 0.706250011920929, |
| "rewards/chosen": -0.369140625, |
| "rewards/margins": 1.749414086341858, |
| "rewards/rejected": -2.1181640625, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7807807807807807, |
| "grad_norm": 29.95037078857422, |
| "learning_rate": 2.2929936305732484e-05, |
| "logits/chosen": -0.596875011920929, |
| "logits/rejected": -0.6224609613418579, |
| "logps/chosen": -18.59375, |
| "logps/rejected": -35.493751525878906, |
| "loss": 0.4837, |
| "nll_loss": 0.19125977158546448, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -0.36542969942092896, |
| "rewards/margins": 2.2035155296325684, |
| "rewards/rejected": -2.5687499046325684, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7927927927927928, |
| "eval_logits/chosen": -0.6104352474212646, |
| "eval_logits/rejected": -0.6017857193946838, |
| "eval_logps/chosen": -22.016071319580078, |
| "eval_logps/rejected": -35.73214340209961, |
| "eval_loss": 0.6540421843528748, |
| "eval_nll_loss": 0.21071428060531616, |
| "eval_rewards/accuracies": 0.6714285612106323, |
| "eval_rewards/chosen": -0.5337053537368774, |
| "eval_rewards/margins": 1.3877789974212646, |
| "eval_rewards/rejected": -1.9204241037368774, |
| "eval_runtime": 26.2394, |
| "eval_samples_per_second": 10.595, |
| "eval_steps_per_second": 1.334, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.8408408408408409, |
| "grad_norm": 17.50779914855957, |
| "learning_rate": 1.6560509554140128e-05, |
| "logits/chosen": -0.621874988079071, |
| "logits/rejected": -0.6468750238418579, |
| "logps/chosen": -18.740625381469727, |
| "logps/rejected": -36.33124923706055, |
| "loss": 0.5761, |
| "nll_loss": 0.20664063096046448, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.4644531309604645, |
| "rewards/margins": 1.7863280773162842, |
| "rewards/rejected": -2.2529296875, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.9009009009009009, |
| "grad_norm": 24.695812225341797, |
| "learning_rate": 1.0191082802547772e-05, |
| "logits/chosen": -0.613964855670929, |
| "logits/rejected": -0.6097656488418579, |
| "logps/chosen": -17.393749237060547, |
| "logps/rejected": -33.525001525878906, |
| "loss": 0.4594, |
| "nll_loss": 0.19106444716453552, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.20957031846046448, |
| "rewards/margins": 2.38232421875, |
| "rewards/rejected": -2.591796875, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.960960960960961, |
| "grad_norm": 17.522380828857422, |
| "learning_rate": 3.821656050955414e-06, |
| "logits/chosen": -0.612500011920929, |
| "logits/rejected": -0.6226562261581421, |
| "logps/chosen": -19.4375, |
| "logps/rejected": -35.01250076293945, |
| "loss": 0.5153, |
| "nll_loss": 0.20332030951976776, |
| "rewards/accuracies": 0.7437499761581421, |
| "rewards/chosen": -0.4234375059604645, |
| "rewards/margins": 2.30859375, |
| "rewards/rejected": -2.732714891433716, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.996996996996997, |
| "step": 166, |
| "total_flos": 0.0, |
| "train_loss": 0.6033119293580572, |
| "train_runtime": 697.6675, |
| "train_samples_per_second": 3.817, |
| "train_steps_per_second": 0.238 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 166, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 66, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|