| { |
| "epoch": 3.997808219178082, |
| "eval_logits/chosen": -2.860849618911743, |
| "eval_logits/rejected": -2.3769683837890625, |
| "eval_logps/chosen": -706.0082397460938, |
| "eval_logps/rejected": -596.4307861328125, |
| "eval_loss": 0.40534183382987976, |
| "eval_rewards/accuracies": 0.7142857313156128, |
| "eval_rewards/chosen": 2.9905052185058594, |
| "eval_rewards/margins": 4.6127777099609375, |
| "eval_rewards/rejected": -1.6222723722457886, |
| "eval_runtime": 14.2958, |
| "eval_samples_per_second": 7.695, |
| "eval_steps_per_second": 0.979, |
| "total_flos": 519662108934144.0, |
| "train_loss": 0.17924007512469045, |
| "train_runtime": 53882.8337, |
| "train_samples_per_second": 3.251, |
| "train_steps_per_second": 0.102 |
| } |