{ "epoch": 3.0, "eval_logits/chosen": -2.952404499053955, "eval_logits/rejected": -2.754680871963501, "eval_logps/chosen": -247.9913787841797, "eval_logps/rejected": -735.1937866210938, "eval_loss": 0.002894825069233775, "eval_rewards/accuracies": 0.9991582632064819, "eval_rewards/chosen": 1.4965133666992188, "eval_rewards/margins": 34.79835510253906, "eval_rewards/rejected": -33.301841735839844, "eval_runtime": 463.448, "eval_samples": 9500, "eval_samples_per_second": 20.499, "eval_steps_per_second": 0.641, "train_loss": 0.022219736984536855, "train_runtime": 94567.8662, "train_samples": 188284, "train_samples_per_second": 5.973, "train_steps_per_second": 0.093 }