{ "epoch": 1.0, "eval_logits/chosen": 1.397847294807434, "eval_logits/rejected": 2.356464147567749, "eval_logps/chosen": -430.1231384277344, "eval_logps/rejected": -566.2046508789062, "eval_loss": 0.534032940864563, "eval_rewards/accuracies": 0.78515625, "eval_rewards/chosen": -1.6749324798583984, "eval_rewards/margins": 1.3604930639266968, "eval_rewards/rejected": -3.0354254245758057, "eval_runtime": 103.4008, "eval_samples": 2000, "eval_samples_per_second": 19.342, "eval_steps_per_second": 0.309, "total_flos": 0.0, "train_loss": 0.5431942655451627, "train_runtime": 12655.0807, "train_samples": 61134, "train_samples_per_second": 4.831, "train_steps_per_second": 0.038 }