{ "epoch": 0.9993060374739764, "eval_logits/chosen": 0.6849595308303833, "eval_logits/rejected": 1.771243691444397, "eval_logps/chosen": -536.7197875976562, "eval_logps/rejected": -805.5457763671875, "eval_loss": 0.28813818097114563, "eval_rewards/accuracies": 0.8815789222717285, "eval_rewards/chosen": -1.5213572978973389, "eval_rewards/margins": 2.4328768253326416, "eval_rewards/rejected": -3.9542336463928223, "eval_runtime": 387.4996, "eval_samples": 8491, "eval_samples_per_second": 21.912, "eval_steps_per_second": 0.343 }