{ "epoch": 0.9965156794425087, "eval_logits/chosen": -2.396684169769287, "eval_logits/rejected": -2.352323055267334, "eval_logps/chosen": -293.2704772949219, "eval_logps/rejected": -318.3273010253906, "eval_loss": 0.6241225600242615, "eval_rewards/accuracies": 0.6953125, "eval_rewards/chosen": -0.3064056634902954, "eval_rewards/margins": 0.2502462863922119, "eval_rewards/rejected": -0.5566520094871521, "eval_runtime": 104.8654, "eval_samples": 2000, "eval_samples_per_second": 19.072, "eval_steps_per_second": 0.305 }