{ "epoch": 2.0, "eval_logits/chosen": -2.5986759662628174, "eval_logits/rejected": -2.371307134628296, "eval_logps/chosen": -162.12734985351562, "eval_logps/rejected": -348.43353271484375, "eval_loss": 0.024077776819467545, "eval_rewards/accuracies": 0.9940476417541504, "eval_rewards/chosen": -1.3912619352340698, "eval_rewards/margins": 24.882064819335938, "eval_rewards/rejected": -26.27332878112793, "eval_runtime": 908.8237, "eval_samples": 2000, "eval_samples_per_second": 2.201, "eval_steps_per_second": 0.069 }