{ "epoch": 1.0, "eval_logits/chosen": -0.03224845603108406, "eval_logits/rejected": -0.051055099815130234, "eval_logps/chosen": -196.12477111816406, "eval_logps/rejected": -215.46804809570312, "eval_loss": 0.5139751434326172, "eval_rewards/accuracies": 0.6824324131011963, "eval_rewards/chosen": -0.6439886093139648, "eval_rewards/margins": 0.6677516102790833, "eval_rewards/rejected": -1.3117402791976929, "eval_runtime": 216.2859, "eval_samples": 2341, "eval_samples_per_second": 10.824, "eval_steps_per_second": 0.171 }