{ "epoch": 1.9986120749479528, "eval_logits/chosen": 1.8379676342010498, "eval_logits/rejected": 4.30747127532959, "eval_logps/chosen": -870.468017578125, "eval_logps/rejected": -1735.4202880859375, "eval_loss": 0.24658846855163574, "eval_rewards/accuracies": 0.896616518497467, "eval_rewards/chosen": -4.8588385581970215, "eval_rewards/margins": 8.39414119720459, "eval_rewards/rejected": -13.252979278564453, "eval_runtime": 384.8738, "eval_samples": 8491, "eval_samples_per_second": 22.062, "eval_steps_per_second": 0.346 }