{ "epoch": 1.0, "eval_logits/chosen": 2.193073272705078, "eval_logits/rejected": 3.0313031673431396, "eval_logps/chosen": -469.1545104980469, "eval_logps/rejected": -617.989501953125, "eval_loss": 0.5419281721115112, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -2.0652458667755127, "eval_rewards/margins": 1.4880279302597046, "eval_rewards/rejected": -3.5532736778259277, "eval_runtime": 103.7243, "eval_samples": 2000, "eval_samples_per_second": 19.282, "eval_steps_per_second": 0.309, "total_flos": 0.0, "train_loss": 0.5571172007955767, "train_runtime": 12724.9946, "train_samples": 61134, "train_samples_per_second": 4.804, "train_steps_per_second": 0.038 }