{ "epoch": 0.9965156794425087, "eval_logits/chosen": -2.396684169769287, "eval_logits/rejected": -2.352323055267334, "eval_logps/chosen": -293.2704772949219, "eval_logps/rejected": -318.3273010253906, "eval_loss": 0.6241225600242615, "eval_rewards/accuracies": 0.6953125, "eval_rewards/chosen": -0.3064056634902954, "eval_rewards/margins": 0.2502462863922119, "eval_rewards/rejected": -0.5566520094871521, "eval_runtime": 104.8654, "eval_samples": 2000, "eval_samples_per_second": 19.072, "eval_steps_per_second": 0.305, "total_flos": 0.0, "train_loss": 0.6510118654557875, "train_runtime": 3809.3826, "train_samples": 18340, "train_samples_per_second": 4.814, "train_steps_per_second": 0.038 }