{ "epoch": 1.0, "eval_logits/chosen": -2.6974706649780273, "eval_logits/rejected": -2.665019989013672, "eval_logps/chosen": -311.6889953613281, "eval_logps/rejected": -322.95947265625, "eval_loss": 0.2408759742975235, "eval_pred_label": 9189.576171875, "eval_rewards/accuracies": 0.734000027179718, "eval_rewards/chosen": -2.7431609630584717, "eval_rewards/margins": 3.6228184700012207, "eval_rewards/rejected": -6.36598014831543, "eval_runtime": 452.5604, "eval_samples": 2000, "eval_samples_per_second": 4.419, "eval_steps_per_second": 0.276, "eval_use_label": 6842.423828125, "train_loss": 0.31699458866219243, "train_runtime": 25218.7851, "train_samples": 61135, "train_samples_per_second": 2.424, "train_steps_per_second": 0.038 }