{ "epoch": 3.0, "eval_logits/chosen": -2.6887364387512207, "eval_logits/rejected": -2.6727285385131836, "eval_logps/chosen": -75.25064849853516, "eval_logps/rejected": -85.66250610351562, "eval_loss": 0.9378770589828491, "eval_rewards/accuracies": 0.3472222089767456, "eval_rewards/chosen": -0.34173667430877686, "eval_rewards/margins": 1.698632836341858, "eval_rewards/rejected": -2.0403695106506348, "eval_runtime": 119.487, "eval_samples": 2000, "eval_samples_per_second": 16.738, "eval_steps_per_second": 0.527, "total_flos": 0.0, "train_loss": 0.49774221859604084, "train_runtime": 9884.8529, "train_samples": 18340, "train_samples_per_second": 5.566, "train_steps_per_second": 0.087 }