{ "epoch": 2.989547038327526, "eval_logits/chosen": 2.499317169189453, "eval_logits/rejected": 3.780329704284668, "eval_logps/chosen": -561.7738037109375, "eval_logps/rejected": -756.44921875, "eval_loss": 0.5893987417221069, "eval_rewards/accuracies": 0.7578125, "eval_rewards/chosen": -2.991438865661621, "eval_rewards/margins": 1.9464330673217773, "eval_rewards/rejected": -4.93787145614624, "eval_runtime": 104.5148, "eval_samples": 2000, "eval_samples_per_second": 19.136, "eval_steps_per_second": 0.306, "total_flos": 0.0, "train_loss": 0.22287911155840734, "train_runtime": 11425.5472, "train_samples": 18339, "train_samples_per_second": 4.815, "train_steps_per_second": 0.038 }