{ "epoch": 3.0, "eval_logits/chosen": 0.3641189634799957, "eval_logits/rejected": 2.345853090286255, "eval_logps/chosen": -371.7200927734375, "eval_logps/rejected": -656.34228515625, "eval_loss": 0.004054788034409285, "eval_rewards/accuracies": 0.9983165264129639, "eval_rewards/chosen": 1.7269809246063232, "eval_rewards/margins": 17.098175048828125, "eval_rewards/rejected": -15.371195793151855, "eval_runtime": 270.3044, "eval_samples": 9500, "eval_samples_per_second": 35.146, "eval_steps_per_second": 1.099, "train_loss": 0.03725933715852631, "train_runtime": 48940.1741, "train_samples": 188284, "train_samples_per_second": 11.542, "train_steps_per_second": 0.18 }