{ "epoch": 1.0, "eval_logits/chosen": -2.4944536685943604, "eval_logits/rejected": -2.4963433742523193, "eval_logps/chosen": -269.25555419921875, "eval_logps/rejected": -253.21238708496094, "eval_loss": 0.6399702429771423, "eval_rewards/accuracies": 0.6370000243186951, "eval_rewards/chosen": 0.030110126361250877, "eval_rewards/margins": 0.05743245780467987, "eval_rewards/rejected": -0.027322327718138695, "eval_runtime": 803.6977, "eval_samples": 2000, "eval_samples_per_second": 2.488, "eval_steps_per_second": 0.311, "train_loss": 0.6598132096035942, "train_runtime": 45126.4521, "train_samples": 61135, "train_samples_per_second": 1.355, "train_steps_per_second": 0.021 }