{ "epoch": 2.76, "eval_logits/chosen": -2.274653911590576, "eval_logits/rejected": -2.2999308109283447, "eval_logps/chosen": -190.57220458984375, "eval_logps/rejected": -132.8740234375, "eval_loss": 0.68434077501297, "eval_rewards/accuracies": 0.5, "eval_rewards/chosen": 0.04404526203870773, "eval_rewards/margins": 0.03693275526165962, "eval_rewards/rejected": 0.0071125030517578125, "eval_runtime": 6.6846, "eval_samples": 30, "eval_samples_per_second": 4.488, "eval_steps_per_second": 0.15 }