{ "epoch": 1.0, "eval_logits/chosen": -2.83455753326416, "eval_logits/rejected": -2.824216604232788, "eval_logps/chosen": -282.00970458984375, "eval_logps/rejected": -268.7319030761719, "eval_loss": 0.6687781810760498, "eval_pred_label": 10682.2041015625, "eval_rewards/accuracies": 0.7210000157356262, "eval_rewards/chosen": -0.0034403554163873196, "eval_rewards/margins": 0.6410075426101685, "eval_rewards/rejected": -0.6444479823112488, "eval_runtime": 855.5169, "eval_samples": 2000, "eval_samples_per_second": 2.338, "eval_steps_per_second": 0.292, "eval_use_label": 21379.796875, "train_loss": 0.6626948830969046, "train_runtime": 47570.4937, "train_samples": 61135, "train_samples_per_second": 1.285, "train_steps_per_second": 0.02 }