NicholasCorrado's picture
End of training
4fd2b06 verified
raw
history blame contribute delete
582 Bytes
{
"epoch": 1.9986120749479528,
"eval_logits/chosen": 1.8379676342010498,
"eval_logits/rejected": 4.30747127532959,
"eval_logps/chosen": -870.468017578125,
"eval_logps/rejected": -1735.4202880859375,
"eval_loss": 0.24658846855163574,
"eval_rewards/accuracies": 0.896616518497467,
"eval_rewards/chosen": -4.8588385581970215,
"eval_rewards/margins": 8.39414119720459,
"eval_rewards/rejected": -13.252979278564453,
"eval_runtime": 384.8738,
"eval_samples": 8491,
"eval_samples_per_second": 22.062,
"eval_steps_per_second": 0.346
}