sfulay's picture
End of training
c473213 verified
raw
history blame
774 Bytes
{
"epoch": 0.9965156794425087,
"eval_logits/chosen": -2.281348943710327,
"eval_logits/rejected": -2.2318127155303955,
"eval_logps/chosen": -292.6180114746094,
"eval_logps/rejected": -316.9007568359375,
"eval_loss": 0.6281932592391968,
"eval_rewards/accuracies": 0.703125,
"eval_rewards/chosen": -0.2998806834220886,
"eval_rewards/margins": 0.2425060272216797,
"eval_rewards/rejected": -0.5423867106437683,
"eval_runtime": 104.9019,
"eval_samples": 2000,
"eval_samples_per_second": 19.065,
"eval_steps_per_second": 0.305,
"total_flos": 0.0,
"train_loss": 0.6509068312344851,
"train_runtime": 3880.8302,
"train_samples": 18340,
"train_samples_per_second": 4.726,
"train_steps_per_second": 0.037
}