NicholasCorrado's picture
End of training
0387d47 verified
{
"epoch": 1.9986120749479528,
"eval_alpha_0_uf": 0.5116097927093506,
"eval_alpha_1_rlced_conifer": 0.4883902370929718,
"eval_excess_loss": 0.03742467654742917,
"eval_logits/chosen": 1.9375910758972168,
"eval_logits/chosen_0_uf": 3.0999701023101807,
"eval_logits/chosen_1_rlced_conifer": 1.572981357574463,
"eval_logits/rejected": 3.877798080444336,
"eval_logits/rejected_0_uf": 5.175272464752197,
"eval_logits/rejected_1_rlced_conifer": 3.567551374435425,
"eval_logps/chosen": -677.5836791992188,
"eval_logps/chosen_0_uf": -497.45623779296875,
"eval_logps/chosen_1_rlced_conifer": -728.9337158203125,
"eval_logps/rejected": -1262.6171875,
"eval_logps/rejected_0_uf": -584.9677124023438,
"eval_logps/rejected_1_rlced_conifer": -1451.01318359375,
"eval_loss": 0.23950526118278503,
"eval_rewards/accuracies": 0.8778195381164551,
"eval_rewards/accuracies_0_uf": 0.7632744312286377,
"eval_rewards/accuracies_1_rlced_conifer": 0.9097297191619873,
"eval_rewards/chosen": -2.8511428833007812,
"eval_rewards/chosen_0_uf": -2.081967353820801,
"eval_rewards/chosen_1_rlced_conifer": -3.0535335540771484,
"eval_rewards/margins": 5.737672805786133,
"eval_rewards/margins_0_uf": 1.3516454696655273,
"eval_rewards/margins_1_rlced_conifer": 6.98124361038208,
"eval_rewards/rejected": -8.588815689086914,
"eval_rewards/rejected_0_uf": -3.433612823486328,
"eval_rewards/rejected_1_rlced_conifer": -10.03477668762207,
"eval_runtime": 389.2096,
"eval_samples": 8491,
"eval_samples_per_second": 21.816,
"eval_steps_per_second": 0.342,
"eval_task_excess_loss_0_uf": 0.0723980620206721,
"eval_task_excess_loss_1_rlced_conifer": 0.04272522438225138,
"eval_task_loss_0_uf": 0.5184707641601562,
"eval_task_loss_1_rlced_conifer": 0.17867246270179749
}