|
{ |
|
"epoch": 1.9986120749479528, |
|
"eval_alpha_0_uf": 0.2676650285720825, |
|
"eval_alpha_1_rlced_conifer": 0.7323350310325623, |
|
"eval_excess_loss": 0.030186501687000458, |
|
"eval_logits/chosen": 0.4870572090148926, |
|
"eval_logits/chosen_0_uf": 1.0820865631103516, |
|
"eval_logits/chosen_1_rlced_conifer": 0.28715550899505615, |
|
"eval_logits/rejected": 2.986161470413208, |
|
"eval_logits/rejected_0_uf": 3.2023887634277344, |
|
"eval_logits/rejected_1_rlced_conifer": 2.983372449874878, |
|
"eval_logps/chosen": -709.6791381835938, |
|
"eval_logps/chosen_0_uf": -540.6270141601562, |
|
"eval_logps/chosen_1_rlced_conifer": -758.7691650390625, |
|
"eval_logps/rejected": -1280.523193359375, |
|
"eval_logps/rejected_0_uf": -637.3894653320312, |
|
"eval_logps/rejected_1_rlced_conifer": -1461.084716796875, |
|
"eval_loss": 0.23909305036067963, |
|
"eval_rewards/accuracies": 0.8787593841552734, |
|
"eval_rewards/accuracies_0_uf": 0.7750738263130188, |
|
"eval_rewards/accuracies_1_rlced_conifer": 0.9088166356086731, |
|
"eval_rewards/chosen": -3.172097682952881, |
|
"eval_rewards/chosen_0_uf": -2.5136752128601074, |
|
"eval_rewards/chosen_1_rlced_conifer": -3.3518855571746826, |
|
"eval_rewards/margins": 5.5957794189453125, |
|
"eval_rewards/margins_0_uf": 1.444154977798462, |
|
"eval_rewards/margins_1_rlced_conifer": 6.783605575561523, |
|
"eval_rewards/rejected": -8.767877578735352, |
|
"eval_rewards/rejected_0_uf": -3.9578301906585693, |
|
"eval_rewards/rejected_1_rlced_conifer": -10.135491371154785, |
|
"eval_runtime": 389.8118, |
|
"eval_samples": 8491, |
|
"eval_samples_per_second": 21.782, |
|
"eval_steps_per_second": 0.341, |
|
"eval_task_excess_loss_0_uf": 0.06897441555737657, |
|
"eval_task_excess_loss_1_rlced_conifer": 0.03781096797136794, |
|
"eval_task_loss_0_uf": 0.5032874345779419, |
|
"eval_task_loss_1_rlced_conifer": 0.17440924048423767, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1639749237232738, |
|
"train_runtime": 41844.7516, |
|
"train_samples": 184443, |
|
"train_samples_per_second": 8.816, |
|
"train_steps_per_second": 0.034 |
|
} |