{ "epoch": 1.9986120749479528, "eval_alpha_0_uf": 0.5116097927093506, "eval_alpha_1_rlced_conifer": 0.4883902370929718, "eval_excess_loss": 0.03742467654742917, "eval_logits/chosen": 1.9375910758972168, "eval_logits/chosen_0_uf": 3.0999701023101807, "eval_logits/chosen_1_rlced_conifer": 1.572981357574463, "eval_logits/rejected": 3.877798080444336, "eval_logits/rejected_0_uf": 5.175272464752197, "eval_logits/rejected_1_rlced_conifer": 3.567551374435425, "eval_logps/chosen": -677.5836791992188, "eval_logps/chosen_0_uf": -497.45623779296875, "eval_logps/chosen_1_rlced_conifer": -728.9337158203125, "eval_logps/rejected": -1262.6171875, "eval_logps/rejected_0_uf": -584.9677124023438, "eval_logps/rejected_1_rlced_conifer": -1451.01318359375, "eval_loss": 0.23950526118278503, "eval_rewards/accuracies": 0.8778195381164551, "eval_rewards/accuracies_0_uf": 0.7632744312286377, "eval_rewards/accuracies_1_rlced_conifer": 0.9097297191619873, "eval_rewards/chosen": -2.8511428833007812, "eval_rewards/chosen_0_uf": -2.081967353820801, "eval_rewards/chosen_1_rlced_conifer": -3.0535335540771484, "eval_rewards/margins": 5.737672805786133, "eval_rewards/margins_0_uf": 1.3516454696655273, "eval_rewards/margins_1_rlced_conifer": 6.98124361038208, "eval_rewards/rejected": -8.588815689086914, "eval_rewards/rejected_0_uf": -3.433612823486328, "eval_rewards/rejected_1_rlced_conifer": -10.03477668762207, "eval_runtime": 389.2096, "eval_samples": 8491, "eval_samples_per_second": 21.816, "eval_steps_per_second": 0.342, "eval_task_excess_loss_0_uf": 0.0723980620206721, "eval_task_excess_loss_1_rlced_conifer": 0.04272522438225138, "eval_task_loss_0_uf": 0.5184707641601562, "eval_task_loss_1_rlced_conifer": 0.17867246270179749, "total_flos": 0.0, "train_loss": 0.14047848768532276, "train_runtime": 41218.966, "train_samples": 184443, "train_samples_per_second": 8.949, "train_steps_per_second": 0.035 }