{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997382884061764, "eval_steps": 100, "global_step": 955, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.208333333333333e-09, "logits/chosen": -2.7525930404663086, "logits/rejected": -2.6732418537139893, "logps/chosen": -297.177001953125, "logps/rejected": -236.72621154785156, "loss": 0.6931, "pred_label": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1, "use_label": 17.0 }, { "epoch": 0.1, "learning_rate": 4.976717112922002e-07, "logits/chosen": -2.6616106033325195, "logits/rejected": -2.6597719192504883, "logps/chosen": -270.4000244140625, "logps/rejected": -249.33827209472656, "loss": 0.6829, "pred_label": 333.43182373046875, "rewards/accuracies": 0.4965277910232544, "rewards/chosen": 0.0011782451765611768, "rewards/margins": 0.001073930412530899, "rewards/rejected": 0.00010431456030346453, "step": 100, "use_label": 1283.5682373046875 }, { "epoch": 0.21, "learning_rate": 4.3946449359720607e-07, "logits/chosen": -2.6945221424102783, "logits/rejected": -2.678621530532837, "logps/chosen": -271.6979064941406, "logps/rejected": -254.37026977539062, "loss": 0.6799, "pred_label": 1038.7462158203125, "rewards/accuracies": 0.5350000262260437, "rewards/chosen": 0.004888341296464205, "rewards/margins": 0.007898561656475067, "rewards/rejected": -0.0030102210585027933, "step": 200, "use_label": 3762.253662109375 }, { "epoch": 0.31, "learning_rate": 3.812572759022118e-07, "logits/chosen": -2.6708526611328125, "logits/rejected": -2.6628105640411377, "logps/chosen": -272.3077392578125, "logps/rejected": -253.75027465820312, "loss": 0.6728, "pred_label": 1884.596923828125, "rewards/accuracies": 0.5653125047683716, "rewards/chosen": 0.010109632275998592, "rewards/margins": 0.016557401046156883, "rewards/rejected": -0.006447767838835716, "step": 300, "use_label": 6116.4033203125 }, { "epoch": 0.42, "learning_rate": 3.230500582072177e-07, "logits/chosen": -2.668009042739868, "logits/rejected": -2.650494337081909, "logps/chosen": -267.6447448730469, "logps/rejected": -253.59107971191406, "loss": 0.6616, "pred_label": 3012.675537109375, "rewards/accuracies": 0.6193749904632568, "rewards/chosen": 0.017754318192601204, "rewards/margins": 0.030351871624588966, "rewards/rejected": -0.012597555294632912, "step": 400, "use_label": 8188.32421875 }, { "epoch": 0.52, "learning_rate": 2.648428405122235e-07, "logits/chosen": -2.6697680950164795, "logits/rejected": -2.6707708835601807, "logps/chosen": -271.2095642089844, "logps/rejected": -247.21224975585938, "loss": 0.6528, "pred_label": 4377.916015625, "rewards/accuracies": 0.625, "rewards/chosen": 0.024391591548919678, "rewards/margins": 0.04303843528032303, "rewards/rejected": -0.01864684373140335, "step": 500, "use_label": 10023.083984375 }, { "epoch": 0.63, "learning_rate": 2.0663562281722933e-07, "logits/chosen": -2.659043073654175, "logits/rejected": -2.6555004119873047, "logps/chosen": -272.95050048828125, "logps/rejected": -251.1392364501953, "loss": 0.6442, "pred_label": 5962.0673828125, "rewards/accuracies": 0.6553124785423279, "rewards/chosen": 0.030743848532438278, "rewards/margins": 0.0554736964404583, "rewards/rejected": -0.024729840457439423, "step": 600, "use_label": 11638.9326171875 }, { "epoch": 0.73, "learning_rate": 1.4842840512223514e-07, "logits/chosen": -2.6591668128967285, "logits/rejected": -2.6622869968414307, "logps/chosen": -269.9889221191406, "logps/rejected": -245.4040985107422, "loss": 0.64, "pred_label": 7640.8505859375, "rewards/accuracies": 0.6478124856948853, "rewards/chosen": 0.03263993561267853, "rewards/margins": 0.061180587857961655, "rewards/rejected": -0.02854064851999283, "step": 700, "use_label": 13160.150390625 }, { "epoch": 0.84, "learning_rate": 9.022118742724097e-08, "logits/chosen": -2.650268793106079, "logits/rejected": -2.6555473804473877, "logps/chosen": -272.705322265625, "logps/rejected": -252.30169677734375, "loss": 0.6368, "pred_label": 9366.9609375, "rewards/accuracies": 0.6415625214576721, "rewards/chosen": 0.031398553401231766, "rewards/margins": 0.06083739921450615, "rewards/rejected": -0.029438842087984085, "step": 800, "use_label": 14634.0390625 }, { "epoch": 0.94, "learning_rate": 3.20139697322468e-08, "logits/chosen": -2.6563680171966553, "logits/rejected": -2.6590001583099365, "logps/chosen": -269.04559326171875, "logps/rejected": -253.2301025390625, "loss": 0.6377, "pred_label": 11126.677734375, "rewards/accuracies": 0.6418750286102295, "rewards/chosen": 0.02964354306459427, "rewards/margins": 0.05687180534005165, "rewards/rejected": -0.027228260412812233, "step": 900, "use_label": 16074.322265625 }, { "epoch": 1.0, "eval_logits/chosen": -2.4939169883728027, "eval_logits/rejected": -2.495774507522583, "eval_logps/chosen": -269.28546142578125, "eval_logps/rejected": -253.23594665527344, "eval_loss": 0.6354129910469055, "eval_pred_label": 13234.32421875, "eval_rewards/accuracies": 0.6259999871253967, "eval_rewards/chosen": 0.027118388563394547, "eval_rewards/margins": 0.056793875992298126, "eval_rewards/rejected": -0.029675481840968132, "eval_runtime": 1016.337, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.246, "eval_use_label": 17827.67578125, "step": 955 }, { "epoch": 1.0, "step": 955, "total_flos": 0.0, "train_loss": 0.6554346030919339, "train_runtime": 50166.5495, "train_samples_per_second": 1.219, "train_steps_per_second": 0.019 } ], "logging_steps": 100, "max_steps": 955, "num_train_epochs": 1, "save_steps": 10, "total_flos": 0.0, "trial_name": null, "trial_params": null }