{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997382884061764, "eval_steps": 100, "global_step": 955, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.208333333333333e-09, "logits/chosen": -2.980285167694092, "logits/rejected": -2.87275767326355, "logps/chosen": -313.4390563964844, "logps/rejected": -236.1754150390625, "loss": 0.6931, "pred_label": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1, "use_label": 10.0 }, { "epoch": 0.1, "learning_rate": 4.976717112922002e-07, "logits/chosen": -2.8194870948791504, "logits/rejected": -2.8288567066192627, "logps/chosen": -285.2724304199219, "logps/rejected": -270.956298828125, "loss": 0.6838, "pred_label": 150.7020263671875, "rewards/accuracies": 0.5050504803657532, "rewards/chosen": 0.000817809603177011, "rewards/margins": 0.0014873194741085172, "rewards/rejected": -0.0006695101037621498, "step": 100, "use_label": 659.2979736328125 }, { "epoch": 0.21, "learning_rate": 4.3946449359720607e-07, "logits/chosen": -2.828075647354126, "logits/rejected": -2.816530227661133, "logps/chosen": -278.7549133300781, "logps/rejected": -260.5694274902344, "loss": 0.683, "pred_label": 452.552490234375, "rewards/accuracies": 0.534375011920929, "rewards/chosen": 0.002707230392843485, "rewards/margins": 0.005774380639195442, "rewards/rejected": -0.003067150479182601, "step": 200, "use_label": 1949.447509765625 }, { "epoch": 0.31, "learning_rate": 3.812572759022118e-07, "logits/chosen": -2.8141846656799316, "logits/rejected": -2.8159701824188232, "logps/chosen": -284.0125732421875, "logps/rejected": -253.9112091064453, "loss": 0.6807, "pred_label": 775.85498046875, "rewards/accuracies": 0.5575000047683716, "rewards/chosen": 0.005504029802978039, "rewards/margins": 0.009370613843202591, "rewards/rejected": -0.0038665838073939085, "step": 300, "use_label": 3226.14501953125 }, { "epoch": 0.42, "learning_rate": 3.230500582072177e-07, "logits/chosen": -2.826817512512207, "logits/rejected": -2.8094358444213867, "logps/chosen": -284.3566589355469, "logps/rejected": -262.80731201171875, "loss": 0.6769, "pred_label": 1149.0574951171875, "rewards/accuracies": 0.5774999856948853, "rewards/chosen": 0.007384983357042074, "rewards/margins": 0.015422500669956207, "rewards/rejected": -0.008037514984607697, "step": 400, "use_label": 4452.9423828125 }, { "epoch": 0.52, "learning_rate": 2.648428405122235e-07, "logits/chosen": -2.807734966278076, "logits/rejected": -2.796409845352173, "logps/chosen": -269.9852600097656, "logps/rejected": -252.07232666015625, "loss": 0.6728, "pred_label": 1592.5675048828125, "rewards/accuracies": 0.5756250023841858, "rewards/chosen": 0.006774631794542074, "rewards/margins": 0.01554279588162899, "rewards/rejected": -0.008768163621425629, "step": 500, "use_label": 5609.4326171875 }, { "epoch": 0.63, "learning_rate": 2.0663562281722933e-07, "logits/chosen": -2.8339650630950928, "logits/rejected": -2.82075572013855, "logps/chosen": -285.0927734375, "logps/rejected": -265.4134826660156, "loss": 0.6681, "pred_label": 2111.6650390625, "rewards/accuracies": 0.6206250190734863, "rewards/chosen": 0.013815036043524742, "rewards/margins": 0.0289370846003294, "rewards/rejected": -0.015122047625482082, "step": 600, "use_label": 6690.3349609375 }, { "epoch": 0.73, "learning_rate": 1.4842840512223514e-07, "logits/chosen": -2.827232599258423, "logits/rejected": -2.811751127243042, "logps/chosen": -281.4178771972656, "logps/rejected": -248.81068420410156, "loss": 0.6659, "pred_label": 2680.2724609375, "rewards/accuracies": 0.6200000047683716, "rewards/chosen": 0.01417633332312107, "rewards/margins": 0.029135096818208694, "rewards/rejected": -0.014958759769797325, "step": 700, "use_label": 7721.7275390625 }, { "epoch": 0.84, "learning_rate": 9.022118742724097e-08, "logits/chosen": -2.8300516605377197, "logits/rejected": -2.835542678833008, "logps/chosen": -288.608642578125, "logps/rejected": -261.5773010253906, "loss": 0.6646, "pred_label": 3286.232421875, "rewards/accuracies": 0.6200000047683716, "rewards/chosen": 0.014839441515505314, "rewards/margins": 0.03136582300066948, "rewards/rejected": -0.01652638241648674, "step": 800, "use_label": 8715.767578125 }, { "epoch": 0.94, "learning_rate": 3.20139697322468e-08, "logits/chosen": -2.8211710453033447, "logits/rejected": -2.8280835151672363, "logps/chosen": -277.363525390625, "logps/rejected": -256.4843444824219, "loss": 0.6641, "pred_label": 3882.75244140625, "rewards/accuracies": 0.6331250071525574, "rewards/chosen": 0.01446867547929287, "rewards/margins": 0.02932187356054783, "rewards/rejected": -0.014853193424642086, "step": 900, "use_label": 9719.2470703125 }, { "epoch": 1.0, "eval_logits/chosen": -2.842418670654297, "eval_logits/rejected": -2.846235752105713, "eval_logps/chosen": -284.122314453125, "eval_logps/rejected": -259.4594421386719, "eval_loss": 0.6635700464248657, "eval_pred_label": 4600.50390625, "eval_rewards/accuracies": 0.628000020980835, "eval_rewards/chosen": 0.013506044633686543, "eval_rewards/margins": 0.029479000717401505, "eval_rewards/rejected": -0.015972958877682686, "eval_runtime": 438.8322, "eval_samples_per_second": 4.558, "eval_steps_per_second": 0.285, "eval_use_label": 10931.49609375, "step": 955 }, { "epoch": 1.0, "step": 955, "total_flos": 0.0, "train_loss": 0.6728555943953429, "train_runtime": 24272.064, "train_samples_per_second": 2.519, "train_steps_per_second": 0.039 } ], "logging_steps": 100, "max_steps": 955, "num_train_epochs": 1, "save_steps": 10, "total_flos": 0.0, "trial_name": null, "trial_params": null }