{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997382884061764, "eval_steps": 100, "global_step": 955, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.208333333333333e-09, "logits/chosen": -2.899709463119507, "logits/rejected": -2.879509687423706, "logps/chosen": -314.8815612792969, "logps/rejected": -239.785888671875, "loss": 0.6931, "pred_label": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1, "use_label": 18.0 }, { "epoch": 0.1, "learning_rate": 4.976717112922002e-07, "logits/chosen": -2.8282251358032227, "logits/rejected": -2.8269264698028564, "logps/chosen": -285.5703430175781, "logps/rejected": -267.9831237792969, "loss": 0.6805, "pred_label": 479.7752380371094, "rewards/accuracies": 0.5012626051902771, "rewards/chosen": 0.0007524320390075445, "rewards/margins": 0.0014539315598085523, "rewards/rejected": -0.0007014995790086687, "step": 100, "use_label": 1138.2247314453125 }, { "epoch": 0.21, "learning_rate": 4.3946449359720607e-07, "logits/chosen": -2.829744338989258, "logits/rejected": -2.8234996795654297, "logps/chosen": -283.5379333496094, "logps/rejected": -264.45965576171875, "loss": 0.6797, "pred_label": 1464.762451171875, "rewards/accuracies": 0.5268750190734863, "rewards/chosen": 0.0027550552040338516, "rewards/margins": 0.004276593215763569, "rewards/rejected": -0.0015215380117297173, "step": 200, "use_label": 3337.237548828125 }, { "epoch": 0.31, "learning_rate": 3.812572759022118e-07, "logits/chosen": -2.822391986846924, "logits/rejected": -2.821011543273926, "logps/chosen": -290.09552001953125, "logps/rejected": -260.20050048828125, "loss": 0.678, "pred_label": 2522.33740234375, "rewards/accuracies": 0.546875, "rewards/chosen": 0.004526687320321798, "rewards/margins": 0.008910334669053555, "rewards/rejected": -0.004383646883070469, "step": 300, "use_label": 5479.66259765625 }, { "epoch": 0.42, "learning_rate": 3.230500582072177e-07, "logits/chosen": -2.8350696563720703, "logits/rejected": -2.8237721920013428, "logps/chosen": -284.8573303222656, "logps/rejected": -260.8428039550781, "loss": 0.6752, "pred_label": 3636.47509765625, "rewards/accuracies": 0.5778124928474426, "rewards/chosen": 0.00710176769644022, "rewards/margins": 0.013848603703081608, "rewards/rejected": -0.006746836472302675, "step": 400, "use_label": 7565.52490234375 }, { "epoch": 0.52, "learning_rate": 2.648428405122235e-07, "logits/chosen": -2.8218374252319336, "logits/rejected": -2.810873508453369, "logps/chosen": -281.2003173828125, "logps/rejected": -257.1551818847656, "loss": 0.6707, "pred_label": 4911.896484375, "rewards/accuracies": 0.5731250047683716, "rewards/chosen": 0.008172390051186085, "rewards/margins": 0.017216255888342857, "rewards/rejected": -0.009043867699801922, "step": 500, "use_label": 9490.103515625 }, { "epoch": 0.63, "learning_rate": 2.0663562281722933e-07, "logits/chosen": -2.8341524600982666, "logits/rejected": -2.8230907917022705, "logps/chosen": -284.7864685058594, "logps/rejected": -262.0230712890625, "loss": 0.6665, "pred_label": 6349.55859375, "rewards/accuracies": 0.6025000214576721, "rewards/chosen": 0.012129506096243858, "rewards/margins": 0.025582188740372658, "rewards/rejected": -0.0134526826441288, "step": 600, "use_label": 11252.44140625 }, { "epoch": 0.73, "learning_rate": 1.4842840512223514e-07, "logits/chosen": -2.8274898529052734, "logits/rejected": -2.811511516571045, "logps/chosen": -282.0050048828125, "logps/rejected": -252.4735565185547, "loss": 0.6639, "pred_label": 7854.15869140625, "rewards/accuracies": 0.6112499833106995, "rewards/chosen": 0.013030249625444412, "rewards/margins": 0.026329634711146355, "rewards/rejected": -0.013299385085701942, "step": 700, "use_label": 12947.8408203125 }, { "epoch": 0.84, "learning_rate": 9.022118742724097e-08, "logits/chosen": -2.832928419113159, "logits/rejected": -2.8318238258361816, "logps/chosen": -285.62213134765625, "logps/rejected": -259.8959045410156, "loss": 0.662, "pred_label": 9441.02734375, "rewards/accuracies": 0.6253125071525574, "rewards/chosen": 0.01489347591996193, "rewards/margins": 0.030955424532294273, "rewards/rejected": -0.016061950474977493, "step": 800, "use_label": 14560.97265625 }, { "epoch": 0.94, "learning_rate": 3.20139697322468e-08, "logits/chosen": -2.824517250061035, "logits/rejected": -2.830662250518799, "logps/chosen": -276.052490234375, "logps/rejected": -262.5692138671875, "loss": 0.6628, "pred_label": 10994.255859375, "rewards/accuracies": 0.6143749952316284, "rewards/chosen": 0.012237527407705784, "rewards/margins": 0.026263901963829994, "rewards/rejected": -0.01402637455612421, "step": 900, "use_label": 16207.744140625 }, { "epoch": 1.0, "eval_logits/chosen": -2.841262102127075, "eval_logits/rejected": -2.8343887329101562, "eval_logps/chosen": -281.85919189453125, "eval_logps/rejected": -262.4202880859375, "eval_loss": 0.6618225574493408, "eval_pred_label": 12855.98046875, "eval_rewards/accuracies": 0.6150000095367432, "eval_rewards/chosen": 0.011613711714744568, "eval_rewards/margins": 0.02489962987601757, "eval_rewards/rejected": -0.013285920023918152, "eval_runtime": 826.7995, "eval_samples_per_second": 2.419, "eval_steps_per_second": 0.302, "eval_use_label": 18206.01953125, "step": 955 }, { "epoch": 1.0, "step": 955, "total_flos": 0.0, "train_loss": 0.6705795382954063, "train_runtime": 45840.6595, "train_samples_per_second": 1.334, "train_steps_per_second": 0.021 } ], "logging_steps": 100, "max_steps": 955, "num_train_epochs": 1, "save_steps": 10, "total_flos": 0.0, "trial_name": null, "trial_params": null }