{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9991836734693877, "eval_steps": 500, "global_step": 153, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.125e-08, "logits/chosen": -2.8377254009246826, "logits/rejected": -2.8079206943511963, "logps/chosen": -250.72598266601562, "logps/rejected": -220.86981201171875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.07, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.818211555480957, "logits/rejected": -2.7407193183898926, "logps/chosen": -223.67410278320312, "logps/rejected": -244.5283966064453, "loss": 0.6931, "rewards/accuracies": 0.4305555522441864, "rewards/chosen": 0.00010981507512042299, "rewards/margins": -0.0004382279294077307, "rewards/rejected": 0.000548043055459857, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.989490450759331e-07, "logits/chosen": -2.7361624240875244, "logits/rejected": -2.7406516075134277, "logps/chosen": -244.0074462890625, "logps/rejected": -265.5520935058594, "loss": 0.6925, "rewards/accuracies": 0.59375, "rewards/chosen": 0.017707763239741325, "rewards/margins": 0.0019056980963796377, "rewards/rejected": 0.015802066773176193, "step": 20 }, { "epoch": 0.2, "learning_rate": 4.872270441827174e-07, "logits/chosen": -2.6628098487854004, "logits/rejected": -2.6667449474334717, "logps/chosen": -242.32473754882812, "logps/rejected": -298.2237243652344, "loss": 0.6901, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.06090053170919418, "rewards/margins": 0.005070054437965155, "rewards/rejected": 0.055830471217632294, "step": 30 }, { "epoch": 0.26, "learning_rate": 4.6308512113530063e-07, "logits/chosen": -2.654808521270752, "logits/rejected": -2.6328892707824707, "logps/chosen": -261.2207336425781, "logps/rejected": -291.29718017578125, "loss": 0.6849, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.03462223336100578, "rewards/margins": 0.017941609025001526, "rewards/rejected": 0.016680624336004257, "step": 40 }, { "epoch": 0.33, "learning_rate": 4.277872161641681e-07, "logits/chosen": -2.644195079803467, "logits/rejected": -2.6148362159729004, "logps/chosen": -236.685546875, "logps/rejected": -253.20339965820312, "loss": 0.676, "rewards/accuracies": 0.5625, "rewards/chosen": -0.007446089293807745, "rewards/margins": 0.022974317893385887, "rewards/rejected": -0.03042040765285492, "step": 50 }, { "epoch": 0.39, "learning_rate": 3.8318133624280046e-07, "logits/chosen": -2.6215896606445312, "logits/rejected": -2.611788749694824, "logps/chosen": -217.6464385986328, "logps/rejected": -262.92156982421875, "loss": 0.6697, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.04969613626599312, "rewards/margins": 0.06476574391126633, "rewards/rejected": -0.11446187645196915, "step": 60 }, { "epoch": 0.46, "learning_rate": 3.316028034595861e-07, "logits/chosen": -2.5813162326812744, "logits/rejected": -2.550109386444092, "logps/chosen": -243.3549041748047, "logps/rejected": -266.4927978515625, "loss": 0.6645, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09574780613183975, "rewards/margins": 0.07986501604318619, "rewards/rejected": -0.17561282217502594, "step": 70 }, { "epoch": 0.52, "learning_rate": 2.7575199021178855e-07, "logits/chosen": -2.462153911590576, "logits/rejected": -2.461726665496826, "logps/chosen": -256.2433166503906, "logps/rejected": -277.47711181640625, "loss": 0.6609, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.27105945348739624, "rewards/margins": 0.08728176355361938, "rewards/rejected": -0.35834115743637085, "step": 80 }, { "epoch": 0.59, "learning_rate": 2.1855294234408068e-07, "logits/chosen": -2.4760749340057373, "logits/rejected": -2.440697431564331, "logps/chosen": -245.5635986328125, "logps/rejected": -265.27850341796875, "loss": 0.6553, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.1536487340927124, "rewards/margins": 0.0763266533613205, "rewards/rejected": -0.2299753725528717, "step": 90 }, { "epoch": 0.65, "learning_rate": 1.6300029195778453e-07, "logits/chosen": -2.3994648456573486, "logits/rejected": -2.3992934226989746, "logps/chosen": -253.02627563476562, "logps/rejected": -288.02374267578125, "loss": 0.6533, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21652010083198547, "rewards/margins": 0.1159961000084877, "rewards/rejected": -0.3325161933898926, "step": 100 }, { "epoch": 0.72, "learning_rate": 1.1200247470632392e-07, "logits/chosen": -2.3692357540130615, "logits/rejected": -2.3380370140075684, "logps/chosen": -245.78970336914062, "logps/rejected": -253.38925170898438, "loss": 0.6503, "rewards/accuracies": 0.59375, "rewards/chosen": -0.18969933688640594, "rewards/margins": 0.09776748716831207, "rewards/rejected": -0.287466824054718, "step": 110 }, { "epoch": 0.78, "learning_rate": 6.822945986946385e-08, "logits/chosen": -2.4934608936309814, "logits/rejected": -2.4202685356140137, "logps/chosen": -278.57794189453125, "logps/rejected": -307.510986328125, "loss": 0.6486, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16311107575893402, "rewards/margins": 0.1353956162929535, "rewards/rejected": -0.2985066771507263, "step": 120 }, { "epoch": 0.85, "learning_rate": 3.397296523427806e-08, "logits/chosen": -2.335261821746826, "logits/rejected": -2.3717098236083984, "logps/chosen": -279.692626953125, "logps/rejected": -303.536865234375, "loss": 0.6376, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.18839788436889648, "rewards/margins": 0.11289455741643906, "rewards/rejected": -0.30129244923591614, "step": 130 }, { "epoch": 0.91, "learning_rate": 1.1026475173977978e-08, "logits/chosen": -2.3872182369232178, "logits/rejected": -2.3598804473876953, "logps/chosen": -270.6761169433594, "logps/rejected": -299.95648193359375, "loss": 0.6474, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2340303361415863, "rewards/margins": 0.1268264353275299, "rewards/rejected": -0.3608567714691162, "step": 140 }, { "epoch": 0.98, "learning_rate": 5.913435276374834e-10, "logits/chosen": -2.29586124420166, "logits/rejected": -2.324831485748291, "logps/chosen": -239.48080444335938, "logps/rejected": -296.5132751464844, "loss": 0.6436, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.19154903292655945, "rewards/margins": 0.1397678405046463, "rewards/rejected": -0.33131688833236694, "step": 150 }, { "epoch": 1.0, "step": 153, "total_flos": 0.0, "train_loss": 0.6648475749819887, "train_runtime": 9061.6732, "train_samples_per_second": 2.163, "train_steps_per_second": 0.017 } ], "logging_steps": 10, "max_steps": 153, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }