|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.998451213216314, |
|
"eval_steps": 100, |
|
"global_step": 2904, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": 22.749126434326172, |
|
"logits/rejected": 22.455398559570312, |
|
"logps/chosen": -415.7331848144531, |
|
"logps/rejected": -294.51483154296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_logits/chosen": 23.82334327697754, |
|
"eval_logits/rejected": 23.573287963867188, |
|
"eval_logps/chosen": -354.5701599121094, |
|
"eval_logps/rejected": -274.08343505859375, |
|
"eval_loss": 0.6931473612785339, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": 0.0, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": 0.0, |
|
"eval_runtime": 208.2485, |
|
"eval_samples_per_second": 9.604, |
|
"eval_steps_per_second": 0.303, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": 23.493385314941406, |
|
"logits/rejected": 23.479415893554688, |
|
"logps/chosen": -359.0509948730469, |
|
"logps/rejected": -263.7375793457031, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/chosen": 0.016306404024362564, |
|
"rewards/margins": 0.025918345898389816, |
|
"rewards/rejected": -0.009611942805349827, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": 23.505186080932617, |
|
"logits/rejected": 23.52346420288086, |
|
"logps/chosen": -327.48468017578125, |
|
"logps/rejected": -279.432861328125, |
|
"loss": 0.6965, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.013154825195670128, |
|
"rewards/margins": -0.014362807385623455, |
|
"rewards/rejected": 0.0012079827720299363, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": 23.50873374938965, |
|
"logits/rejected": 23.2880859375, |
|
"logps/chosen": -340.9912109375, |
|
"logps/rejected": -269.15045166015625, |
|
"loss": 0.6955, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0022484897635877132, |
|
"rewards/margins": -0.017411604523658752, |
|
"rewards/rejected": 0.0196601003408432, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.872852233676976e-08, |
|
"logits/chosen": 23.961822509765625, |
|
"logits/rejected": 23.730144500732422, |
|
"logps/chosen": -414.52447509765625, |
|
"logps/rejected": -300.4974670410156, |
|
"loss": 0.6961, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.003453383222222328, |
|
"rewards/margins": 0.017737122252583504, |
|
"rewards/rejected": -0.014283737167716026, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.59106529209622e-08, |
|
"logits/chosen": 23.999908447265625, |
|
"logits/rejected": 23.47333335876465, |
|
"logps/chosen": -313.49395751953125, |
|
"logps/rejected": -216.2849578857422, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.021781612187623978, |
|
"rewards/margins": 0.03288044035434723, |
|
"rewards/rejected": -0.011098823510110378, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": 23.825542449951172, |
|
"logits/rejected": 23.716323852539062, |
|
"logps/chosen": -306.31744384765625, |
|
"logps/rejected": -260.7249755859375, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.009675316512584686, |
|
"rewards/margins": -0.021775808185338974, |
|
"rewards/rejected": 0.03145112842321396, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.202749140893471e-07, |
|
"logits/chosen": 23.89028549194336, |
|
"logits/rejected": 23.66950798034668, |
|
"logps/chosen": -364.57757568359375, |
|
"logps/rejected": -250.9732208251953, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0638527050614357, |
|
"rewards/margins": 0.016006827354431152, |
|
"rewards/rejected": 0.047845881432294846, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"logits/chosen": 23.972980499267578, |
|
"logits/rejected": 23.702159881591797, |
|
"logps/chosen": -360.4600524902344, |
|
"logps/rejected": -277.17767333984375, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.10171504318714142, |
|
"rewards/margins": 0.051059722900390625, |
|
"rewards/rejected": 0.05065532401204109, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": 23.601802825927734, |
|
"logits/rejected": 23.44902229309082, |
|
"logps/chosen": -256.45306396484375, |
|
"logps/rejected": -228.2622528076172, |
|
"loss": 0.6742, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.13566820323467255, |
|
"rewards/margins": 0.05287040024995804, |
|
"rewards/rejected": 0.0827978178858757, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.718213058419244e-07, |
|
"logits/chosen": 23.945114135742188, |
|
"logits/rejected": 23.670852661132812, |
|
"logps/chosen": -317.6385192871094, |
|
"logps/rejected": -238.4324188232422, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.14283855259418488, |
|
"rewards/margins": 0.07535254955291748, |
|
"rewards/rejected": 0.0674859955906868, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": 23.83555793762207, |
|
"eval_logits/rejected": 23.585235595703125, |
|
"eval_logps/chosen": -352.80859375, |
|
"eval_logps/rejected": -273.12677001953125, |
|
"eval_loss": 0.6592543125152588, |
|
"eval_rewards/accuracies": 0.6150793433189392, |
|
"eval_rewards/chosen": 0.17615097761154175, |
|
"eval_rewards/margins": 0.0804828330874443, |
|
"eval_rewards/rejected": 0.09566814452409744, |
|
"eval_runtime": 210.7096, |
|
"eval_samples_per_second": 9.492, |
|
"eval_steps_per_second": 0.299, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8900343642611682e-07, |
|
"logits/chosen": 23.709579467773438, |
|
"logits/rejected": 23.512853622436523, |
|
"logps/chosen": -349.40234375, |
|
"logps/rejected": -243.11532592773438, |
|
"loss": 0.6541, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.16010603308677673, |
|
"rewards/margins": 0.09831614792346954, |
|
"rewards/rejected": 0.06178988143801689, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": 23.544376373291016, |
|
"logits/rejected": 23.377239227294922, |
|
"logps/chosen": -341.64080810546875, |
|
"logps/rejected": -247.55844116210938, |
|
"loss": 0.6539, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.1492854803800583, |
|
"rewards/margins": 0.0526873879134655, |
|
"rewards/rejected": 0.09659810364246368, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2336769759450173e-07, |
|
"logits/chosen": 24.006563186645508, |
|
"logits/rejected": 23.8785457611084, |
|
"logps/chosen": -321.85467529296875, |
|
"logps/rejected": -281.0990905761719, |
|
"loss": 0.6401, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.19155286252498627, |
|
"rewards/margins": 0.1119670420885086, |
|
"rewards/rejected": 0.07958582043647766, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.405498281786942e-07, |
|
"logits/chosen": 23.71746826171875, |
|
"logits/rejected": 23.616607666015625, |
|
"logps/chosen": -346.86761474609375, |
|
"logps/rejected": -257.8626708984375, |
|
"loss": 0.6319, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.179647758603096, |
|
"rewards/margins": 0.20804457366466522, |
|
"rewards/rejected": -0.02839680388569832, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": 23.601333618164062, |
|
"logits/rejected": 23.368152618408203, |
|
"logps/chosen": -342.10003662109375, |
|
"logps/rejected": -261.25201416015625, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.13529065251350403, |
|
"rewards/margins": 0.20980004966259003, |
|
"rewards/rejected": -0.0745093896985054, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"logits/chosen": 24.020530700683594, |
|
"logits/rejected": 23.818883895874023, |
|
"logps/chosen": -362.73968505859375, |
|
"logps/rejected": -253.7847137451172, |
|
"loss": 0.5915, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.13591055572032928, |
|
"rewards/margins": 0.31857621669769287, |
|
"rewards/rejected": -0.1826656460762024, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9209621993127146e-07, |
|
"logits/chosen": 23.72347640991211, |
|
"logits/rejected": 23.625173568725586, |
|
"logps/chosen": -337.2410583496094, |
|
"logps/rejected": -265.833740234375, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.06029454618692398, |
|
"rewards/margins": 0.21368882060050964, |
|
"rewards/rejected": -0.15339429676532745, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": 24.024005889892578, |
|
"logits/rejected": 23.694889068603516, |
|
"logps/chosen": -303.23358154296875, |
|
"logps/rejected": -259.80047607421875, |
|
"loss": 0.5912, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.10565178096294403, |
|
"rewards/margins": 0.33361369371414185, |
|
"rewards/rejected": -0.2279619425535202, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2646048109965636e-07, |
|
"logits/chosen": 23.458202362060547, |
|
"logits/rejected": 23.41326904296875, |
|
"logps/chosen": -278.2962341308594, |
|
"logps/rejected": -242.08627319335938, |
|
"loss": 0.5826, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.05947988107800484, |
|
"rewards/margins": 0.3678347170352936, |
|
"rewards/rejected": -0.30835479497909546, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.436426116838488e-07, |
|
"logits/chosen": 23.741714477539062, |
|
"logits/rejected": 23.483057022094727, |
|
"logps/chosen": -314.7781066894531, |
|
"logps/rejected": -248.27880859375, |
|
"loss": 0.5804, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.1307556927204132, |
|
"rewards/margins": 0.3709767758846283, |
|
"rewards/rejected": -0.2402210682630539, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": 23.830230712890625, |
|
"eval_logits/rejected": 23.587175369262695, |
|
"eval_logps/chosen": -353.7904052734375, |
|
"eval_logps/rejected": -277.4797668457031, |
|
"eval_loss": 0.5836150646209717, |
|
"eval_rewards/accuracies": 0.6507936716079712, |
|
"eval_rewards/chosen": 0.07797454297542572, |
|
"eval_rewards/margins": 0.41760751605033875, |
|
"eval_rewards/rejected": -0.33963292837142944, |
|
"eval_runtime": 208.5861, |
|
"eval_samples_per_second": 9.588, |
|
"eval_steps_per_second": 0.302, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": 23.76480484008789, |
|
"logits/rejected": 23.56380271911621, |
|
"logps/chosen": -377.50799560546875, |
|
"logps/rejected": -279.08978271484375, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0905425176024437, |
|
"rewards/margins": 0.527503252029419, |
|
"rewards/rejected": -0.43696069717407227, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7800687285223364e-07, |
|
"logits/chosen": 23.482959747314453, |
|
"logits/rejected": 23.370895385742188, |
|
"logps/chosen": -316.96038818359375, |
|
"logps/rejected": -253.94686889648438, |
|
"loss": 0.5691, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.16429784893989563, |
|
"rewards/margins": 0.4349435865879059, |
|
"rewards/rejected": -0.5992413759231567, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9518900343642607e-07, |
|
"logits/chosen": 23.473817825317383, |
|
"logits/rejected": 23.369760513305664, |
|
"logps/chosen": -334.98663330078125, |
|
"logps/rejected": -293.44854736328125, |
|
"loss": 0.5962, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1187012642621994, |
|
"rewards/margins": 0.38999611139297485, |
|
"rewards/rejected": -0.5086973905563354, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": 23.480493545532227, |
|
"logits/rejected": 23.42662239074707, |
|
"logps/chosen": -329.04595947265625, |
|
"logps/rejected": -243.5697784423828, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.16112008690834045, |
|
"rewards/margins": 0.41859644651412964, |
|
"rewards/rejected": -0.5797165036201477, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2955326460481097e-07, |
|
"logits/chosen": 23.753459930419922, |
|
"logits/rejected": 23.624629974365234, |
|
"logps/chosen": -347.7720642089844, |
|
"logps/rejected": -273.23162841796875, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.06806284189224243, |
|
"rewards/margins": 0.4797073304653168, |
|
"rewards/rejected": -0.5477702021598816, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4673539518900345e-07, |
|
"logits/chosen": 23.70407485961914, |
|
"logits/rejected": 23.5228328704834, |
|
"logps/chosen": -310.2815856933594, |
|
"logps/rejected": -250.3536376953125, |
|
"loss": 0.5718, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.07945041358470917, |
|
"rewards/margins": 0.49080556631088257, |
|
"rewards/rejected": -0.5702559351921082, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": 23.76226234436035, |
|
"logits/rejected": 23.472620010375977, |
|
"logps/chosen": -301.5387268066406, |
|
"logps/rejected": -240.7628631591797, |
|
"loss": 0.601, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.07512088119983673, |
|
"rewards/margins": 0.4631730914115906, |
|
"rewards/rejected": -0.5382939577102661, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810996563573884e-07, |
|
"logits/chosen": 23.984760284423828, |
|
"logits/rejected": 23.863937377929688, |
|
"logps/chosen": -373.2278137207031, |
|
"logps/rejected": -285.9132995605469, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.10135757923126221, |
|
"rewards/margins": 0.6022639274597168, |
|
"rewards/rejected": -0.5009063482284546, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.982817869415807e-07, |
|
"logits/chosen": 23.733274459838867, |
|
"logits/rejected": 23.508481979370117, |
|
"logps/chosen": -356.46099853515625, |
|
"logps/rejected": -259.97003173828125, |
|
"loss": 0.5751, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.18595895171165466, |
|
"rewards/margins": 0.5399104952812195, |
|
"rewards/rejected": -0.7258695363998413, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982778415614236e-07, |
|
"logits/chosen": 23.513275146484375, |
|
"logits/rejected": 23.471511840820312, |
|
"logps/chosen": -293.0979919433594, |
|
"logps/rejected": -249.67446899414062, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.22943711280822754, |
|
"rewards/margins": 0.6720036268234253, |
|
"rewards/rejected": -0.9014407396316528, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": 23.749773025512695, |
|
"eval_logits/rejected": 23.52240753173828, |
|
"eval_logps/chosen": -356.49285888671875, |
|
"eval_logps/rejected": -281.9402770996094, |
|
"eval_loss": 0.5510157942771912, |
|
"eval_rewards/accuracies": 0.7420634627342224, |
|
"eval_rewards/chosen": -0.19227494299411774, |
|
"eval_rewards/margins": 0.5934095978736877, |
|
"eval_rewards/rejected": -0.7856844663619995, |
|
"eval_runtime": 210.4467, |
|
"eval_samples_per_second": 9.504, |
|
"eval_steps_per_second": 0.299, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963643321852277e-07, |
|
"logits/chosen": 23.758647918701172, |
|
"logits/rejected": 23.599285125732422, |
|
"logps/chosen": -387.0029296875, |
|
"logps/rejected": -297.8297119140625, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.22534582018852234, |
|
"rewards/margins": 0.4947783946990967, |
|
"rewards/rejected": -0.7201241254806519, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944508228090318e-07, |
|
"logits/chosen": 23.673627853393555, |
|
"logits/rejected": 23.470468521118164, |
|
"logps/chosen": -269.2679748535156, |
|
"logps/rejected": -209.1413116455078, |
|
"loss": 0.5428, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2326889932155609, |
|
"rewards/margins": 0.49293145537376404, |
|
"rewards/rejected": -0.7256205677986145, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.925373134328357e-07, |
|
"logits/chosen": 23.728256225585938, |
|
"logits/rejected": 23.57656478881836, |
|
"logps/chosen": -341.84552001953125, |
|
"logps/rejected": -279.85650634765625, |
|
"loss": 0.5848, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2053179293870926, |
|
"rewards/margins": 0.5053264498710632, |
|
"rewards/rejected": -0.7106443643569946, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.906238040566398e-07, |
|
"logits/chosen": 23.395517349243164, |
|
"logits/rejected": 23.30283546447754, |
|
"logps/chosen": -276.00958251953125, |
|
"logps/rejected": -245.6515655517578, |
|
"loss": 0.5731, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.4472281038761139, |
|
"rewards/margins": 0.5112749338150024, |
|
"rewards/rejected": -0.9585030674934387, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.887102946804438e-07, |
|
"logits/chosen": 23.459369659423828, |
|
"logits/rejected": 23.258296966552734, |
|
"logps/chosen": -351.51153564453125, |
|
"logps/rejected": -265.9107666015625, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.248783141374588, |
|
"rewards/margins": 0.7760157585144043, |
|
"rewards/rejected": -1.02479887008667, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.867967853042479e-07, |
|
"logits/chosen": 23.64513397216797, |
|
"logits/rejected": 23.49908447265625, |
|
"logps/chosen": -327.1449890136719, |
|
"logps/rejected": -301.5306396484375, |
|
"loss": 0.5287, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.29137879610061646, |
|
"rewards/margins": 0.8306191563606262, |
|
"rewards/rejected": -1.1219979524612427, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.84883275928052e-07, |
|
"logits/chosen": 23.72499656677246, |
|
"logits/rejected": 23.477428436279297, |
|
"logps/chosen": -337.2041320800781, |
|
"logps/rejected": -292.93463134765625, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.20421965420246124, |
|
"rewards/margins": 0.776501476764679, |
|
"rewards/rejected": -0.980721116065979, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.82969766551856e-07, |
|
"logits/chosen": 23.811683654785156, |
|
"logits/rejected": 23.42571258544922, |
|
"logps/chosen": -364.2945251464844, |
|
"logps/rejected": -283.0462646484375, |
|
"loss": 0.5698, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.06022878363728523, |
|
"rewards/margins": 0.7983857989311218, |
|
"rewards/rejected": -0.8586145639419556, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.810562571756601e-07, |
|
"logits/chosen": 23.39688491821289, |
|
"logits/rejected": 23.162023544311523, |
|
"logps/chosen": -323.4096984863281, |
|
"logps/rejected": -250.5354461669922, |
|
"loss": 0.5771, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1642606556415558, |
|
"rewards/margins": 0.8033970594406128, |
|
"rewards/rejected": -0.967657744884491, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.791427477994642e-07, |
|
"logits/chosen": 23.463436126708984, |
|
"logits/rejected": 23.304988861083984, |
|
"logps/chosen": -290.4604797363281, |
|
"logps/rejected": -257.20977783203125, |
|
"loss": 0.5526, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2738291919231415, |
|
"rewards/margins": 0.6287984848022461, |
|
"rewards/rejected": -0.9026277661323547, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_logits/chosen": 23.72638702392578, |
|
"eval_logits/rejected": 23.50330352783203, |
|
"eval_logps/chosen": -356.5235290527344, |
|
"eval_logps/rejected": -283.01190185546875, |
|
"eval_loss": 0.5360822081565857, |
|
"eval_rewards/accuracies": 0.7341269850730896, |
|
"eval_rewards/chosen": -0.19533830881118774, |
|
"eval_rewards/margins": 0.6975098848342896, |
|
"eval_rewards/rejected": -0.8928481936454773, |
|
"eval_runtime": 211.6561, |
|
"eval_samples_per_second": 9.449, |
|
"eval_steps_per_second": 0.298, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.772292384232682e-07, |
|
"logits/chosen": 23.651836395263672, |
|
"logits/rejected": 23.562541961669922, |
|
"logps/chosen": -295.5309143066406, |
|
"logps/rejected": -256.42864990234375, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3951832056045532, |
|
"rewards/margins": 0.48603707551956177, |
|
"rewards/rejected": -0.8812202215194702, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.753157290470723e-07, |
|
"logits/chosen": 23.555591583251953, |
|
"logits/rejected": 23.477405548095703, |
|
"logps/chosen": -291.4106140136719, |
|
"logps/rejected": -254.1300048828125, |
|
"loss": 0.5647, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.44952210783958435, |
|
"rewards/margins": 0.4881154000759125, |
|
"rewards/rejected": -0.937637448310852, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7340221967087635e-07, |
|
"logits/chosen": 23.740278244018555, |
|
"logits/rejected": 23.43073844909668, |
|
"logps/chosen": -283.1187438964844, |
|
"logps/rejected": -268.111083984375, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4848947525024414, |
|
"rewards/margins": 0.5371454954147339, |
|
"rewards/rejected": -1.0220401287078857, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.714887102946804e-07, |
|
"logits/chosen": 23.772052764892578, |
|
"logits/rejected": 23.574148178100586, |
|
"logps/chosen": -316.62310791015625, |
|
"logps/rejected": -249.21389770507812, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.24736304581165314, |
|
"rewards/margins": 0.6857331395149231, |
|
"rewards/rejected": -0.9330962300300598, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6957520091848447e-07, |
|
"logits/chosen": 23.818843841552734, |
|
"logits/rejected": 23.663349151611328, |
|
"logps/chosen": -301.2689208984375, |
|
"logps/rejected": -274.0567932128906, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.04559071734547615, |
|
"rewards/margins": 0.5639020800590515, |
|
"rewards/rejected": -0.6094927191734314, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6766169154228853e-07, |
|
"logits/chosen": 23.291194915771484, |
|
"logits/rejected": 23.422870635986328, |
|
"logps/chosen": -323.94488525390625, |
|
"logps/rejected": -233.00833129882812, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.03107648529112339, |
|
"rewards/margins": 0.7044192552566528, |
|
"rewards/rejected": -0.7354957461357117, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.657481821660926e-07, |
|
"logits/chosen": 23.393449783325195, |
|
"logits/rejected": 23.201961517333984, |
|
"logps/chosen": -318.3015441894531, |
|
"logps/rejected": -219.90170288085938, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.05152938514947891, |
|
"rewards/margins": 0.8564150929450989, |
|
"rewards/rejected": -0.90794438123703, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6383467278989666e-07, |
|
"logits/chosen": 23.315677642822266, |
|
"logits/rejected": 23.30160903930664, |
|
"logps/chosen": -354.28302001953125, |
|
"logps/rejected": -268.2124938964844, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.004499013535678387, |
|
"rewards/margins": 0.9241636395454407, |
|
"rewards/rejected": -0.9196645617485046, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6192116341370067e-07, |
|
"logits/chosen": 23.65988540649414, |
|
"logits/rejected": 23.262027740478516, |
|
"logps/chosen": -363.83416748046875, |
|
"logps/rejected": -269.9544677734375, |
|
"loss": 0.5463, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.03792769834399223, |
|
"rewards/margins": 0.8194522857666016, |
|
"rewards/rejected": -0.8573800325393677, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.6000765403750473e-07, |
|
"logits/chosen": 23.21782684326172, |
|
"logits/rejected": 22.95124053955078, |
|
"logps/chosen": -272.1101379394531, |
|
"logps/rejected": -233.3650665283203, |
|
"loss": 0.5225, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.18960613012313843, |
|
"rewards/margins": 0.5189381837844849, |
|
"rewards/rejected": -0.7085443139076233, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 23.67182731628418, |
|
"eval_logits/rejected": 23.457815170288086, |
|
"eval_logps/chosen": -355.6113586425781, |
|
"eval_logps/rejected": -282.89288330078125, |
|
"eval_loss": 0.5261635184288025, |
|
"eval_rewards/accuracies": 0.7539682388305664, |
|
"eval_rewards/chosen": -0.10412228107452393, |
|
"eval_rewards/margins": 0.7768236994743347, |
|
"eval_rewards/rejected": -0.8809459805488586, |
|
"eval_runtime": 208.2947, |
|
"eval_samples_per_second": 9.602, |
|
"eval_steps_per_second": 0.302, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.580941446613088e-07, |
|
"logits/chosen": 23.604022979736328, |
|
"logits/rejected": 23.44409942626953, |
|
"logps/chosen": -326.31378173828125, |
|
"logps/rejected": -279.2933349609375, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2937595248222351, |
|
"rewards/margins": 0.6278744339942932, |
|
"rewards/rejected": -0.9216337203979492, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5618063528511285e-07, |
|
"logits/chosen": 23.713848114013672, |
|
"logits/rejected": 23.53582000732422, |
|
"logps/chosen": -304.9338684082031, |
|
"logps/rejected": -268.5104064941406, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.10584266483783722, |
|
"rewards/margins": 0.8640462160110474, |
|
"rewards/rejected": -0.9698888063430786, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.542671259089169e-07, |
|
"logits/chosen": 23.50804328918457, |
|
"logits/rejected": 23.286922454833984, |
|
"logps/chosen": -291.4549560546875, |
|
"logps/rejected": -222.6033935546875, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3200764060020447, |
|
"rewards/margins": 0.6575796008110046, |
|
"rewards/rejected": -0.9776560068130493, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.52353616532721e-07, |
|
"logits/chosen": 23.716320037841797, |
|
"logits/rejected": 23.562469482421875, |
|
"logps/chosen": -322.17047119140625, |
|
"logps/rejected": -258.58917236328125, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1657361090183258, |
|
"rewards/margins": 0.7579048871994019, |
|
"rewards/rejected": -0.9236409068107605, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5044010715652504e-07, |
|
"logits/chosen": 23.58610725402832, |
|
"logits/rejected": 23.374378204345703, |
|
"logps/chosen": -303.7857971191406, |
|
"logps/rejected": -266.2262878417969, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.1886710226535797, |
|
"rewards/margins": 0.5424461364746094, |
|
"rewards/rejected": -0.7311171293258667, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.485265977803291e-07, |
|
"logits/chosen": 23.34024429321289, |
|
"logits/rejected": 23.08355140686035, |
|
"logps/chosen": -346.318603515625, |
|
"logps/rejected": -288.74432373046875, |
|
"loss": 0.5315, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1655091792345047, |
|
"rewards/margins": 0.8151466250419617, |
|
"rewards/rejected": -0.98065584897995, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4661308840413316e-07, |
|
"logits/chosen": 23.876493453979492, |
|
"logits/rejected": 23.629627227783203, |
|
"logps/chosen": -300.15509033203125, |
|
"logps/rejected": -273.7672424316406, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.30857834219932556, |
|
"rewards/margins": 0.6672872304916382, |
|
"rewards/rejected": -0.9758656620979309, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.446995790279372e-07, |
|
"logits/chosen": 23.596471786499023, |
|
"logits/rejected": 23.427684783935547, |
|
"logps/chosen": -334.6555480957031, |
|
"logps/rejected": -264.3316345214844, |
|
"loss": 0.5231, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2455914467573166, |
|
"rewards/margins": 0.883080005645752, |
|
"rewards/rejected": -1.128671407699585, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4278606965174123e-07, |
|
"logits/chosen": 23.314987182617188, |
|
"logits/rejected": 23.11943817138672, |
|
"logps/chosen": -298.7581787109375, |
|
"logps/rejected": -246.3135223388672, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3651345372200012, |
|
"rewards/margins": 0.677712619304657, |
|
"rewards/rejected": -1.0428470373153687, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.408725602755453e-07, |
|
"logits/chosen": 23.37234115600586, |
|
"logits/rejected": 23.340373992919922, |
|
"logps/chosen": -286.86737060546875, |
|
"logps/rejected": -220.9873504638672, |
|
"loss": 0.5577, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4736716151237488, |
|
"rewards/margins": 0.5933648347854614, |
|
"rewards/rejected": -1.0670363903045654, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/chosen": 23.661834716796875, |
|
"eval_logits/rejected": 23.4466495513916, |
|
"eval_logps/chosen": -356.5157775878906, |
|
"eval_logps/rejected": -284.3682861328125, |
|
"eval_loss": 0.5155950784683228, |
|
"eval_rewards/accuracies": 0.7658730149269104, |
|
"eval_rewards/chosen": -0.19456443190574646, |
|
"eval_rewards/margins": 0.8339203000068665, |
|
"eval_rewards/rejected": -1.02848482131958, |
|
"eval_runtime": 211.7272, |
|
"eval_samples_per_second": 9.446, |
|
"eval_steps_per_second": 0.298, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3895905089934936e-07, |
|
"logits/chosen": 23.545116424560547, |
|
"logits/rejected": 23.458499908447266, |
|
"logps/chosen": -338.8705139160156, |
|
"logps/rejected": -272.00714111328125, |
|
"loss": 0.5331, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2067803144454956, |
|
"rewards/margins": 0.8310182690620422, |
|
"rewards/rejected": -1.037798523902893, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.370455415231534e-07, |
|
"logits/chosen": 23.625316619873047, |
|
"logits/rejected": 23.448591232299805, |
|
"logps/chosen": -345.89208984375, |
|
"logps/rejected": -306.5247497558594, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.30394795536994934, |
|
"rewards/margins": 0.5751500725746155, |
|
"rewards/rejected": -0.8790979385375977, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.351320321469575e-07, |
|
"logits/chosen": 22.87206268310547, |
|
"logits/rejected": 22.757465362548828, |
|
"logps/chosen": -309.4687805175781, |
|
"logps/rejected": -291.12847900390625, |
|
"loss": 0.5417, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20515112578868866, |
|
"rewards/margins": 0.6493626236915588, |
|
"rewards/rejected": -0.8545138239860535, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3321852277076154e-07, |
|
"logits/chosen": 23.304141998291016, |
|
"logits/rejected": 23.251794815063477, |
|
"logps/chosen": -333.1667785644531, |
|
"logps/rejected": -272.1311950683594, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.41801586747169495, |
|
"rewards/margins": 0.5841894149780273, |
|
"rewards/rejected": -1.0022052526474, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.313050133945656e-07, |
|
"logits/chosen": 23.594745635986328, |
|
"logits/rejected": 23.500207901000977, |
|
"logps/chosen": -357.5419616699219, |
|
"logps/rejected": -274.1604309082031, |
|
"loss": 0.5192, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.20376773178577423, |
|
"rewards/margins": 0.8759373426437378, |
|
"rewards/rejected": -1.0797051191329956, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2939150401836967e-07, |
|
"logits/chosen": 23.767133712768555, |
|
"logits/rejected": 23.464405059814453, |
|
"logps/chosen": -308.720458984375, |
|
"logps/rejected": -290.14306640625, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.28276339173316956, |
|
"rewards/margins": 0.6749929189682007, |
|
"rewards/rejected": -0.9577562212944031, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2747799464217373e-07, |
|
"logits/chosen": 23.170560836791992, |
|
"logits/rejected": 23.10344886779785, |
|
"logps/chosen": -350.4610595703125, |
|
"logps/rejected": -267.5567932128906, |
|
"loss": 0.5273, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.439180463552475, |
|
"rewards/margins": 0.6723843216896057, |
|
"rewards/rejected": -1.1115647554397583, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.255644852659778e-07, |
|
"logits/chosen": 23.414445877075195, |
|
"logits/rejected": 23.61502456665039, |
|
"logps/chosen": -374.15692138671875, |
|
"logps/rejected": -288.3631286621094, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.08570393174886703, |
|
"rewards/margins": 0.6574904918670654, |
|
"rewards/rejected": -0.7431942820549011, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.236509758897818e-07, |
|
"logits/chosen": 23.552722930908203, |
|
"logits/rejected": 23.40909194946289, |
|
"logps/chosen": -342.1145324707031, |
|
"logps/rejected": -264.7322082519531, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.16522836685180664, |
|
"rewards/margins": 0.6978949904441833, |
|
"rewards/rejected": -0.8631232976913452, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2173746651358586e-07, |
|
"logits/chosen": 23.72678565979004, |
|
"logits/rejected": 23.425289154052734, |
|
"logps/chosen": -331.14410400390625, |
|
"logps/rejected": -285.4593200683594, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10031759738922119, |
|
"rewards/margins": 0.6387730836868286, |
|
"rewards/rejected": -0.739090621471405, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_logits/chosen": 23.634296417236328, |
|
"eval_logits/rejected": 23.424331665039062, |
|
"eval_logps/chosen": -353.9219665527344, |
|
"eval_logps/rejected": -281.7333679199219, |
|
"eval_loss": 0.5162664651870728, |
|
"eval_rewards/accuracies": 0.7658730149269104, |
|
"eval_rewards/chosen": 0.0648159608244896, |
|
"eval_rewards/margins": 0.8298115730285645, |
|
"eval_rewards/rejected": -0.7649956345558167, |
|
"eval_runtime": 211.7482, |
|
"eval_samples_per_second": 9.445, |
|
"eval_steps_per_second": 0.298, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.198239571373899e-07, |
|
"logits/chosen": 23.546445846557617, |
|
"logits/rejected": 23.238723754882812, |
|
"logps/chosen": -307.0013122558594, |
|
"logps/rejected": -247.3063201904297, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08633746951818466, |
|
"rewards/margins": 0.7773478031158447, |
|
"rewards/rejected": -0.8636852502822876, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.17910447761194e-07, |
|
"logits/chosen": 23.390674591064453, |
|
"logits/rejected": 23.3981876373291, |
|
"logps/chosen": -337.4337158203125, |
|
"logps/rejected": -311.81414794921875, |
|
"loss": 0.5774, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.21913309395313263, |
|
"rewards/margins": 0.4323801100254059, |
|
"rewards/rejected": -0.6515131592750549, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1599693838499805e-07, |
|
"logits/chosen": 23.517433166503906, |
|
"logits/rejected": 23.37581443786621, |
|
"logps/chosen": -291.6369323730469, |
|
"logps/rejected": -265.741943359375, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.25872206687927246, |
|
"rewards/margins": 0.6349440813064575, |
|
"rewards/rejected": -0.8936660885810852, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.140834290088021e-07, |
|
"logits/chosen": 23.460777282714844, |
|
"logits/rejected": 23.235326766967773, |
|
"logps/chosen": -339.9285888671875, |
|
"logps/rejected": -266.0412292480469, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11111575365066528, |
|
"rewards/margins": 0.9949262738227844, |
|
"rewards/rejected": -1.1060421466827393, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.121699196326062e-07, |
|
"logits/chosen": 23.193099975585938, |
|
"logits/rejected": 23.17205238342285, |
|
"logps/chosen": -333.4886474609375, |
|
"logps/rejected": -274.37274169921875, |
|
"loss": 0.5358, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.505832850933075, |
|
"rewards/margins": 0.6223492622375488, |
|
"rewards/rejected": -1.1281821727752686, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1025641025641024e-07, |
|
"logits/chosen": 23.626953125, |
|
"logits/rejected": 23.612979888916016, |
|
"logps/chosen": -327.89111328125, |
|
"logps/rejected": -297.7337341308594, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.35025396943092346, |
|
"rewards/margins": 0.8313090205192566, |
|
"rewards/rejected": -1.1815630197525024, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.083429008802143e-07, |
|
"logits/chosen": 23.58197593688965, |
|
"logits/rejected": 23.45255470275879, |
|
"logps/chosen": -272.42156982421875, |
|
"logps/rejected": -270.8283386230469, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3132372796535492, |
|
"rewards/margins": 0.5522381663322449, |
|
"rewards/rejected": -0.8654754757881165, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0642939150401836e-07, |
|
"logits/chosen": 23.23889923095703, |
|
"logits/rejected": 23.24991798400879, |
|
"logps/chosen": -314.5959167480469, |
|
"logps/rejected": -257.6392517089844, |
|
"loss": 0.534, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4284973740577698, |
|
"rewards/margins": 0.6584367156028748, |
|
"rewards/rejected": -1.086934208869934, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0451588212782237e-07, |
|
"logits/chosen": 23.414813995361328, |
|
"logits/rejected": 23.380718231201172, |
|
"logps/chosen": -291.32501220703125, |
|
"logps/rejected": -256.01263427734375, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.18692317605018616, |
|
"rewards/margins": 0.8645000457763672, |
|
"rewards/rejected": -1.051423192024231, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0260237275162643e-07, |
|
"logits/chosen": 23.40145492553711, |
|
"logits/rejected": 23.43955421447754, |
|
"logps/chosen": -313.07513427734375, |
|
"logps/rejected": -277.38201904296875, |
|
"loss": 0.5159, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11286661773920059, |
|
"rewards/margins": 0.8762611150741577, |
|
"rewards/rejected": -0.9891278147697449, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_logits/chosen": 23.617877960205078, |
|
"eval_logits/rejected": 23.40951156616211, |
|
"eval_logps/chosen": -355.9697570800781, |
|
"eval_logps/rejected": -284.6782531738281, |
|
"eval_loss": 0.5112624764442444, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": -0.1399604231119156, |
|
"eval_rewards/margins": 0.9195234179496765, |
|
"eval_rewards/rejected": -1.0594837665557861, |
|
"eval_runtime": 211.1679, |
|
"eval_samples_per_second": 9.471, |
|
"eval_steps_per_second": 0.298, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.006888633754305e-07, |
|
"logits/chosen": 23.594928741455078, |
|
"logits/rejected": 23.54049301147461, |
|
"logps/chosen": -319.88677978515625, |
|
"logps/rejected": -260.43389892578125, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.20980267226696014, |
|
"rewards/margins": 0.9086447954177856, |
|
"rewards/rejected": -1.1184475421905518, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9877535399923456e-07, |
|
"logits/chosen": 23.482894897460938, |
|
"logits/rejected": 23.19647216796875, |
|
"logps/chosen": -338.23223876953125, |
|
"logps/rejected": -269.0614929199219, |
|
"loss": 0.5256, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4344770014286041, |
|
"rewards/margins": 0.710912823677063, |
|
"rewards/rejected": -1.1453897953033447, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.968618446230386e-07, |
|
"logits/chosen": 23.347646713256836, |
|
"logits/rejected": 23.12314224243164, |
|
"logps/chosen": -311.5711364746094, |
|
"logps/rejected": -240.50125122070312, |
|
"loss": 0.4916, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3820451498031616, |
|
"rewards/margins": 1.0347092151641846, |
|
"rewards/rejected": -1.4167543649673462, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.949483352468427e-07, |
|
"logits/chosen": 23.311033248901367, |
|
"logits/rejected": 23.248620986938477, |
|
"logps/chosen": -281.490966796875, |
|
"logps/rejected": -240.92086791992188, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.47001034021377563, |
|
"rewards/margins": 0.762096643447876, |
|
"rewards/rejected": -1.2321069240570068, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9303482587064674e-07, |
|
"logits/chosen": 23.50173568725586, |
|
"logits/rejected": 23.377094268798828, |
|
"logps/chosen": -290.4707336425781, |
|
"logps/rejected": -248.4992218017578, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.25379228591918945, |
|
"rewards/margins": 0.8668729662895203, |
|
"rewards/rejected": -1.1206653118133545, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.911213164944508e-07, |
|
"logits/chosen": 23.733707427978516, |
|
"logits/rejected": 23.433372497558594, |
|
"logps/chosen": -346.18353271484375, |
|
"logps/rejected": -291.7870788574219, |
|
"loss": 0.5163, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3051421046257019, |
|
"rewards/margins": 0.9217368364334106, |
|
"rewards/rejected": -1.2268788814544678, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8920780711825487e-07, |
|
"logits/chosen": 23.664628982543945, |
|
"logits/rejected": 23.414520263671875, |
|
"logps/chosen": -396.57269287109375, |
|
"logps/rejected": -270.23681640625, |
|
"loss": 0.5127, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.19412247836589813, |
|
"rewards/margins": 0.9694843292236328, |
|
"rewards/rejected": -1.163606882095337, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8729429774205893e-07, |
|
"logits/chosen": 23.33928680419922, |
|
"logits/rejected": 23.3987979888916, |
|
"logps/chosen": -381.9424133300781, |
|
"logps/rejected": -267.4436340332031, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1822233498096466, |
|
"rewards/margins": 1.0088589191436768, |
|
"rewards/rejected": -1.191082239151001, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8538078836586294e-07, |
|
"logits/chosen": 23.547710418701172, |
|
"logits/rejected": 23.508426666259766, |
|
"logps/chosen": -332.8504638671875, |
|
"logps/rejected": -287.86712646484375, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.26958388090133667, |
|
"rewards/margins": 0.755517840385437, |
|
"rewards/rejected": -1.025101661682129, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.83467278989667e-07, |
|
"logits/chosen": 23.69491195678711, |
|
"logits/rejected": 23.59137725830078, |
|
"logps/chosen": -287.8290100097656, |
|
"logps/rejected": -236.21438598632812, |
|
"loss": 0.5242, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07682293653488159, |
|
"rewards/margins": 0.8297051191329956, |
|
"rewards/rejected": -0.906528115272522, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_logits/chosen": 23.614517211914062, |
|
"eval_logits/rejected": 23.403518676757812, |
|
"eval_logps/chosen": -354.952880859375, |
|
"eval_logps/rejected": -283.23175048828125, |
|
"eval_loss": 0.5089067220687866, |
|
"eval_rewards/accuracies": 0.7658730149269104, |
|
"eval_rewards/chosen": -0.03827480971813202, |
|
"eval_rewards/margins": 0.8765569925308228, |
|
"eval_rewards/rejected": -0.914831817150116, |
|
"eval_runtime": 210.9611, |
|
"eval_samples_per_second": 9.48, |
|
"eval_steps_per_second": 0.299, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8155376961347106e-07, |
|
"logits/chosen": 23.2309627532959, |
|
"logits/rejected": 23.20724105834961, |
|
"logps/chosen": -246.0962677001953, |
|
"logps/rejected": -230.4759979248047, |
|
"loss": 0.5286, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.24791307747364044, |
|
"rewards/margins": 0.7959302663803101, |
|
"rewards/rejected": -1.0438432693481445, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.796402602372751e-07, |
|
"logits/chosen": 23.323627471923828, |
|
"logits/rejected": 23.179901123046875, |
|
"logps/chosen": -294.0438537597656, |
|
"logps/rejected": -236.05673217773438, |
|
"loss": 0.5097, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.21511948108673096, |
|
"rewards/margins": 0.6154388189315796, |
|
"rewards/rejected": -0.8305583000183105, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.777267508610792e-07, |
|
"logits/chosen": 23.346338272094727, |
|
"logits/rejected": 23.224199295043945, |
|
"logps/chosen": -318.80096435546875, |
|
"logps/rejected": -251.92593383789062, |
|
"loss": 0.5228, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.1629999428987503, |
|
"rewards/margins": 0.8454955816268921, |
|
"rewards/rejected": -1.008495569229126, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7581324148488325e-07, |
|
"logits/chosen": 23.233306884765625, |
|
"logits/rejected": 23.179094314575195, |
|
"logps/chosen": -330.75469970703125, |
|
"logps/rejected": -246.1700897216797, |
|
"loss": 0.5374, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.317868173122406, |
|
"rewards/margins": 0.8588002920150757, |
|
"rewards/rejected": -1.176668405532837, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.738997321086873e-07, |
|
"logits/chosen": 23.4466495513916, |
|
"logits/rejected": 23.421428680419922, |
|
"logps/chosen": -325.29388427734375, |
|
"logps/rejected": -277.3059387207031, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.012738706544041634, |
|
"rewards/margins": 0.8846112489700317, |
|
"rewards/rejected": -0.8973498344421387, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7198622273249137e-07, |
|
"logits/chosen": 23.535139083862305, |
|
"logits/rejected": 23.37562370300293, |
|
"logps/chosen": -335.9436340332031, |
|
"logps/rejected": -274.2239990234375, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1785895973443985, |
|
"rewards/margins": 0.5961320400238037, |
|
"rewards/rejected": -0.7747215628623962, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7007271335629544e-07, |
|
"logits/chosen": 23.633747100830078, |
|
"logits/rejected": 23.46231460571289, |
|
"logps/chosen": -295.21759033203125, |
|
"logps/rejected": -258.9006042480469, |
|
"loss": 0.4828, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.19572855532169342, |
|
"rewards/margins": 0.8321182131767273, |
|
"rewards/rejected": -1.0278469324111938, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.681592039800995e-07, |
|
"logits/chosen": 23.68822479248047, |
|
"logits/rejected": 23.501148223876953, |
|
"logps/chosen": -299.13140869140625, |
|
"logps/rejected": -279.1519470214844, |
|
"loss": 0.4402, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.023116961121559143, |
|
"rewards/margins": 1.1388248205184937, |
|
"rewards/rejected": -1.1157079935073853, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.662456946039035e-07, |
|
"logits/chosen": 23.261262893676758, |
|
"logits/rejected": 22.952524185180664, |
|
"logps/chosen": -308.18536376953125, |
|
"logps/rejected": -273.8042907714844, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0076486109755933285, |
|
"rewards/margins": 1.2322968244552612, |
|
"rewards/rejected": -1.2399452924728394, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6433218522770757e-07, |
|
"logits/chosen": 23.609331130981445, |
|
"logits/rejected": 23.460206985473633, |
|
"logps/chosen": -298.27410888671875, |
|
"logps/rejected": -291.9101867675781, |
|
"loss": 0.4618, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07892550528049469, |
|
"rewards/margins": 0.973551869392395, |
|
"rewards/rejected": -1.0524773597717285, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_logits/chosen": 23.585590362548828, |
|
"eval_logits/rejected": 23.38045883178711, |
|
"eval_logps/chosen": -355.79290771484375, |
|
"eval_logps/rejected": -284.2840881347656, |
|
"eval_loss": 0.5076952576637268, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": -0.12227805703878403, |
|
"eval_rewards/margins": 0.8977885842323303, |
|
"eval_rewards/rejected": -1.020066499710083, |
|
"eval_runtime": 209.3271, |
|
"eval_samples_per_second": 9.554, |
|
"eval_steps_per_second": 0.301, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6241867585151163e-07, |
|
"logits/chosen": 23.0650577545166, |
|
"logits/rejected": 22.977046966552734, |
|
"logps/chosen": -336.06597900390625, |
|
"logps/rejected": -278.80828857421875, |
|
"loss": 0.4487, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.024078911170363426, |
|
"rewards/margins": 1.0799994468688965, |
|
"rewards/rejected": -1.1040784120559692, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.605051664753157e-07, |
|
"logits/chosen": 23.251041412353516, |
|
"logits/rejected": 23.117984771728516, |
|
"logps/chosen": -303.11199951171875, |
|
"logps/rejected": -243.7981719970703, |
|
"loss": 0.415, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18380531668663025, |
|
"rewards/margins": 1.1280765533447266, |
|
"rewards/rejected": -1.3118817806243896, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5859165709911975e-07, |
|
"logits/chosen": 23.297225952148438, |
|
"logits/rejected": 23.318119049072266, |
|
"logps/chosen": -334.6638488769531, |
|
"logps/rejected": -316.2551574707031, |
|
"loss": 0.4302, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2101324051618576, |
|
"rewards/margins": 0.9589886665344238, |
|
"rewards/rejected": -1.1691210269927979, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.566781477229238e-07, |
|
"logits/chosen": 23.525909423828125, |
|
"logits/rejected": 23.159460067749023, |
|
"logps/chosen": -318.52093505859375, |
|
"logps/rejected": -268.981201171875, |
|
"loss": 0.4484, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.31944024562835693, |
|
"rewards/margins": 0.7762435674667358, |
|
"rewards/rejected": -1.0956838130950928, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.547646383467279e-07, |
|
"logits/chosen": 23.090410232543945, |
|
"logits/rejected": 23.27143669128418, |
|
"logps/chosen": -314.944580078125, |
|
"logps/rejected": -246.20974731445312, |
|
"loss": 0.4362, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.34100785851478577, |
|
"rewards/margins": 0.8710842132568359, |
|
"rewards/rejected": -1.2120921611785889, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5285112897053194e-07, |
|
"logits/chosen": 23.36246681213379, |
|
"logits/rejected": 23.223459243774414, |
|
"logps/chosen": -286.5093078613281, |
|
"logps/rejected": -283.33514404296875, |
|
"loss": 0.4188, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0016206980217248201, |
|
"rewards/margins": 1.3344464302062988, |
|
"rewards/rejected": -1.3328258991241455, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.50937619594336e-07, |
|
"logits/chosen": 23.439857482910156, |
|
"logits/rejected": 23.41635513305664, |
|
"logps/chosen": -319.5511169433594, |
|
"logps/rejected": -310.8269348144531, |
|
"loss": 0.4497, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.29633527994155884, |
|
"rewards/margins": 0.8939793705940247, |
|
"rewards/rejected": -1.190314531326294, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4902411021814007e-07, |
|
"logits/chosen": 23.304676055908203, |
|
"logits/rejected": 23.236148834228516, |
|
"logps/chosen": -329.2352600097656, |
|
"logps/rejected": -261.53082275390625, |
|
"loss": 0.4335, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.15099604427814484, |
|
"rewards/margins": 1.2369451522827148, |
|
"rewards/rejected": -1.3879411220550537, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4711060084194413e-07, |
|
"logits/chosen": 23.37632179260254, |
|
"logits/rejected": 23.350784301757812, |
|
"logps/chosen": -360.5380859375, |
|
"logps/rejected": -272.3520812988281, |
|
"loss": 0.442, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.015167620964348316, |
|
"rewards/margins": 1.156204342842102, |
|
"rewards/rejected": -1.1410366296768188, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4519709146574814e-07, |
|
"logits/chosen": 23.032573699951172, |
|
"logits/rejected": 22.9952449798584, |
|
"logps/chosen": -225.4923858642578, |
|
"logps/rejected": -196.6672821044922, |
|
"loss": 0.4484, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4950261116027832, |
|
"rewards/margins": 0.9079095721244812, |
|
"rewards/rejected": -1.4029356241226196, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_logits/chosen": 23.538101196289062, |
|
"eval_logits/rejected": 23.34269142150879, |
|
"eval_logps/chosen": -357.8807373046875, |
|
"eval_logps/rejected": -287.3826599121094, |
|
"eval_loss": 0.5019155144691467, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": -0.3310595154762268, |
|
"eval_rewards/margins": 0.9988633990287781, |
|
"eval_rewards/rejected": -1.3299229145050049, |
|
"eval_runtime": 210.9987, |
|
"eval_samples_per_second": 9.479, |
|
"eval_steps_per_second": 0.299, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.432835820895522e-07, |
|
"logits/chosen": 23.460542678833008, |
|
"logits/rejected": 23.26938247680664, |
|
"logps/chosen": -358.67877197265625, |
|
"logps/rejected": -289.0791931152344, |
|
"loss": 0.4235, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.26543277502059937, |
|
"rewards/margins": 1.2576963901519775, |
|
"rewards/rejected": -1.5231291055679321, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.4137007271335626e-07, |
|
"logits/chosen": 23.63456916809082, |
|
"logits/rejected": 23.502344131469727, |
|
"logps/chosen": -284.9480895996094, |
|
"logps/rejected": -279.4847412109375, |
|
"loss": 0.4245, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3471234440803528, |
|
"rewards/margins": 1.1683541536331177, |
|
"rewards/rejected": -1.5154775381088257, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.394565633371603e-07, |
|
"logits/chosen": 23.349411010742188, |
|
"logits/rejected": 23.35630226135254, |
|
"logps/chosen": -341.84881591796875, |
|
"logps/rejected": -319.86358642578125, |
|
"loss": 0.4209, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.29219168424606323, |
|
"rewards/margins": 1.1724560260772705, |
|
"rewards/rejected": -1.464647889137268, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.375430539609644e-07, |
|
"logits/chosen": 23.028972625732422, |
|
"logits/rejected": 23.105947494506836, |
|
"logps/chosen": -298.8593444824219, |
|
"logps/rejected": -308.6123046875, |
|
"loss": 0.4111, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.41705456376075745, |
|
"rewards/margins": 0.954562783241272, |
|
"rewards/rejected": -1.3716174364089966, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3562954458476845e-07, |
|
"logits/chosen": 23.36569595336914, |
|
"logits/rejected": 23.174901962280273, |
|
"logps/chosen": -414.62567138671875, |
|
"logps/rejected": -282.6720275878906, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.30735766887664795, |
|
"rewards/margins": 1.2201875448226929, |
|
"rewards/rejected": -1.5275452136993408, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.337160352085725e-07, |
|
"logits/chosen": 23.13878059387207, |
|
"logits/rejected": 23.121612548828125, |
|
"logps/chosen": -331.0238342285156, |
|
"logps/rejected": -275.01129150390625, |
|
"loss": 0.4363, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.260793000459671, |
|
"rewards/margins": 1.1585649251937866, |
|
"rewards/rejected": -1.4193580150604248, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3180252583237657e-07, |
|
"logits/chosen": 23.040502548217773, |
|
"logits/rejected": 22.87631607055664, |
|
"logps/chosen": -301.6041564941406, |
|
"logps/rejected": -246.01254272460938, |
|
"loss": 0.4526, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19479836523532867, |
|
"rewards/margins": 1.1458656787872314, |
|
"rewards/rejected": -1.340664029121399, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2988901645618063e-07, |
|
"logits/chosen": 23.21322250366211, |
|
"logits/rejected": 22.910724639892578, |
|
"logps/chosen": -270.7756652832031, |
|
"logps/rejected": -233.3585968017578, |
|
"loss": 0.4396, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.4070712924003601, |
|
"rewards/margins": 0.8780719041824341, |
|
"rewards/rejected": -1.2851431369781494, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.279755070799847e-07, |
|
"logits/chosen": 23.265064239501953, |
|
"logits/rejected": 23.274433135986328, |
|
"logps/chosen": -315.3988037109375, |
|
"logps/rejected": -291.09375, |
|
"loss": 0.4049, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.24129147827625275, |
|
"rewards/margins": 1.1495540142059326, |
|
"rewards/rejected": -1.390845537185669, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.260619977037887e-07, |
|
"logits/chosen": 23.321971893310547, |
|
"logits/rejected": 23.265857696533203, |
|
"logps/chosen": -304.5440368652344, |
|
"logps/rejected": -283.64764404296875, |
|
"loss": 0.4228, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15949824452400208, |
|
"rewards/margins": 1.1103687286376953, |
|
"rewards/rejected": -1.269866704940796, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_logits/chosen": 23.51008415222168, |
|
"eval_logits/rejected": 23.319059371948242, |
|
"eval_logps/chosen": -355.1871337890625, |
|
"eval_logps/rejected": -285.07257080078125, |
|
"eval_loss": 0.5033829212188721, |
|
"eval_rewards/accuracies": 0.761904776096344, |
|
"eval_rewards/chosen": -0.06169680133461952, |
|
"eval_rewards/margins": 1.0372183322906494, |
|
"eval_rewards/rejected": -1.0989152193069458, |
|
"eval_runtime": 207.9261, |
|
"eval_samples_per_second": 9.619, |
|
"eval_steps_per_second": 0.303, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2414848832759277e-07, |
|
"logits/chosen": 23.359235763549805, |
|
"logits/rejected": 23.241931915283203, |
|
"logps/chosen": -253.0906219482422, |
|
"logps/rejected": -248.0492706298828, |
|
"loss": 0.4028, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.09821876138448715, |
|
"rewards/margins": 1.3042573928833008, |
|
"rewards/rejected": -1.4024760723114014, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2223497895139683e-07, |
|
"logits/chosen": 23.251922607421875, |
|
"logits/rejected": 23.224475860595703, |
|
"logps/chosen": -323.7980041503906, |
|
"logps/rejected": -312.51934814453125, |
|
"loss": 0.4376, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.02340073511004448, |
|
"rewards/margins": 1.2130701541900635, |
|
"rewards/rejected": -1.2364708185195923, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.203214695752009e-07, |
|
"logits/chosen": 23.745332717895508, |
|
"logits/rejected": 23.615753173828125, |
|
"logps/chosen": -313.46453857421875, |
|
"logps/rejected": -308.4986877441406, |
|
"loss": 0.4391, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.15489891171455383, |
|
"rewards/margins": 0.8978082537651062, |
|
"rewards/rejected": -1.0527071952819824, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1840796019900495e-07, |
|
"logits/chosen": 23.76753044128418, |
|
"logits/rejected": 23.41860580444336, |
|
"logps/chosen": -348.83258056640625, |
|
"logps/rejected": -300.46893310546875, |
|
"loss": 0.4562, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.1853850781917572, |
|
"rewards/margins": 1.198961853981018, |
|
"rewards/rejected": -1.3843467235565186, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.16494450822809e-07, |
|
"logits/chosen": 23.356491088867188, |
|
"logits/rejected": 23.26506996154785, |
|
"logps/chosen": -251.2193145751953, |
|
"logps/rejected": -268.14215087890625, |
|
"loss": 0.4412, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5262545347213745, |
|
"rewards/margins": 0.6762484312057495, |
|
"rewards/rejected": -1.202502965927124, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.145809414466131e-07, |
|
"logits/chosen": 23.517498016357422, |
|
"logits/rejected": 23.316844940185547, |
|
"logps/chosen": -342.25604248046875, |
|
"logps/rejected": -239.2180633544922, |
|
"loss": 0.4239, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.42340001463890076, |
|
"rewards/margins": 1.1046512126922607, |
|
"rewards/rejected": -1.5280513763427734, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1266743207041714e-07, |
|
"logits/chosen": 23.371051788330078, |
|
"logits/rejected": 23.382333755493164, |
|
"logps/chosen": -388.79425048828125, |
|
"logps/rejected": -311.9518127441406, |
|
"loss": 0.4435, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.25299787521362305, |
|
"rewards/margins": 1.0895098447799683, |
|
"rewards/rejected": -1.3425077199935913, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.107539226942212e-07, |
|
"logits/chosen": 23.398571014404297, |
|
"logits/rejected": 23.297183990478516, |
|
"logps/chosen": -284.0433349609375, |
|
"logps/rejected": -269.79901123046875, |
|
"loss": 0.4612, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.34444212913513184, |
|
"rewards/margins": 1.0336390733718872, |
|
"rewards/rejected": -1.3780810832977295, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0884041331802526e-07, |
|
"logits/chosen": 23.48178482055664, |
|
"logits/rejected": 23.24820899963379, |
|
"logps/chosen": -316.18109130859375, |
|
"logps/rejected": -268.31182861328125, |
|
"loss": 0.4396, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.26420363783836365, |
|
"rewards/margins": 1.2460224628448486, |
|
"rewards/rejected": -1.5102260112762451, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0692690394182927e-07, |
|
"logits/chosen": 23.08510971069336, |
|
"logits/rejected": 22.929636001586914, |
|
"logps/chosen": -303.6866760253906, |
|
"logps/rejected": -280.0844421386719, |
|
"loss": 0.4306, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.46213406324386597, |
|
"rewards/margins": 1.072772741317749, |
|
"rewards/rejected": -1.5349067449569702, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_logits/chosen": 23.478702545166016, |
|
"eval_logits/rejected": 23.2889404296875, |
|
"eval_logps/chosen": -356.1548767089844, |
|
"eval_logps/rejected": -285.9320373535156, |
|
"eval_loss": 0.5032446384429932, |
|
"eval_rewards/accuracies": 0.7698412537574768, |
|
"eval_rewards/chosen": -0.15847428143024445, |
|
"eval_rewards/margins": 1.0263888835906982, |
|
"eval_rewards/rejected": -1.1848632097244263, |
|
"eval_runtime": 214.9168, |
|
"eval_samples_per_second": 9.306, |
|
"eval_steps_per_second": 0.293, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0501339456563334e-07, |
|
"logits/chosen": 23.21750831604004, |
|
"logits/rejected": 23.04998016357422, |
|
"logps/chosen": -319.6648254394531, |
|
"logps/rejected": -272.9951477050781, |
|
"loss": 0.42, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12266921997070312, |
|
"rewards/margins": 1.1443729400634766, |
|
"rewards/rejected": -1.2670420408248901, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.030998851894374e-07, |
|
"logits/chosen": 22.977802276611328, |
|
"logits/rejected": 22.969890594482422, |
|
"logps/chosen": -301.5118408203125, |
|
"logps/rejected": -249.35372924804688, |
|
"loss": 0.4142, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.41769084334373474, |
|
"rewards/margins": 0.9436267614364624, |
|
"rewards/rejected": -1.3613176345825195, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0118637581324146e-07, |
|
"logits/chosen": 23.275325775146484, |
|
"logits/rejected": 23.174297332763672, |
|
"logps/chosen": -339.08258056640625, |
|
"logps/rejected": -282.3678283691406, |
|
"loss": 0.422, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15730026364326477, |
|
"rewards/margins": 1.1286394596099854, |
|
"rewards/rejected": -1.2859396934509277, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.992728664370455e-07, |
|
"logits/chosen": 23.317642211914062, |
|
"logits/rejected": 23.297130584716797, |
|
"logps/chosen": -263.40313720703125, |
|
"logps/rejected": -259.40655517578125, |
|
"loss": 0.4378, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5026736259460449, |
|
"rewards/margins": 0.7866870760917664, |
|
"rewards/rejected": -1.289360761642456, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.973593570608496e-07, |
|
"logits/chosen": 23.30654525756836, |
|
"logits/rejected": 23.131305694580078, |
|
"logps/chosen": -299.3922424316406, |
|
"logps/rejected": -269.0783386230469, |
|
"loss": 0.4366, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.43080934882164, |
|
"rewards/margins": 1.0835515260696411, |
|
"rewards/rejected": -1.514360785484314, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9544584768465365e-07, |
|
"logits/chosen": 23.53885269165039, |
|
"logits/rejected": 23.299760818481445, |
|
"logps/chosen": -324.5937194824219, |
|
"logps/rejected": -272.784912109375, |
|
"loss": 0.4427, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.40283799171447754, |
|
"rewards/margins": 1.1869771480560303, |
|
"rewards/rejected": -1.5898151397705078, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.935323383084577e-07, |
|
"logits/chosen": 23.592838287353516, |
|
"logits/rejected": 23.14777183532715, |
|
"logps/chosen": -392.72662353515625, |
|
"logps/rejected": -286.9781494140625, |
|
"loss": 0.4427, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.39330539107322693, |
|
"rewards/margins": 1.0419480800628662, |
|
"rewards/rejected": -1.4352535009384155, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9161882893226177e-07, |
|
"logits/chosen": 23.269372940063477, |
|
"logits/rejected": 22.95911979675293, |
|
"logps/chosen": -316.82574462890625, |
|
"logps/rejected": -261.1885070800781, |
|
"loss": 0.433, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.317082941532135, |
|
"rewards/margins": 1.2134672403335571, |
|
"rewards/rejected": -1.530550241470337, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8970531955606583e-07, |
|
"logits/chosen": 23.39419937133789, |
|
"logits/rejected": 23.142974853515625, |
|
"logps/chosen": -336.1145935058594, |
|
"logps/rejected": -242.3621826171875, |
|
"loss": 0.4514, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19971036911010742, |
|
"rewards/margins": 1.381958246231079, |
|
"rewards/rejected": -1.5816686153411865, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8779181017986984e-07, |
|
"logits/chosen": 23.005327224731445, |
|
"logits/rejected": 22.998939514160156, |
|
"logps/chosen": -389.1512756347656, |
|
"logps/rejected": -290.7593688964844, |
|
"loss": 0.4678, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.16274584829807281, |
|
"rewards/margins": 1.0353623628616333, |
|
"rewards/rejected": -1.198108196258545, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_logits/chosen": 23.455062866210938, |
|
"eval_logits/rejected": 23.266075134277344, |
|
"eval_logps/chosen": -356.9206848144531, |
|
"eval_logps/rejected": -285.68414306640625, |
|
"eval_loss": 0.5029928684234619, |
|
"eval_rewards/accuracies": 0.7817460298538208, |
|
"eval_rewards/chosen": -0.23505355417728424, |
|
"eval_rewards/margins": 0.9250208735466003, |
|
"eval_rewards/rejected": -1.1600743532180786, |
|
"eval_runtime": 212.5498, |
|
"eval_samples_per_second": 9.41, |
|
"eval_steps_per_second": 0.296, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.858783008036739e-07, |
|
"logits/chosen": 23.323863983154297, |
|
"logits/rejected": 23.302270889282227, |
|
"logps/chosen": -362.5963134765625, |
|
"logps/rejected": -284.63519287109375, |
|
"loss": 0.4375, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.15233711898326874, |
|
"rewards/margins": 1.4009491205215454, |
|
"rewards/rejected": -1.5532863140106201, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8396479142747797e-07, |
|
"logits/chosen": 23.571504592895508, |
|
"logits/rejected": 23.417720794677734, |
|
"logps/chosen": -313.31170654296875, |
|
"logps/rejected": -293.6142272949219, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16742083430290222, |
|
"rewards/margins": 1.0415483713150024, |
|
"rewards/rejected": -1.208969235420227, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8205128205128203e-07, |
|
"logits/chosen": 23.33370590209961, |
|
"logits/rejected": 23.32365608215332, |
|
"logps/chosen": -299.41351318359375, |
|
"logps/rejected": -302.18939208984375, |
|
"loss": 0.4146, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.1101953536272049, |
|
"rewards/margins": 1.1390321254730225, |
|
"rewards/rejected": -1.2492274045944214, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.801377726750861e-07, |
|
"logits/chosen": 23.091251373291016, |
|
"logits/rejected": 23.06249237060547, |
|
"logps/chosen": -288.70050048828125, |
|
"logps/rejected": -261.63494873046875, |
|
"loss": 0.4288, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.41927942633628845, |
|
"rewards/margins": 0.8092526197433472, |
|
"rewards/rejected": -1.228532075881958, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7822426329889015e-07, |
|
"logits/chosen": 22.919397354125977, |
|
"logits/rejected": 22.959392547607422, |
|
"logps/chosen": -316.126953125, |
|
"logps/rejected": -247.66845703125, |
|
"loss": 0.4636, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.22512026131153107, |
|
"rewards/margins": 1.1928333044052124, |
|
"rewards/rejected": -1.4179537296295166, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.763107539226942e-07, |
|
"logits/chosen": 23.120534896850586, |
|
"logits/rejected": 22.996898651123047, |
|
"logps/chosen": -366.4676513671875, |
|
"logps/rejected": -257.8288879394531, |
|
"loss": 0.4564, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3532385230064392, |
|
"rewards/margins": 1.007889986038208, |
|
"rewards/rejected": -1.361128568649292, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.743972445464983e-07, |
|
"logits/chosen": 23.175884246826172, |
|
"logits/rejected": 23.074565887451172, |
|
"logps/chosen": -285.8373718261719, |
|
"logps/rejected": -230.23263549804688, |
|
"loss": 0.4406, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.30376359820365906, |
|
"rewards/margins": 0.9630700945854187, |
|
"rewards/rejected": -1.2668339014053345, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7248373517030234e-07, |
|
"logits/chosen": 23.030946731567383, |
|
"logits/rejected": 23.114126205444336, |
|
"logps/chosen": -342.0677185058594, |
|
"logps/rejected": -273.91558837890625, |
|
"loss": 0.4265, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3027496933937073, |
|
"rewards/margins": 0.7853070497512817, |
|
"rewards/rejected": -1.0880568027496338, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.705702257941064e-07, |
|
"logits/chosen": 23.109298706054688, |
|
"logits/rejected": 22.95934295654297, |
|
"logps/chosen": -353.9482727050781, |
|
"logps/rejected": -293.26165771484375, |
|
"loss": 0.3973, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.19786301255226135, |
|
"rewards/margins": 1.2528201341629028, |
|
"rewards/rejected": -1.4506832361221313, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.686567164179104e-07, |
|
"logits/chosen": 23.07802391052246, |
|
"logits/rejected": 22.99662208557129, |
|
"logps/chosen": -328.4552001953125, |
|
"logps/rejected": -250.57901000976562, |
|
"loss": 0.4317, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1944916695356369, |
|
"rewards/margins": 1.2932884693145752, |
|
"rewards/rejected": -1.4877803325653076, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_logits/chosen": 23.452411651611328, |
|
"eval_logits/rejected": 23.262121200561523, |
|
"eval_logps/chosen": -355.9715576171875, |
|
"eval_logps/rejected": -285.541748046875, |
|
"eval_loss": 0.49968841671943665, |
|
"eval_rewards/accuracies": 0.761904776096344, |
|
"eval_rewards/chosen": -0.1401444375514984, |
|
"eval_rewards/margins": 1.0056895017623901, |
|
"eval_rewards/rejected": -1.1458338499069214, |
|
"eval_runtime": 210.203, |
|
"eval_samples_per_second": 9.515, |
|
"eval_steps_per_second": 0.3, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6674320704171447e-07, |
|
"logits/chosen": 23.38498306274414, |
|
"logits/rejected": 23.101451873779297, |
|
"logps/chosen": -313.84991455078125, |
|
"logps/rejected": -229.9058837890625, |
|
"loss": 0.4147, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.2590484023094177, |
|
"rewards/margins": 0.8956319689750671, |
|
"rewards/rejected": -1.1546803712844849, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6482969766551853e-07, |
|
"logits/chosen": 23.376522064208984, |
|
"logits/rejected": 23.071407318115234, |
|
"logps/chosen": -294.4135437011719, |
|
"logps/rejected": -286.2037658691406, |
|
"loss": 0.4243, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.2608863413333893, |
|
"rewards/margins": 1.1486496925354004, |
|
"rewards/rejected": -1.4095360040664673, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.629161882893226e-07, |
|
"logits/chosen": 23.766990661621094, |
|
"logits/rejected": 23.534847259521484, |
|
"logps/chosen": -363.1257629394531, |
|
"logps/rejected": -257.43377685546875, |
|
"loss": 0.4044, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.28155818581581116, |
|
"rewards/margins": 1.1225359439849854, |
|
"rewards/rejected": -1.4040942192077637, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6100267891312666e-07, |
|
"logits/chosen": 23.727947235107422, |
|
"logits/rejected": 23.551546096801758, |
|
"logps/chosen": -354.59808349609375, |
|
"logps/rejected": -309.74041748046875, |
|
"loss": 0.4358, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2375718653202057, |
|
"rewards/margins": 0.9404077529907227, |
|
"rewards/rejected": -1.177979588508606, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.590891695369307e-07, |
|
"logits/chosen": 23.435352325439453, |
|
"logits/rejected": 23.340909957885742, |
|
"logps/chosen": -323.2891845703125, |
|
"logps/rejected": -256.3253479003906, |
|
"loss": 0.4108, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.21504418551921844, |
|
"rewards/margins": 1.1017714738845825, |
|
"rewards/rejected": -1.316815733909607, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.571756601607348e-07, |
|
"logits/chosen": 23.279882431030273, |
|
"logits/rejected": 22.866252899169922, |
|
"logps/chosen": -376.61431884765625, |
|
"logps/rejected": -252.8503875732422, |
|
"loss": 0.4265, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.24882745742797852, |
|
"rewards/margins": 1.2063392400741577, |
|
"rewards/rejected": -1.4551665782928467, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5526215078453884e-07, |
|
"logits/chosen": 23.572551727294922, |
|
"logits/rejected": 23.359222412109375, |
|
"logps/chosen": -348.06396484375, |
|
"logps/rejected": -301.94830322265625, |
|
"loss": 0.4353, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1706874668598175, |
|
"rewards/margins": 1.228468656539917, |
|
"rewards/rejected": -1.399156093597412, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.533486414083429e-07, |
|
"logits/chosen": 23.559356689453125, |
|
"logits/rejected": 23.3609676361084, |
|
"logps/chosen": -354.1952209472656, |
|
"logps/rejected": -299.01385498046875, |
|
"loss": 0.392, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.025229115039110184, |
|
"rewards/margins": 1.3790032863616943, |
|
"rewards/rejected": -1.3537743091583252, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5143513203214697e-07, |
|
"logits/chosen": 23.176847457885742, |
|
"logits/rejected": 23.128990173339844, |
|
"logps/chosen": -395.842529296875, |
|
"logps/rejected": -295.98162841796875, |
|
"loss": 0.4379, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10837908089160919, |
|
"rewards/margins": 1.3724091053009033, |
|
"rewards/rejected": -1.480788230895996, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.49521622655951e-07, |
|
"logits/chosen": 23.313915252685547, |
|
"logits/rejected": 23.103626251220703, |
|
"logps/chosen": -350.606689453125, |
|
"logps/rejected": -294.15594482421875, |
|
"loss": 0.4363, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.2652584910392761, |
|
"rewards/margins": 1.1176103353500366, |
|
"rewards/rejected": -1.382868766784668, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_logits/chosen": 23.417835235595703, |
|
"eval_logits/rejected": 23.231985092163086, |
|
"eval_logps/chosen": -357.8829650878906, |
|
"eval_logps/rejected": -287.6752014160156, |
|
"eval_loss": 0.5009579062461853, |
|
"eval_rewards/accuracies": 0.773809552192688, |
|
"eval_rewards/chosen": -0.3312842845916748, |
|
"eval_rewards/margins": 1.0278921127319336, |
|
"eval_rewards/rejected": -1.3591763973236084, |
|
"eval_runtime": 211.3907, |
|
"eval_samples_per_second": 9.461, |
|
"eval_steps_per_second": 0.298, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4760811327975504e-07, |
|
"logits/chosen": 23.531373977661133, |
|
"logits/rejected": 23.429351806640625, |
|
"logps/chosen": -347.46490478515625, |
|
"logps/rejected": -289.24176025390625, |
|
"loss": 0.4249, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.28774064779281616, |
|
"rewards/margins": 1.0696442127227783, |
|
"rewards/rejected": -1.3573849201202393, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.456946039035591e-07, |
|
"logits/chosen": 23.160110473632812, |
|
"logits/rejected": 23.07761001586914, |
|
"logps/chosen": -372.6455993652344, |
|
"logps/rejected": -254.6509246826172, |
|
"loss": 0.4312, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.29867392778396606, |
|
"rewards/margins": 1.093621850013733, |
|
"rewards/rejected": -1.3922955989837646, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4378109452736316e-07, |
|
"logits/chosen": 23.148435592651367, |
|
"logits/rejected": 23.090463638305664, |
|
"logps/chosen": -316.5802917480469, |
|
"logps/rejected": -288.7501220703125, |
|
"loss": 0.4262, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3632759749889374, |
|
"rewards/margins": 1.0216796398162842, |
|
"rewards/rejected": -1.3849557638168335, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.418675851511672e-07, |
|
"logits/chosen": 23.17940330505371, |
|
"logits/rejected": 23.190204620361328, |
|
"logps/chosen": -346.47625732421875, |
|
"logps/rejected": -270.1147155761719, |
|
"loss": 0.4333, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2974759638309479, |
|
"rewards/margins": 1.0174903869628906, |
|
"rewards/rejected": -1.3149662017822266, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.399540757749713e-07, |
|
"logits/chosen": 23.138202667236328, |
|
"logits/rejected": 22.985610961914062, |
|
"logps/chosen": -340.69940185546875, |
|
"logps/rejected": -311.3159484863281, |
|
"loss": 0.4341, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1574067324399948, |
|
"rewards/margins": 1.2777574062347412, |
|
"rewards/rejected": -1.4351643323898315, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3804056639877535e-07, |
|
"logits/chosen": 23.348569869995117, |
|
"logits/rejected": 23.167980194091797, |
|
"logps/chosen": -273.6280822753906, |
|
"logps/rejected": -238.4679412841797, |
|
"loss": 0.4392, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.03210877254605293, |
|
"rewards/margins": 1.156890630722046, |
|
"rewards/rejected": -1.1889994144439697, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.361270570225794e-07, |
|
"logits/chosen": 23.42279052734375, |
|
"logits/rejected": 23.08903694152832, |
|
"logps/chosen": -358.487548828125, |
|
"logps/rejected": -266.75341796875, |
|
"loss": 0.3848, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.0037168667186051607, |
|
"rewards/margins": 1.2687807083129883, |
|
"rewards/rejected": -1.272497534751892, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3421354764638345e-07, |
|
"logits/chosen": 23.328954696655273, |
|
"logits/rejected": 23.21335792541504, |
|
"logps/chosen": -294.31402587890625, |
|
"logps/rejected": -263.2884826660156, |
|
"loss": 0.44, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1213906854391098, |
|
"rewards/margins": 0.9413496255874634, |
|
"rewards/rejected": -1.0627403259277344, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.323000382701875e-07, |
|
"logits/chosen": 23.385282516479492, |
|
"logits/rejected": 23.229637145996094, |
|
"logps/chosen": -392.8078308105469, |
|
"logps/rejected": -314.957275390625, |
|
"loss": 0.4084, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1200430616736412, |
|
"rewards/margins": 1.2001924514770508, |
|
"rewards/rejected": -1.3202354907989502, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3038652889399157e-07, |
|
"logits/chosen": 23.32499122619629, |
|
"logits/rejected": 23.208293914794922, |
|
"logps/chosen": -338.33892822265625, |
|
"logps/rejected": -305.3815612792969, |
|
"loss": 0.408, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3193683624267578, |
|
"rewards/margins": 1.028808832168579, |
|
"rewards/rejected": -1.348177433013916, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_logits/chosen": 23.395021438598633, |
|
"eval_logits/rejected": 23.213520050048828, |
|
"eval_logps/chosen": -357.0264892578125, |
|
"eval_logps/rejected": -287.1567687988281, |
|
"eval_loss": 0.4989284873008728, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": -0.24563594162464142, |
|
"eval_rewards/margins": 1.0617001056671143, |
|
"eval_rewards/rejected": -1.3073359727859497, |
|
"eval_runtime": 212.6457, |
|
"eval_samples_per_second": 9.405, |
|
"eval_steps_per_second": 0.296, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2847301951779563e-07, |
|
"logits/chosen": 23.246747970581055, |
|
"logits/rejected": 23.268218994140625, |
|
"logps/chosen": -298.40838623046875, |
|
"logps/rejected": -294.05877685546875, |
|
"loss": 0.4063, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.37956395745277405, |
|
"rewards/margins": 1.2896459102630615, |
|
"rewards/rejected": -1.6692098379135132, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.265595101415997e-07, |
|
"logits/chosen": 23.170940399169922, |
|
"logits/rejected": 23.139057159423828, |
|
"logps/chosen": -333.7245178222656, |
|
"logps/rejected": -287.9451599121094, |
|
"loss": 0.4352, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.23647813498973846, |
|
"rewards/margins": 1.159517526626587, |
|
"rewards/rejected": -1.3959954977035522, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2464600076540373e-07, |
|
"logits/chosen": 23.052528381347656, |
|
"logits/rejected": 22.96520233154297, |
|
"logps/chosen": -327.0295104980469, |
|
"logps/rejected": -272.34539794921875, |
|
"loss": 0.4108, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3730023503303528, |
|
"rewards/margins": 1.0741784572601318, |
|
"rewards/rejected": -1.4471808671951294, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.227324913892078e-07, |
|
"logits/chosen": 23.350069046020508, |
|
"logits/rejected": 23.162134170532227, |
|
"logps/chosen": -338.16583251953125, |
|
"logps/rejected": -292.6080627441406, |
|
"loss": 0.4213, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.24311116337776184, |
|
"rewards/margins": 1.0276445150375366, |
|
"rewards/rejected": -1.270755648612976, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2081898201301186e-07, |
|
"logits/chosen": 23.173582077026367, |
|
"logits/rejected": 23.145183563232422, |
|
"logps/chosen": -329.4697265625, |
|
"logps/rejected": -266.3700256347656, |
|
"loss": 0.407, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.023505190387368202, |
|
"rewards/margins": 1.2212746143341064, |
|
"rewards/rejected": -1.2447797060012817, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1890547263681592e-07, |
|
"logits/chosen": 23.282878875732422, |
|
"logits/rejected": 23.070077896118164, |
|
"logps/chosen": -321.710205078125, |
|
"logps/rejected": -277.3352966308594, |
|
"loss": 0.4071, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.25008895993232727, |
|
"rewards/margins": 1.3580764532089233, |
|
"rewards/rejected": -1.6081653833389282, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1699196326061998e-07, |
|
"logits/chosen": 23.08016586303711, |
|
"logits/rejected": 23.203523635864258, |
|
"logps/chosen": -297.59130859375, |
|
"logps/rejected": -280.3871765136719, |
|
"loss": 0.4554, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.3134928345680237, |
|
"rewards/margins": 1.07438063621521, |
|
"rewards/rejected": -1.387873649597168, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1507845388442402e-07, |
|
"logits/chosen": 23.223857879638672, |
|
"logits/rejected": 23.118579864501953, |
|
"logps/chosen": -307.32745361328125, |
|
"logps/rejected": -264.244384765625, |
|
"loss": 0.4215, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4751613140106201, |
|
"rewards/margins": 1.2573087215423584, |
|
"rewards/rejected": -1.732469916343689, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1316494450822808e-07, |
|
"logits/chosen": 23.08175277709961, |
|
"logits/rejected": 23.210302352905273, |
|
"logps/chosen": -348.2358093261719, |
|
"logps/rejected": -269.00909423828125, |
|
"loss": 0.4148, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.5350313186645508, |
|
"rewards/margins": 1.2447912693023682, |
|
"rewards/rejected": -1.779822587966919, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1125143513203214e-07, |
|
"logits/chosen": 23.117109298706055, |
|
"logits/rejected": 23.03956413269043, |
|
"logps/chosen": -350.7597961425781, |
|
"logps/rejected": -257.7859191894531, |
|
"loss": 0.4076, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.42596331238746643, |
|
"rewards/margins": 1.1088837385177612, |
|
"rewards/rejected": -1.5348470211029053, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_logits/chosen": 23.361677169799805, |
|
"eval_logits/rejected": 23.18657112121582, |
|
"eval_logps/chosen": -358.4737854003906, |
|
"eval_logps/rejected": -288.44818115234375, |
|
"eval_loss": 0.4995974004268646, |
|
"eval_rewards/accuracies": 0.7658730149269104, |
|
"eval_rewards/chosen": -0.39036476612091064, |
|
"eval_rewards/margins": 1.0461114645004272, |
|
"eval_rewards/rejected": -1.4364763498306274, |
|
"eval_runtime": 207.254, |
|
"eval_samples_per_second": 9.65, |
|
"eval_steps_per_second": 0.304, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.093379257558362e-07, |
|
"logits/chosen": 23.419483184814453, |
|
"logits/rejected": 23.20507049560547, |
|
"logps/chosen": -321.2181396484375, |
|
"logps/rejected": -264.3491516113281, |
|
"loss": 0.4189, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.5122248530387878, |
|
"rewards/margins": 1.2178277969360352, |
|
"rewards/rejected": -1.7300525903701782, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0742441637964026e-07, |
|
"logits/chosen": 22.683643341064453, |
|
"logits/rejected": 22.83184242248535, |
|
"logps/chosen": -339.54498291015625, |
|
"logps/rejected": -250.641845703125, |
|
"loss": 0.4102, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3275443911552429, |
|
"rewards/margins": 1.4303803443908691, |
|
"rewards/rejected": -1.7579247951507568, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.055109070034443e-07, |
|
"logits/chosen": 23.576740264892578, |
|
"logits/rejected": 23.395267486572266, |
|
"logps/chosen": -349.0409240722656, |
|
"logps/rejected": -268.94268798828125, |
|
"loss": 0.4055, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.29274967312812805, |
|
"rewards/margins": 1.1743746995925903, |
|
"rewards/rejected": -1.4671242237091064, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0359739762724836e-07, |
|
"logits/chosen": 23.352815628051758, |
|
"logits/rejected": 23.3568058013916, |
|
"logps/chosen": -352.31304931640625, |
|
"logps/rejected": -284.7132873535156, |
|
"loss": 0.4133, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.33104512095451355, |
|
"rewards/margins": 1.168330192565918, |
|
"rewards/rejected": -1.499375343322754, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0168388825105242e-07, |
|
"logits/chosen": 23.261768341064453, |
|
"logits/rejected": 23.228496551513672, |
|
"logps/chosen": -345.1585998535156, |
|
"logps/rejected": -339.8830871582031, |
|
"loss": 0.3868, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.12148020416498184, |
|
"rewards/margins": 1.3025624752044678, |
|
"rewards/rejected": -1.424042820930481, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.997703788748565e-07, |
|
"logits/chosen": 23.22171401977539, |
|
"logits/rejected": 22.969928741455078, |
|
"logps/chosen": -328.1639099121094, |
|
"logps/rejected": -222.90762329101562, |
|
"loss": 0.406, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.09366317093372345, |
|
"rewards/margins": 1.3427413702011108, |
|
"rewards/rejected": -1.4364044666290283, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9785686949866055e-07, |
|
"logits/chosen": 22.87033462524414, |
|
"logits/rejected": 23.068653106689453, |
|
"logps/chosen": -331.31146240234375, |
|
"logps/rejected": -285.5386962890625, |
|
"loss": 0.4169, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.057704973965883255, |
|
"rewards/margins": 1.1090171337127686, |
|
"rewards/rejected": -1.1667221784591675, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9594336012246458e-07, |
|
"logits/chosen": 23.055828094482422, |
|
"logits/rejected": 23.081539154052734, |
|
"logps/chosen": -312.4957275390625, |
|
"logps/rejected": -268.7204895019531, |
|
"loss": 0.3909, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.07027649134397507, |
|
"rewards/margins": 1.1952614784240723, |
|
"rewards/rejected": -1.2655378580093384, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9402985074626865e-07, |
|
"logits/chosen": 22.958328247070312, |
|
"logits/rejected": 23.175926208496094, |
|
"logps/chosen": -312.7813720703125, |
|
"logps/rejected": -272.52215576171875, |
|
"loss": 0.4153, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.2530692219734192, |
|
"rewards/margins": 1.2337100505828857, |
|
"rewards/rejected": -1.4867792129516602, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.921163413700727e-07, |
|
"logits/chosen": 23.173688888549805, |
|
"logits/rejected": 23.038455963134766, |
|
"logps/chosen": -327.50531005859375, |
|
"logps/rejected": -257.269287109375, |
|
"loss": 0.4547, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.3794935941696167, |
|
"rewards/margins": 1.0488073825836182, |
|
"rewards/rejected": -1.4283010959625244, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_logits/chosen": 23.329803466796875, |
|
"eval_logits/rejected": 23.160478591918945, |
|
"eval_logps/chosen": -357.08575439453125, |
|
"eval_logps/rejected": -286.7316589355469, |
|
"eval_loss": 0.5008072853088379, |
|
"eval_rewards/accuracies": 0.7857142686843872, |
|
"eval_rewards/chosen": -0.25156161189079285, |
|
"eval_rewards/margins": 1.0132601261138916, |
|
"eval_rewards/rejected": -1.2648216485977173, |
|
"eval_runtime": 212.8249, |
|
"eval_samples_per_second": 9.397, |
|
"eval_steps_per_second": 0.296, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9020283199387677e-07, |
|
"logits/chosen": 23.098756790161133, |
|
"logits/rejected": 23.10630226135254, |
|
"logps/chosen": -345.1263122558594, |
|
"logps/rejected": -260.34613037109375, |
|
"loss": 0.4359, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.14109382033348083, |
|
"rewards/margins": 1.0066546201705933, |
|
"rewards/rejected": -1.147748589515686, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8828932261768083e-07, |
|
"logits/chosen": 23.233016967773438, |
|
"logits/rejected": 23.24778175354004, |
|
"logps/chosen": -295.8356628417969, |
|
"logps/rejected": -244.9901885986328, |
|
"loss": 0.4224, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.49083733558654785, |
|
"rewards/margins": 1.3773233890533447, |
|
"rewards/rejected": -1.868160605430603, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8637581324148487e-07, |
|
"logits/chosen": 23.030765533447266, |
|
"logits/rejected": 22.91606330871582, |
|
"logps/chosen": -311.2125549316406, |
|
"logps/rejected": -287.0599670410156, |
|
"loss": 0.4249, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.3956405520439148, |
|
"rewards/margins": 1.194427728652954, |
|
"rewards/rejected": -1.5900681018829346, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8446230386528893e-07, |
|
"logits/chosen": 22.784257888793945, |
|
"logits/rejected": 22.93459701538086, |
|
"logps/chosen": -270.6285095214844, |
|
"logps/rejected": -242.1689453125, |
|
"loss": 0.3953, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.2096777856349945, |
|
"rewards/margins": 1.2774814367294312, |
|
"rewards/rejected": -1.487159252166748, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.82548794489093e-07, |
|
"logits/chosen": 22.975915908813477, |
|
"logits/rejected": 23.076732635498047, |
|
"logps/chosen": -285.87164306640625, |
|
"logps/rejected": -285.27105712890625, |
|
"loss": 0.3506, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.4368739724159241, |
|
"rewards/margins": 1.2533791065216064, |
|
"rewards/rejected": -1.6902532577514648, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8063528511289706e-07, |
|
"logits/chosen": 23.094058990478516, |
|
"logits/rejected": 23.000532150268555, |
|
"logps/chosen": -311.85418701171875, |
|
"logps/rejected": -342.6611633300781, |
|
"loss": 0.3454, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3331337571144104, |
|
"rewards/margins": 1.497775912284851, |
|
"rewards/rejected": -1.8309099674224854, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7872177573670112e-07, |
|
"logits/chosen": 23.15587043762207, |
|
"logits/rejected": 22.996431350708008, |
|
"logps/chosen": -284.33099365234375, |
|
"logps/rejected": -247.60586547851562, |
|
"loss": 0.3782, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.3313903212547302, |
|
"rewards/margins": 1.394980549812317, |
|
"rewards/rejected": -1.7263710498809814, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7680826636050515e-07, |
|
"logits/chosen": 23.24311637878418, |
|
"logits/rejected": 23.19965171813965, |
|
"logps/chosen": -307.9765930175781, |
|
"logps/rejected": -258.64697265625, |
|
"loss": 0.3464, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.12858574092388153, |
|
"rewards/margins": 1.5073888301849365, |
|
"rewards/rejected": -1.635974645614624, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7489475698430921e-07, |
|
"logits/chosen": 23.421756744384766, |
|
"logits/rejected": 23.180278778076172, |
|
"logps/chosen": -351.59796142578125, |
|
"logps/rejected": -271.3390808105469, |
|
"loss": 0.3522, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.3069804012775421, |
|
"rewards/margins": 1.5511845350265503, |
|
"rewards/rejected": -1.8581645488739014, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7298124760811328e-07, |
|
"logits/chosen": 23.29781723022461, |
|
"logits/rejected": 23.215654373168945, |
|
"logps/chosen": -335.22943115234375, |
|
"logps/rejected": -245.5157470703125, |
|
"loss": 0.3469, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.39654579758644104, |
|
"rewards/margins": 1.623525857925415, |
|
"rewards/rejected": -2.020071506500244, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_logits/chosen": 23.29904556274414, |
|
"eval_logits/rejected": 23.136056900024414, |
|
"eval_logps/chosen": -357.43829345703125, |
|
"eval_logps/rejected": -287.9998779296875, |
|
"eval_loss": 0.49774664640426636, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": -0.28681743144989014, |
|
"eval_rewards/margins": 1.10482656955719, |
|
"eval_rewards/rejected": -1.39164400100708, |
|
"eval_runtime": 207.5885, |
|
"eval_samples_per_second": 9.634, |
|
"eval_steps_per_second": 0.303, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7106773823191734e-07, |
|
"logits/chosen": 23.3192138671875, |
|
"logits/rejected": 23.1693115234375, |
|
"logps/chosen": -360.62701416015625, |
|
"logps/rejected": -267.00836181640625, |
|
"loss": 0.3505, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2135746031999588, |
|
"rewards/margins": 1.6586072444915771, |
|
"rewards/rejected": -1.8721816539764404, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.691542288557214e-07, |
|
"logits/chosen": 22.96431541442871, |
|
"logits/rejected": 22.824430465698242, |
|
"logps/chosen": -327.3959655761719, |
|
"logps/rejected": -293.3819580078125, |
|
"loss": 0.3412, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4780098795890808, |
|
"rewards/margins": 1.3323651552200317, |
|
"rewards/rejected": -1.8103749752044678, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6724071947952544e-07, |
|
"logits/chosen": 22.99736213684082, |
|
"logits/rejected": 22.809282302856445, |
|
"logps/chosen": -286.00433349609375, |
|
"logps/rejected": -253.54635620117188, |
|
"loss": 0.3509, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.5133158564567566, |
|
"rewards/margins": 1.4120731353759766, |
|
"rewards/rejected": -1.9253889322280884, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.653272101033295e-07, |
|
"logits/chosen": 22.923995971679688, |
|
"logits/rejected": 22.87368392944336, |
|
"logps/chosen": -293.9073791503906, |
|
"logps/rejected": -263.51397705078125, |
|
"loss": 0.3527, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.28854408860206604, |
|
"rewards/margins": 1.3332937955856323, |
|
"rewards/rejected": -1.6218379735946655, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6341370072713356e-07, |
|
"logits/chosen": 23.294551849365234, |
|
"logits/rejected": 23.248403549194336, |
|
"logps/chosen": -354.1997985839844, |
|
"logps/rejected": -338.943603515625, |
|
"loss": 0.3538, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2328769713640213, |
|
"rewards/margins": 1.2254259586334229, |
|
"rewards/rejected": -1.4583029747009277, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6150019135093762e-07, |
|
"logits/chosen": 23.40046501159668, |
|
"logits/rejected": 23.170814514160156, |
|
"logps/chosen": -376.11871337890625, |
|
"logps/rejected": -280.2356872558594, |
|
"loss": 0.3409, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.1297599822282791, |
|
"rewards/margins": 1.5910810232162476, |
|
"rewards/rejected": -1.7208411693572998, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5958668197474169e-07, |
|
"logits/chosen": 23.402286529541016, |
|
"logits/rejected": 23.344829559326172, |
|
"logps/chosen": -316.00634765625, |
|
"logps/rejected": -308.0472717285156, |
|
"loss": 0.3556, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.00810793973505497, |
|
"rewards/margins": 1.592138409614563, |
|
"rewards/rejected": -1.5840303897857666, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5767317259854572e-07, |
|
"logits/chosen": 23.049579620361328, |
|
"logits/rejected": 23.11331558227539, |
|
"logps/chosen": -320.4759826660156, |
|
"logps/rejected": -237.2650604248047, |
|
"loss": 0.3655, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.1983695775270462, |
|
"rewards/margins": 1.2847638130187988, |
|
"rewards/rejected": -1.4831334352493286, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5575966322234978e-07, |
|
"logits/chosen": 23.234739303588867, |
|
"logits/rejected": 22.95262336730957, |
|
"logps/chosen": -360.20367431640625, |
|
"logps/rejected": -292.31317138671875, |
|
"loss": 0.3307, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17867961525917053, |
|
"rewards/margins": 1.6552801132202148, |
|
"rewards/rejected": -1.8339598178863525, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5384615384615385e-07, |
|
"logits/chosen": 23.25923728942871, |
|
"logits/rejected": 23.007633209228516, |
|
"logps/chosen": -305.6047058105469, |
|
"logps/rejected": -246.4465789794922, |
|
"loss": 0.3547, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2684822380542755, |
|
"rewards/margins": 1.560947060585022, |
|
"rewards/rejected": -1.829429268836975, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_logits/chosen": 23.273019790649414, |
|
"eval_logits/rejected": 23.114229202270508, |
|
"eval_logps/chosen": -358.821044921875, |
|
"eval_logps/rejected": -289.5934753417969, |
|
"eval_loss": 0.49868160486221313, |
|
"eval_rewards/accuracies": 0.761904776096344, |
|
"eval_rewards/chosen": -0.42508772015571594, |
|
"eval_rewards/margins": 1.125916838645935, |
|
"eval_rewards/rejected": -1.5510046482086182, |
|
"eval_runtime": 211.1219, |
|
"eval_samples_per_second": 9.473, |
|
"eval_steps_per_second": 0.298, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.519326444699579e-07, |
|
"logits/chosen": 23.230274200439453, |
|
"logits/rejected": 23.15807342529297, |
|
"logps/chosen": -334.8540954589844, |
|
"logps/rejected": -263.5167236328125, |
|
"loss": 0.3289, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.251259982585907, |
|
"rewards/margins": 1.5572983026504517, |
|
"rewards/rejected": -1.8085582256317139, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5001913509376197e-07, |
|
"logits/chosen": 23.217041015625, |
|
"logits/rejected": 23.1701717376709, |
|
"logps/chosen": -330.7687683105469, |
|
"logps/rejected": -285.98907470703125, |
|
"loss": 0.3463, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.2551344037055969, |
|
"rewards/margins": 1.4968717098236084, |
|
"rewards/rejected": -1.7520062923431396, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4810562571756603e-07, |
|
"logits/chosen": 23.419551849365234, |
|
"logits/rejected": 23.216039657592773, |
|
"logps/chosen": -312.8890380859375, |
|
"logps/rejected": -267.16729736328125, |
|
"loss": 0.3688, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.34911391139030457, |
|
"rewards/margins": 1.548995018005371, |
|
"rewards/rejected": -1.8981088399887085, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4619211634137007e-07, |
|
"logits/chosen": 22.947824478149414, |
|
"logits/rejected": 22.82015037536621, |
|
"logps/chosen": -313.2303771972656, |
|
"logps/rejected": -252.9609375, |
|
"loss": 0.3447, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2606348693370819, |
|
"rewards/margins": 1.5146172046661377, |
|
"rewards/rejected": -1.7752519845962524, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4427860696517413e-07, |
|
"logits/chosen": 23.21828269958496, |
|
"logits/rejected": 23.22684097290039, |
|
"logps/chosen": -369.90924072265625, |
|
"logps/rejected": -314.48016357421875, |
|
"loss": 0.3561, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.12711670994758606, |
|
"rewards/margins": 1.666666030883789, |
|
"rewards/rejected": -1.7937828302383423, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.423650975889782e-07, |
|
"logits/chosen": 23.231754302978516, |
|
"logits/rejected": 23.19542694091797, |
|
"logps/chosen": -351.79913330078125, |
|
"logps/rejected": -271.44427490234375, |
|
"loss": 0.3303, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14719603955745697, |
|
"rewards/margins": 1.6395785808563232, |
|
"rewards/rejected": -1.7867748737335205, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4045158821278225e-07, |
|
"logits/chosen": 22.95505142211914, |
|
"logits/rejected": 22.855873107910156, |
|
"logps/chosen": -340.75238037109375, |
|
"logps/rejected": -316.02386474609375, |
|
"loss": 0.3293, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.26123401522636414, |
|
"rewards/margins": 1.4234856367111206, |
|
"rewards/rejected": -1.6847198009490967, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3853807883658632e-07, |
|
"logits/chosen": 23.029155731201172, |
|
"logits/rejected": 23.120052337646484, |
|
"logps/chosen": -371.22100830078125, |
|
"logps/rejected": -319.261474609375, |
|
"loss": 0.3694, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.3324764668941498, |
|
"rewards/margins": 1.6106780767440796, |
|
"rewards/rejected": -1.9431545734405518, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3662456946039035e-07, |
|
"logits/chosen": 23.185413360595703, |
|
"logits/rejected": 22.925167083740234, |
|
"logps/chosen": -305.14410400390625, |
|
"logps/rejected": -296.2259521484375, |
|
"loss": 0.3664, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.40003857016563416, |
|
"rewards/margins": 1.3513580560684204, |
|
"rewards/rejected": -1.7513965368270874, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3471106008419441e-07, |
|
"logits/chosen": 23.18272590637207, |
|
"logits/rejected": 23.115558624267578, |
|
"logps/chosen": -306.1884460449219, |
|
"logps/rejected": -288.0213928222656, |
|
"loss": 0.3468, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.359417200088501, |
|
"rewards/margins": 1.435274362564087, |
|
"rewards/rejected": -1.7946914434432983, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_logits/chosen": 23.256072998046875, |
|
"eval_logits/rejected": 23.099788665771484, |
|
"eval_logps/chosen": -357.2442932128906, |
|
"eval_logps/rejected": -288.0285339355469, |
|
"eval_loss": 0.49792206287384033, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": -0.26741600036621094, |
|
"eval_rewards/margins": 1.1270908117294312, |
|
"eval_rewards/rejected": -1.394506812095642, |
|
"eval_runtime": 210.9966, |
|
"eval_samples_per_second": 9.479, |
|
"eval_steps_per_second": 0.299, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3279755070799848e-07, |
|
"logits/chosen": 23.182937622070312, |
|
"logits/rejected": 23.00518035888672, |
|
"logps/chosen": -325.1390075683594, |
|
"logps/rejected": -254.9105224609375, |
|
"loss": 0.3562, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2701284885406494, |
|
"rewards/margins": 1.4486573934555054, |
|
"rewards/rejected": -1.7187858819961548, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3088404133180254e-07, |
|
"logits/chosen": 23.192270278930664, |
|
"logits/rejected": 22.86314582824707, |
|
"logps/chosen": -354.3294677734375, |
|
"logps/rejected": -277.08319091796875, |
|
"loss": 0.3275, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3135210871696472, |
|
"rewards/margins": 1.6535711288452148, |
|
"rewards/rejected": -1.9670922756195068, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.289705319556066e-07, |
|
"logits/chosen": 23.285938262939453, |
|
"logits/rejected": 23.20859146118164, |
|
"logps/chosen": -341.8558044433594, |
|
"logps/rejected": -260.49853515625, |
|
"loss": 0.3339, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.49324899911880493, |
|
"rewards/margins": 1.3661364316940308, |
|
"rewards/rejected": -1.8593854904174805, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2705702257941064e-07, |
|
"logits/chosen": 23.054574966430664, |
|
"logits/rejected": 22.94180679321289, |
|
"logps/chosen": -314.9513244628906, |
|
"logps/rejected": -253.89779663085938, |
|
"loss": 0.3683, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.5942099690437317, |
|
"rewards/margins": 1.3034783601760864, |
|
"rewards/rejected": -1.8976882696151733, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.251435132032147e-07, |
|
"logits/chosen": 23.040285110473633, |
|
"logits/rejected": 23.092458724975586, |
|
"logps/chosen": -347.84820556640625, |
|
"logps/rejected": -300.02069091796875, |
|
"loss": 0.3424, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3747033476829529, |
|
"rewards/margins": 1.4116895198822021, |
|
"rewards/rejected": -1.7863928079605103, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2323000382701873e-07, |
|
"logits/chosen": 23.061431884765625, |
|
"logits/rejected": 22.941814422607422, |
|
"logps/chosen": -363.65850830078125, |
|
"logps/rejected": -299.712890625, |
|
"loss": 0.3701, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.34447842836380005, |
|
"rewards/margins": 1.6744133234024048, |
|
"rewards/rejected": -2.0188918113708496, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.213164944508228e-07, |
|
"logits/chosen": 23.18351936340332, |
|
"logits/rejected": 23.060955047607422, |
|
"logps/chosen": -386.75701904296875, |
|
"logps/rejected": -311.6101379394531, |
|
"loss": 0.3375, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.2950562834739685, |
|
"rewards/margins": 1.5026452541351318, |
|
"rewards/rejected": -1.7977014780044556, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1940298507462686e-07, |
|
"logits/chosen": 23.240421295166016, |
|
"logits/rejected": 23.235857009887695, |
|
"logps/chosen": -306.81561279296875, |
|
"logps/rejected": -249.85324096679688, |
|
"loss": 0.3425, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.47128137946128845, |
|
"rewards/margins": 1.526531457901001, |
|
"rewards/rejected": -1.9978128671646118, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1748947569843092e-07, |
|
"logits/chosen": 23.31965446472168, |
|
"logits/rejected": 23.00503158569336, |
|
"logps/chosen": -366.4286193847656, |
|
"logps/rejected": -287.65399169921875, |
|
"loss": 0.3404, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.20483896136283875, |
|
"rewards/margins": 1.4863460063934326, |
|
"rewards/rejected": -1.6911849975585938, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1557596632223497e-07, |
|
"logits/chosen": 23.109445571899414, |
|
"logits/rejected": 23.121734619140625, |
|
"logps/chosen": -339.75689697265625, |
|
"logps/rejected": -266.4504699707031, |
|
"loss": 0.3432, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.32633692026138306, |
|
"rewards/margins": 1.3159123659133911, |
|
"rewards/rejected": -1.642249345779419, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_logits/chosen": 23.223342895507812, |
|
"eval_logits/rejected": 23.0726318359375, |
|
"eval_logps/chosen": -358.362060546875, |
|
"eval_logps/rejected": -288.7130126953125, |
|
"eval_loss": 0.5026321411132812, |
|
"eval_rewards/accuracies": 0.773809552192688, |
|
"eval_rewards/chosen": -0.3791937828063965, |
|
"eval_rewards/margins": 1.0837651491165161, |
|
"eval_rewards/rejected": -1.4629590511322021, |
|
"eval_runtime": 212.4288, |
|
"eval_samples_per_second": 9.415, |
|
"eval_steps_per_second": 0.297, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1366245694603903e-07, |
|
"logits/chosen": 22.923072814941406, |
|
"logits/rejected": 22.96480369567871, |
|
"logps/chosen": -329.4259338378906, |
|
"logps/rejected": -294.24127197265625, |
|
"loss": 0.3706, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.3439275026321411, |
|
"rewards/margins": 1.2762069702148438, |
|
"rewards/rejected": -1.6201345920562744, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1174894756984308e-07, |
|
"logits/chosen": 23.266117095947266, |
|
"logits/rejected": 23.19167137145996, |
|
"logps/chosen": -274.62237548828125, |
|
"logps/rejected": -247.6970672607422, |
|
"loss": 0.3588, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.4183998107910156, |
|
"rewards/margins": 1.5969486236572266, |
|
"rewards/rejected": -2.015348434448242, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0983543819364714e-07, |
|
"logits/chosen": 23.18532943725586, |
|
"logits/rejected": 23.074344635009766, |
|
"logps/chosen": -333.7981262207031, |
|
"logps/rejected": -262.7727966308594, |
|
"loss": 0.3035, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3411490321159363, |
|
"rewards/margins": 1.6993077993392944, |
|
"rewards/rejected": -2.040456771850586, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.079219288174512e-07, |
|
"logits/chosen": 22.650278091430664, |
|
"logits/rejected": 22.700809478759766, |
|
"logps/chosen": -256.5923156738281, |
|
"logps/rejected": -267.3676452636719, |
|
"loss": 0.3527, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5605247616767883, |
|
"rewards/margins": 1.3785268068313599, |
|
"rewards/rejected": -1.939051628112793, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0600841944125525e-07, |
|
"logits/chosen": 23.22505760192871, |
|
"logits/rejected": 23.044265747070312, |
|
"logps/chosen": -377.84765625, |
|
"logps/rejected": -308.9931945800781, |
|
"loss": 0.3613, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.2691905200481415, |
|
"rewards/margins": 1.7195707559585571, |
|
"rewards/rejected": -1.988761305809021, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0409491006505931e-07, |
|
"logits/chosen": 23.209278106689453, |
|
"logits/rejected": 23.045013427734375, |
|
"logps/chosen": -341.085205078125, |
|
"logps/rejected": -284.80987548828125, |
|
"loss": 0.3373, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.32976096868515015, |
|
"rewards/margins": 1.4313738346099854, |
|
"rewards/rejected": -1.7611347436904907, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0218140068886336e-07, |
|
"logits/chosen": 22.989057540893555, |
|
"logits/rejected": 22.930797576904297, |
|
"logps/chosen": -349.1778564453125, |
|
"logps/rejected": -276.46905517578125, |
|
"loss": 0.351, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3507656157016754, |
|
"rewards/margins": 1.4063034057617188, |
|
"rewards/rejected": -1.7570692300796509, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0026789131266743e-07, |
|
"logits/chosen": 22.69498062133789, |
|
"logits/rejected": 22.762619018554688, |
|
"logps/chosen": -319.37701416015625, |
|
"logps/rejected": -285.0171813964844, |
|
"loss": 0.3298, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.41146141290664673, |
|
"rewards/margins": 1.4631173610687256, |
|
"rewards/rejected": -1.874578833580017, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.835438193647149e-08, |
|
"logits/chosen": 22.913543701171875, |
|
"logits/rejected": 22.7869873046875, |
|
"logps/chosen": -328.5509948730469, |
|
"logps/rejected": -253.9607696533203, |
|
"loss": 0.3565, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.4099615216255188, |
|
"rewards/margins": 1.3734912872314453, |
|
"rewards/rejected": -1.7834527492523193, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.644087256027554e-08, |
|
"logits/chosen": 23.1809139251709, |
|
"logits/rejected": 23.06944465637207, |
|
"logps/chosen": -286.20904541015625, |
|
"logps/rejected": -235.44140625, |
|
"loss": 0.324, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5443722009658813, |
|
"rewards/margins": 1.3517777919769287, |
|
"rewards/rejected": -1.8961498737335205, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_logits/chosen": 23.200559616088867, |
|
"eval_logits/rejected": 23.054319381713867, |
|
"eval_logps/chosen": -359.46197509765625, |
|
"eval_logps/rejected": -290.1737060546875, |
|
"eval_loss": 0.5021990537643433, |
|
"eval_rewards/accuracies": 0.7698412537574768, |
|
"eval_rewards/chosen": -0.48918139934539795, |
|
"eval_rewards/margins": 1.1198451519012451, |
|
"eval_rewards/rejected": -1.609026551246643, |
|
"eval_runtime": 211.6095, |
|
"eval_samples_per_second": 9.451, |
|
"eval_steps_per_second": 0.298, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.45273631840796e-08, |
|
"logits/chosen": 22.888259887695312, |
|
"logits/rejected": 22.884002685546875, |
|
"logps/chosen": -351.7564392089844, |
|
"logps/rejected": -299.20611572265625, |
|
"loss": 0.3645, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.16312038898468018, |
|
"rewards/margins": 1.5724434852600098, |
|
"rewards/rejected": -1.73556387424469, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.261385380788366e-08, |
|
"logits/chosen": 23.231523513793945, |
|
"logits/rejected": 23.333255767822266, |
|
"logps/chosen": -338.1724548339844, |
|
"logps/rejected": -284.2950744628906, |
|
"loss": 0.3772, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.48969048261642456, |
|
"rewards/margins": 1.1433426141738892, |
|
"rewards/rejected": -1.633033037185669, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.070034443168771e-08, |
|
"logits/chosen": 23.16311264038086, |
|
"logits/rejected": 22.952455520629883, |
|
"logps/chosen": -304.960205078125, |
|
"logps/rejected": -273.458251953125, |
|
"loss": 0.3659, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.46648114919662476, |
|
"rewards/margins": 1.2427117824554443, |
|
"rewards/rejected": -1.7091929912567139, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.878683505549177e-08, |
|
"logits/chosen": 23.104694366455078, |
|
"logits/rejected": 23.080604553222656, |
|
"logps/chosen": -287.4930419921875, |
|
"logps/rejected": -265.7315368652344, |
|
"loss": 0.3395, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.0566287636756897, |
|
"rewards/margins": 1.7359685897827148, |
|
"rewards/rejected": -1.7925974130630493, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.687332567929582e-08, |
|
"logits/chosen": 22.983219146728516, |
|
"logits/rejected": 23.094844818115234, |
|
"logps/chosen": -316.54547119140625, |
|
"logps/rejected": -292.8838806152344, |
|
"loss": 0.3579, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4759410321712494, |
|
"rewards/margins": 1.2076809406280518, |
|
"rewards/rejected": -1.6836220026016235, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.495981630309988e-08, |
|
"logits/chosen": 22.804758071899414, |
|
"logits/rejected": 22.753246307373047, |
|
"logps/chosen": -366.3809509277344, |
|
"logps/rejected": -290.44805908203125, |
|
"loss": 0.3613, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.37290042638778687, |
|
"rewards/margins": 1.3757580518722534, |
|
"rewards/rejected": -1.7486584186553955, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.304630692690395e-08, |
|
"logits/chosen": 22.425283432006836, |
|
"logits/rejected": 22.801471710205078, |
|
"logps/chosen": -316.58416748046875, |
|
"logps/rejected": -268.90423583984375, |
|
"loss": 0.3686, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.44941702485084534, |
|
"rewards/margins": 1.3106696605682373, |
|
"rewards/rejected": -1.7600864171981812, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.1132797550708e-08, |
|
"logits/chosen": 23.177642822265625, |
|
"logits/rejected": 23.025049209594727, |
|
"logps/chosen": -345.25958251953125, |
|
"logps/rejected": -269.18951416015625, |
|
"loss": 0.3337, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.33066803216934204, |
|
"rewards/margins": 1.614101767539978, |
|
"rewards/rejected": -1.9447696208953857, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.921928817451206e-08, |
|
"logits/chosen": 23.251216888427734, |
|
"logits/rejected": 23.127471923828125, |
|
"logps/chosen": -321.37078857421875, |
|
"logps/rejected": -251.3941192626953, |
|
"loss": 0.3158, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.3629501461982727, |
|
"rewards/margins": 1.3023030757904053, |
|
"rewards/rejected": -1.6652530431747437, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.73057787983161e-08, |
|
"logits/chosen": 23.11884880065918, |
|
"logits/rejected": 23.025787353515625, |
|
"logps/chosen": -372.3116760253906, |
|
"logps/rejected": -297.9188232421875, |
|
"loss": 0.3556, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08432115614414215, |
|
"rewards/margins": 1.5375818014144897, |
|
"rewards/rejected": -1.6219028234481812, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_logits/chosen": 23.198068618774414, |
|
"eval_logits/rejected": 23.05204963684082, |
|
"eval_logps/chosen": -359.8403625488281, |
|
"eval_logps/rejected": -290.6595458984375, |
|
"eval_loss": 0.5010030269622803, |
|
"eval_rewards/accuracies": 0.7817460298538208, |
|
"eval_rewards/chosen": -0.5270243287086487, |
|
"eval_rewards/margins": 1.130587100982666, |
|
"eval_rewards/rejected": -1.6576114892959595, |
|
"eval_runtime": 208.0836, |
|
"eval_samples_per_second": 9.612, |
|
"eval_steps_per_second": 0.303, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.539226942212017e-08, |
|
"logits/chosen": 22.56097412109375, |
|
"logits/rejected": 22.520360946655273, |
|
"logps/chosen": -299.781982421875, |
|
"logps/rejected": -319.82171630859375, |
|
"loss": 0.3419, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4395579397678375, |
|
"rewards/margins": 1.4742848873138428, |
|
"rewards/rejected": -1.9138429164886475, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.347876004592423e-08, |
|
"logits/chosen": 23.056285858154297, |
|
"logits/rejected": 22.88377571105957, |
|
"logps/chosen": -274.74249267578125, |
|
"logps/rejected": -217.62075805664062, |
|
"loss": 0.3617, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.48249778151512146, |
|
"rewards/margins": 1.2477834224700928, |
|
"rewards/rejected": -1.730281114578247, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.156525066972828e-08, |
|
"logits/chosen": 23.401355743408203, |
|
"logits/rejected": 23.3753604888916, |
|
"logps/chosen": -305.857666015625, |
|
"logps/rejected": -269.84344482421875, |
|
"loss": 0.3566, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5294305086135864, |
|
"rewards/margins": 1.2906124591827393, |
|
"rewards/rejected": -1.8200428485870361, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.965174129353234e-08, |
|
"logits/chosen": 23.210468292236328, |
|
"logits/rejected": 23.068767547607422, |
|
"logps/chosen": -418.8457946777344, |
|
"logps/rejected": -322.9905700683594, |
|
"loss": 0.3506, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.12323112785816193, |
|
"rewards/margins": 1.8070284128189087, |
|
"rewards/rejected": -1.9302597045898438, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.773823191733639e-08, |
|
"logits/chosen": 23.159671783447266, |
|
"logits/rejected": 23.031639099121094, |
|
"logps/chosen": -341.6581726074219, |
|
"logps/rejected": -329.7276611328125, |
|
"loss": 0.3489, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.43813997507095337, |
|
"rewards/margins": 1.6730334758758545, |
|
"rewards/rejected": -2.111173152923584, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.582472254114045e-08, |
|
"logits/chosen": 22.655208587646484, |
|
"logits/rejected": 22.458200454711914, |
|
"logps/chosen": -298.61737060546875, |
|
"logps/rejected": -273.9313659667969, |
|
"loss": 0.3689, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.464535653591156, |
|
"rewards/margins": 1.3690606355667114, |
|
"rewards/rejected": -1.8335964679718018, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.391121316494451e-08, |
|
"logits/chosen": 23.089435577392578, |
|
"logits/rejected": 23.145009994506836, |
|
"logps/chosen": -347.81793212890625, |
|
"logps/rejected": -289.2439880371094, |
|
"loss": 0.3245, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.26666170358657837, |
|
"rewards/margins": 1.7196756601333618, |
|
"rewards/rejected": -1.9863373041152954, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.199770378874856e-08, |
|
"logits/chosen": 23.03936767578125, |
|
"logits/rejected": 22.83783531188965, |
|
"logps/chosen": -326.26123046875, |
|
"logps/rejected": -285.95294189453125, |
|
"loss": 0.3369, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3959008455276489, |
|
"rewards/margins": 1.5528860092163086, |
|
"rewards/rejected": -1.948786735534668, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.008419441255262e-08, |
|
"logits/chosen": 22.73525047302246, |
|
"logits/rejected": 22.791269302368164, |
|
"logps/chosen": -291.1043395996094, |
|
"logps/rejected": -250.67880249023438, |
|
"loss": 0.3344, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.7139278650283813, |
|
"rewards/margins": 1.2345958948135376, |
|
"rewards/rejected": -1.948523759841919, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.817068503635668e-08, |
|
"logits/chosen": 23.139039993286133, |
|
"logits/rejected": 23.094404220581055, |
|
"logps/chosen": -375.664794921875, |
|
"logps/rejected": -297.1061706542969, |
|
"loss": 0.3277, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.29812633991241455, |
|
"rewards/margins": 1.5680840015411377, |
|
"rewards/rejected": -1.8662105798721313, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_logits/chosen": 23.19009780883789, |
|
"eval_logits/rejected": 23.044872283935547, |
|
"eval_logps/chosen": -359.9708251953125, |
|
"eval_logps/rejected": -290.89959716796875, |
|
"eval_loss": 0.49901142716407776, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": -0.5400659441947937, |
|
"eval_rewards/margins": 1.141547679901123, |
|
"eval_rewards/rejected": -1.6816134452819824, |
|
"eval_runtime": 212.6416, |
|
"eval_samples_per_second": 9.405, |
|
"eval_steps_per_second": 0.296, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.6257175660160735e-08, |
|
"logits/chosen": 23.452491760253906, |
|
"logits/rejected": 23.291522979736328, |
|
"logps/chosen": -321.8898010253906, |
|
"logps/rejected": -302.38250732421875, |
|
"loss": 0.3198, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.5151566863059998, |
|
"rewards/margins": 1.5868747234344482, |
|
"rewards/rejected": -2.1020312309265137, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.4343666283964784e-08, |
|
"logits/chosen": 22.93613052368164, |
|
"logits/rejected": 23.02700424194336, |
|
"logps/chosen": -337.6763000488281, |
|
"logps/rejected": -263.98406982421875, |
|
"loss": 0.3544, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.38289493322372437, |
|
"rewards/margins": 1.6999647617340088, |
|
"rewards/rejected": -2.0828592777252197, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.243015690776884e-08, |
|
"logits/chosen": 23.027408599853516, |
|
"logits/rejected": 23.09657096862793, |
|
"logps/chosen": -300.5941162109375, |
|
"logps/rejected": -263.323486328125, |
|
"loss": 0.3481, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6709809899330139, |
|
"rewards/margins": 1.2178490161895752, |
|
"rewards/rejected": -1.8888299465179443, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.05166475315729e-08, |
|
"logits/chosen": 23.28525161743164, |
|
"logits/rejected": 23.195045471191406, |
|
"logps/chosen": -272.472900390625, |
|
"logps/rejected": -262.8435974121094, |
|
"loss": 0.3379, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.8382778167724609, |
|
"rewards/margins": 1.3932462930679321, |
|
"rewards/rejected": -2.2315242290496826, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.860313815537696e-08, |
|
"logits/chosen": 22.973094940185547, |
|
"logits/rejected": 22.961816787719727, |
|
"logps/chosen": -367.30596923828125, |
|
"logps/rejected": -294.1488952636719, |
|
"loss": 0.3489, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.19501671195030212, |
|
"rewards/margins": 1.6359647512435913, |
|
"rewards/rejected": -1.8309814929962158, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.668962877918101e-08, |
|
"logits/chosen": 23.02678680419922, |
|
"logits/rejected": 22.784521102905273, |
|
"logps/chosen": -329.62030029296875, |
|
"logps/rejected": -373.6632080078125, |
|
"loss": 0.3306, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.4452723562717438, |
|
"rewards/margins": 1.5809751749038696, |
|
"rewards/rejected": -2.026247501373291, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.477611940298507e-08, |
|
"logits/chosen": 22.902379989624023, |
|
"logits/rejected": 22.912425994873047, |
|
"logps/chosen": -332.11102294921875, |
|
"logps/rejected": -280.44976806640625, |
|
"loss": 0.3247, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.35678499937057495, |
|
"rewards/margins": 1.8047094345092773, |
|
"rewards/rejected": -2.161494493484497, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.2862610026789124e-08, |
|
"logits/chosen": 23.08858871459961, |
|
"logits/rejected": 22.95041275024414, |
|
"logps/chosen": -337.7413330078125, |
|
"logps/rejected": -293.63623046875, |
|
"loss": 0.3618, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3582797646522522, |
|
"rewards/margins": 1.579685091972351, |
|
"rewards/rejected": -1.9379650354385376, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.0949100650593186e-08, |
|
"logits/chosen": 22.903911590576172, |
|
"logits/rejected": 22.945873260498047, |
|
"logps/chosen": -272.99493408203125, |
|
"logps/rejected": -277.4879455566406, |
|
"loss": 0.3657, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.6198464035987854, |
|
"rewards/margins": 1.3183465003967285, |
|
"rewards/rejected": -1.9381929636001587, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.903559127439724e-08, |
|
"logits/chosen": 23.07727813720703, |
|
"logits/rejected": 22.927719116210938, |
|
"logps/chosen": -282.8468933105469, |
|
"logps/rejected": -237.0935821533203, |
|
"loss": 0.3262, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.40676426887512207, |
|
"rewards/margins": 1.3733875751495361, |
|
"rewards/rejected": -1.7801517248153687, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_logits/chosen": 23.187774658203125, |
|
"eval_logits/rejected": 23.043867111206055, |
|
"eval_logps/chosen": -359.5220031738281, |
|
"eval_logps/rejected": -290.49322509765625, |
|
"eval_loss": 0.4993184804916382, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": -0.4951845407485962, |
|
"eval_rewards/margins": 1.1457940340042114, |
|
"eval_rewards/rejected": -1.6409783363342285, |
|
"eval_runtime": 210.8376, |
|
"eval_samples_per_second": 9.486, |
|
"eval_steps_per_second": 0.299, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.71220818982013e-08, |
|
"logits/chosen": 23.060585021972656, |
|
"logits/rejected": 22.836994171142578, |
|
"logps/chosen": -348.59539794921875, |
|
"logps/rejected": -282.60064697265625, |
|
"loss": 0.3585, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4623526632785797, |
|
"rewards/margins": 1.4493197202682495, |
|
"rewards/rejected": -1.911672592163086, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.520857252200535e-08, |
|
"logits/chosen": 23.332260131835938, |
|
"logits/rejected": 23.22934341430664, |
|
"logps/chosen": -373.9750061035156, |
|
"logps/rejected": -321.8055725097656, |
|
"loss": 0.334, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.24776187539100647, |
|
"rewards/margins": 1.6318897008895874, |
|
"rewards/rejected": -1.879651427268982, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.3295063145809414e-08, |
|
"logits/chosen": 23.10513687133789, |
|
"logits/rejected": 23.070053100585938, |
|
"logps/chosen": -295.7916259765625, |
|
"logps/rejected": -298.29132080078125, |
|
"loss": 0.3567, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.4222725033760071, |
|
"rewards/margins": 1.4338531494140625, |
|
"rewards/rejected": -1.8561254739761353, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.138155376961347e-08, |
|
"logits/chosen": 22.993267059326172, |
|
"logits/rejected": 22.975433349609375, |
|
"logps/chosen": -340.28515625, |
|
"logps/rejected": -270.3987731933594, |
|
"loss": 0.3505, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.4524230360984802, |
|
"rewards/margins": 1.500270962715149, |
|
"rewards/rejected": -1.9526941776275635, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.9468044393417525e-08, |
|
"logits/chosen": 22.807130813598633, |
|
"logits/rejected": 22.657257080078125, |
|
"logps/chosen": -302.7679748535156, |
|
"logps/rejected": -253.1012420654297, |
|
"loss": 0.3457, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3587990403175354, |
|
"rewards/margins": 1.4861528873443604, |
|
"rewards/rejected": -1.8449519872665405, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.755453501722158e-08, |
|
"logits/chosen": 22.716732025146484, |
|
"logits/rejected": 22.806201934814453, |
|
"logps/chosen": -340.51287841796875, |
|
"logps/rejected": -296.96673583984375, |
|
"loss": 0.3386, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4456048011779785, |
|
"rewards/margins": 1.4643114805221558, |
|
"rewards/rejected": -1.9099165201187134, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.564102564102564e-08, |
|
"logits/chosen": 23.153486251831055, |
|
"logits/rejected": 23.201038360595703, |
|
"logps/chosen": -308.52288818359375, |
|
"logps/rejected": -289.1993408203125, |
|
"loss": 0.3403, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3693477213382721, |
|
"rewards/margins": 1.4097144603729248, |
|
"rewards/rejected": -1.779062032699585, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3727516264829695e-08, |
|
"logits/chosen": 22.77389907836914, |
|
"logits/rejected": 22.64432144165039, |
|
"logps/chosen": -388.85552978515625, |
|
"logps/rejected": -363.8034362792969, |
|
"loss": 0.3601, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.32134681940078735, |
|
"rewards/margins": 1.4628154039382935, |
|
"rewards/rejected": -1.784161925315857, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.1814006888633754e-08, |
|
"logits/chosen": 22.792617797851562, |
|
"logits/rejected": 22.739648818969727, |
|
"logps/chosen": -340.0162353515625, |
|
"logps/rejected": -269.2567443847656, |
|
"loss": 0.3476, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.3537456691265106, |
|
"rewards/margins": 1.403322696685791, |
|
"rewards/rejected": -1.7570682764053345, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.990049751243781e-08, |
|
"logits/chosen": 23.177873611450195, |
|
"logits/rejected": 23.13758087158203, |
|
"logps/chosen": -343.5755615234375, |
|
"logps/rejected": -288.75555419921875, |
|
"loss": 0.3566, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.39105096459388733, |
|
"rewards/margins": 1.0802559852600098, |
|
"rewards/rejected": -1.4713070392608643, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_logits/chosen": 23.187063217163086, |
|
"eval_logits/rejected": 23.043275833129883, |
|
"eval_logps/chosen": -359.0445251464844, |
|
"eval_logps/rejected": -290.0010070800781, |
|
"eval_loss": 0.4985302686691284, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": -0.44743794202804565, |
|
"eval_rewards/margins": 1.144317388534546, |
|
"eval_rewards/rejected": -1.5917555093765259, |
|
"eval_runtime": 208.7121, |
|
"eval_samples_per_second": 9.583, |
|
"eval_steps_per_second": 0.302, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7986988136241865e-08, |
|
"logits/chosen": 22.997955322265625, |
|
"logits/rejected": 23.055164337158203, |
|
"logps/chosen": -362.84918212890625, |
|
"logps/rejected": -298.51922607421875, |
|
"loss": 0.3433, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.2676233649253845, |
|
"rewards/margins": 1.3830516338348389, |
|
"rewards/rejected": -1.650674819946289, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6073478760045924e-08, |
|
"logits/chosen": 23.13959312438965, |
|
"logits/rejected": 22.91689682006836, |
|
"logps/chosen": -358.01666259765625, |
|
"logps/rejected": -246.228515625, |
|
"loss": 0.3319, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.24576838314533234, |
|
"rewards/margins": 1.788368582725525, |
|
"rewards/rejected": -2.03413724899292, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4159969383849981e-08, |
|
"logits/chosen": 22.98459243774414, |
|
"logits/rejected": 23.025390625, |
|
"logps/chosen": -346.28973388671875, |
|
"logps/rejected": -279.1742858886719, |
|
"loss": 0.3331, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.40038982033729553, |
|
"rewards/margins": 1.4257802963256836, |
|
"rewards/rejected": -1.8261702060699463, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2246460007654037e-08, |
|
"logits/chosen": 23.19771957397461, |
|
"logits/rejected": 23.1368408203125, |
|
"logps/chosen": -349.0854187011719, |
|
"logps/rejected": -281.1717529296875, |
|
"loss": 0.3685, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.481764018535614, |
|
"rewards/margins": 1.4477870464324951, |
|
"rewards/rejected": -1.929551124572754, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0332950631458094e-08, |
|
"logits/chosen": 23.161651611328125, |
|
"logits/rejected": 23.000064849853516, |
|
"logps/chosen": -338.02288818359375, |
|
"logps/rejected": -283.4983215332031, |
|
"loss": 0.3501, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.45347729325294495, |
|
"rewards/margins": 1.2738498449325562, |
|
"rewards/rejected": -1.7273271083831787, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.419441255262151e-09, |
|
"logits/chosen": 22.9562931060791, |
|
"logits/rejected": 22.93158531188965, |
|
"logps/chosen": -301.29132080078125, |
|
"logps/rejected": -239.3927001953125, |
|
"loss": 0.3382, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.4900715947151184, |
|
"rewards/margins": 1.371382713317871, |
|
"rewards/rejected": -1.8614543676376343, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.505931879066207e-09, |
|
"logits/chosen": 22.94473648071289, |
|
"logits/rejected": 23.008617401123047, |
|
"logps/chosen": -302.16436767578125, |
|
"logps/rejected": -269.48828125, |
|
"loss": 0.3549, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.308788925409317, |
|
"rewards/margins": 1.3573650121688843, |
|
"rewards/rejected": -1.666154146194458, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.592422502870264e-09, |
|
"logits/chosen": 23.0222225189209, |
|
"logits/rejected": 22.979480743408203, |
|
"logps/chosen": -329.9389343261719, |
|
"logps/rejected": -272.41351318359375, |
|
"loss": 0.3559, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.35177531838417053, |
|
"rewards/margins": 1.4292513132095337, |
|
"rewards/rejected": -1.781026840209961, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.6789131266743202e-09, |
|
"logits/chosen": 23.136159896850586, |
|
"logits/rejected": 23.02133560180664, |
|
"logps/chosen": -328.77093505859375, |
|
"logps/rejected": -275.63995361328125, |
|
"loss": 0.3498, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.4196252226829529, |
|
"rewards/margins": 1.4171664714813232, |
|
"rewards/rejected": -1.8367916345596313, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 7.654037504783773e-10, |
|
"logits/chosen": 23.257701873779297, |
|
"logits/rejected": 23.05466079711914, |
|
"logps/chosen": -311.91217041015625, |
|
"logps/rejected": -304.32501220703125, |
|
"loss": 0.3386, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.36090317368507385, |
|
"rewards/margins": 1.4512748718261719, |
|
"rewards/rejected": -1.8121780157089233, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_logits/chosen": 23.18655014038086, |
|
"eval_logits/rejected": 23.042728424072266, |
|
"eval_logps/chosen": -359.16790771484375, |
|
"eval_logps/rejected": -290.12347412109375, |
|
"eval_loss": 0.4982847273349762, |
|
"eval_rewards/accuracies": 0.7817460298538208, |
|
"eval_rewards/chosen": -0.4597766697406769, |
|
"eval_rewards/margins": 1.144227385520935, |
|
"eval_rewards/rejected": -1.6040042638778687, |
|
"eval_runtime": 212.9399, |
|
"eval_samples_per_second": 9.392, |
|
"eval_steps_per_second": 0.296, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2904, |
|
"total_flos": 0.0, |
|
"train_loss": 0.446941960284861, |
|
"train_runtime": 57869.3533, |
|
"train_samples_per_second": 3.212, |
|
"train_steps_per_second": 0.05 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2904, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|