|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 800000000, |
|
"global_step": 835, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 14.9375, |
|
"learning_rate": 5.952380952380953e-08, |
|
"logits/chosen": -3.4845848083496094, |
|
"logits/rejected": -3.85036301612854, |
|
"logps/chosen": -306.50885009765625, |
|
"logps/rejected": -197.74395751953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/diff": -0.625, |
|
"rewards/diff_abs": 0.625, |
|
"rewards/rejected": 0.0, |
|
"rewards/student_margin": 0.0, |
|
"rewards/teacher_margin": 0.625, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 15.1875, |
|
"learning_rate": 5.952380952380953e-07, |
|
"logits/chosen": -3.4539499282836914, |
|
"logits/rejected": -3.5230212211608887, |
|
"logps/chosen": -201.3124237060547, |
|
"logps/rejected": -183.91929626464844, |
|
"loss": 0.7251, |
|
"rewards/accuracies": 0.5185185670852661, |
|
"rewards/chosen": 0.06644736230373383, |
|
"rewards/diff": -0.6283153891563416, |
|
"rewards/diff_abs": 0.7078281044960022, |
|
"rewards/rejected": 0.013049829751253128, |
|
"rewards/student_margin": 0.0533975288271904, |
|
"rewards/teacher_margin": 0.6817129254341125, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 15.0, |
|
"learning_rate": 1.1904761904761906e-06, |
|
"logits/chosen": -3.593590259552002, |
|
"logits/rejected": -3.5751953125, |
|
"logps/chosen": -218.2281951904297, |
|
"logps/rejected": -209.72158813476562, |
|
"loss": 0.7314, |
|
"rewards/accuracies": 0.36666667461395264, |
|
"rewards/chosen": -0.14473959803581238, |
|
"rewards/diff": -1.088902235031128, |
|
"rewards/diff_abs": 1.216476321220398, |
|
"rewards/rejected": -0.044899843633174896, |
|
"rewards/student_margin": -0.09983976185321808, |
|
"rewards/teacher_margin": 0.9890626072883606, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 12.875, |
|
"learning_rate": 1.7857142857142859e-06, |
|
"logits/chosen": -3.489861249923706, |
|
"logits/rejected": -3.60286283493042, |
|
"logps/chosen": -259.5788269042969, |
|
"logps/rejected": -200.3897705078125, |
|
"loss": 0.7006, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.16129140555858612, |
|
"rewards/diff": -0.9457392692565918, |
|
"rewards/diff_abs": 0.9774287343025208, |
|
"rewards/rejected": 0.19505144655704498, |
|
"rewards/student_margin": -0.03376004844903946, |
|
"rewards/teacher_margin": 0.911979079246521, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 13.125, |
|
"learning_rate": 2.380952380952381e-06, |
|
"logits/chosen": -3.4493842124938965, |
|
"logits/rejected": -3.5313167572021484, |
|
"logps/chosen": -296.2957458496094, |
|
"logps/rejected": -205.90768432617188, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 0.4081878662109375, |
|
"rewards/diff": -0.8339607119560242, |
|
"rewards/diff_abs": 0.9871258735656738, |
|
"rewards/rejected": 0.23329439759254456, |
|
"rewards/student_margin": 0.17489352822303772, |
|
"rewards/teacher_margin": 1.0088541507720947, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 2.9761904761904763e-06, |
|
"logits/chosen": -3.627382755279541, |
|
"logits/rejected": -3.624690294265747, |
|
"logps/chosen": -232.656494140625, |
|
"logps/rejected": -218.9987335205078, |
|
"loss": 0.6477, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": 0.6538265943527222, |
|
"rewards/diff": -1.1430647373199463, |
|
"rewards/diff_abs": 1.2350889444351196, |
|
"rewards/rejected": 0.550537109375, |
|
"rewards/student_margin": 0.10328948497772217, |
|
"rewards/teacher_margin": 1.2463542222976685, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 3.5714285714285718e-06, |
|
"logits/chosen": -3.5310890674591064, |
|
"logits/rejected": -3.5235633850097656, |
|
"logps/chosen": -278.9076232910156, |
|
"logps/rejected": -228.38461303710938, |
|
"loss": 0.6531, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": 1.3353300094604492, |
|
"rewards/diff": -0.48476704955101013, |
|
"rewards/diff_abs": 0.8795832395553589, |
|
"rewards/rejected": 0.9247845411300659, |
|
"rewards/student_margin": 0.4105454385280609, |
|
"rewards/teacher_margin": 0.895312488079071, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 12.0625, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": -3.580937147140503, |
|
"logits/rejected": -3.5811400413513184, |
|
"logps/chosen": -300.9478454589844, |
|
"logps/rejected": -296.41937255859375, |
|
"loss": 0.6277, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.321845293045044, |
|
"rewards/diff": -0.41078656911849976, |
|
"rewards/diff_abs": 0.8033410906791687, |
|
"rewards/rejected": 0.977423369884491, |
|
"rewards/student_margin": 0.34442177414894104, |
|
"rewards/teacher_margin": 0.7552083730697632, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 11.875, |
|
"learning_rate": 4.761904761904762e-06, |
|
"logits/chosen": -3.363053560256958, |
|
"logits/rejected": -3.429394483566284, |
|
"logps/chosen": -307.85614013671875, |
|
"logps/rejected": -194.5691680908203, |
|
"loss": 0.6022, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": 1.5754692554473877, |
|
"rewards/diff": -0.049827940762043, |
|
"rewards/diff_abs": 1.0208818912506104, |
|
"rewards/rejected": 0.6659221649169922, |
|
"rewards/student_margin": 0.9095471501350403, |
|
"rewards/teacher_margin": 0.9593750238418579, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 10.625, |
|
"learning_rate": 4.9992125742993825e-06, |
|
"logits/chosen": -3.5306942462921143, |
|
"logits/rejected": -3.4903416633605957, |
|
"logps/chosen": -306.61328125, |
|
"logps/rejected": -260.5257873535156, |
|
"loss": 0.6025, |
|
"rewards/accuracies": 0.6999999284744263, |
|
"rewards/chosen": 1.7541630268096924, |
|
"rewards/diff": -0.37054818868637085, |
|
"rewards/diff_abs": 1.008590817451477, |
|
"rewards/rejected": 1.359086275100708, |
|
"rewards/student_margin": 0.39507681131362915, |
|
"rewards/teacher_margin": 0.765625, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 4.994402324561469e-06, |
|
"logits/chosen": -3.487095594406128, |
|
"logits/rejected": -3.4807047843933105, |
|
"logps/chosen": -291.501953125, |
|
"logps/rejected": -213.4379425048828, |
|
"loss": 0.6059, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 1.6134860515594482, |
|
"rewards/diff": -0.03169644996523857, |
|
"rewards/diff_abs": 0.517871618270874, |
|
"rewards/rejected": 0.8097659349441528, |
|
"rewards/student_margin": 0.8037201762199402, |
|
"rewards/teacher_margin": 0.8354166746139526, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 12.0625, |
|
"learning_rate": 4.985227689958313e-06, |
|
"logits/chosen": -3.4644827842712402, |
|
"logits/rejected": -3.5029213428497314, |
|
"logps/chosen": -310.9401550292969, |
|
"logps/rejected": -203.2042999267578, |
|
"loss": 0.5783, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": 1.3005656003952026, |
|
"rewards/diff": -0.4949645400047302, |
|
"rewards/diff_abs": 0.8325679898262024, |
|
"rewards/rejected": 0.9288633465766907, |
|
"rewards/student_margin": 0.3717021346092224, |
|
"rewards/teacher_margin": 0.8666666746139526, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 11.375, |
|
"learning_rate": 4.97170472308737e-06, |
|
"logits/chosen": -3.5512795448303223, |
|
"logits/rejected": -3.5486133098602295, |
|
"logps/chosen": -240.02197265625, |
|
"logps/rejected": -220.6559600830078, |
|
"loss": 0.6029, |
|
"rewards/accuracies": 0.5666666626930237, |
|
"rewards/chosen": 1.3574118614196777, |
|
"rewards/diff": -0.24382737278938293, |
|
"rewards/diff_abs": 1.0172048807144165, |
|
"rewards/rejected": 0.7210308909416199, |
|
"rewards/student_margin": 0.6363809704780579, |
|
"rewards/teacher_margin": 0.8802083134651184, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 4.953857084699501e-06, |
|
"logits/chosen": -3.4069736003875732, |
|
"logits/rejected": -3.45965313911438, |
|
"logps/chosen": -239.0903778076172, |
|
"logps/rejected": -190.62875366210938, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": 1.439429521560669, |
|
"rewards/diff": -0.09966392815113068, |
|
"rewards/diff_abs": 0.7962394952774048, |
|
"rewards/rejected": 0.7729476690292358, |
|
"rewards/student_margin": 0.6664819121360779, |
|
"rewards/teacher_margin": 0.7661458253860474, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 4.931716002300424e-06, |
|
"logits/chosen": -3.446927547454834, |
|
"logits/rejected": -3.4422965049743652, |
|
"logps/chosen": -305.3811950683594, |
|
"logps/rejected": -268.9550476074219, |
|
"loss": 0.5658, |
|
"rewards/accuracies": 0.7999999523162842, |
|
"rewards/chosen": 1.7481582164764404, |
|
"rewards/diff": 0.060656942427158356, |
|
"rewards/diff_abs": 0.8949319124221802, |
|
"rewards/rejected": 0.8583346605300903, |
|
"rewards/student_margin": 0.8898237347602844, |
|
"rewards/teacher_margin": 0.8291667103767395, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 10.6875, |
|
"learning_rate": 4.905320215512843e-06, |
|
"logits/chosen": -3.3709404468536377, |
|
"logits/rejected": -3.4576239585876465, |
|
"logps/chosen": -273.4628601074219, |
|
"logps/rejected": -242.08724975585938, |
|
"loss": 0.5915, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 0.9606747627258301, |
|
"rewards/diff": -0.34713083505630493, |
|
"rewards/diff_abs": 0.9387839436531067, |
|
"rewards/rejected": 0.5515555143356323, |
|
"rewards/student_margin": 0.409119188785553, |
|
"rewards/teacher_margin": 0.7562500238418579, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 4.874715908294827e-06, |
|
"logits/chosen": -3.4495322704315186, |
|
"logits/rejected": -3.4219632148742676, |
|
"logps/chosen": -236.69869995117188, |
|
"logps/rejected": -200.24969482421875, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": 1.1124448776245117, |
|
"rewards/diff": -0.28646618127822876, |
|
"rewards/diff_abs": 0.8947150111198425, |
|
"rewards/rejected": 0.5187025666236877, |
|
"rewards/student_margin": 0.5937421917915344, |
|
"rewards/teacher_margin": 0.8802083730697632, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 12.3125, |
|
"learning_rate": 4.839956628133049e-06, |
|
"logits/chosen": -3.4103050231933594, |
|
"logits/rejected": -3.464110851287842, |
|
"logps/chosen": -237.78280639648438, |
|
"logps/rejected": -208.2376708984375, |
|
"loss": 0.5312, |
|
"rewards/accuracies": 0.8666666150093079, |
|
"rewards/chosen": 1.213085651397705, |
|
"rewards/diff": -0.21489715576171875, |
|
"rewards/diff_abs": 1.0237081050872803, |
|
"rewards/rejected": 0.386316180229187, |
|
"rewards/student_margin": 0.8267695307731628, |
|
"rewards/teacher_margin": 1.0416667461395264, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 11.25, |
|
"learning_rate": 4.801103192352272e-06, |
|
"logits/chosen": -3.5754635334014893, |
|
"logits/rejected": -3.633957624435425, |
|
"logps/chosen": -344.4823303222656, |
|
"logps/rejected": -243.0480499267578, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": 1.8415803909301758, |
|
"rewards/diff": -0.09891305863857269, |
|
"rewards/diff_abs": 1.243789553642273, |
|
"rewards/rejected": 0.9873684048652649, |
|
"rewards/student_margin": 0.8542119860649109, |
|
"rewards/teacher_margin": 0.9531251192092896, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 11.25, |
|
"learning_rate": 4.758223581705006e-06, |
|
"logits/chosen": -3.512629747390747, |
|
"logits/rejected": -3.5428214073181152, |
|
"logps/chosen": -243.7911376953125, |
|
"logps/rejected": -196.57791137695312, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 1.4310871362686157, |
|
"rewards/diff": -0.1522880345582962, |
|
"rewards/diff_abs": 1.0001410245895386, |
|
"rewards/rejected": 0.8344168663024902, |
|
"rewards/student_margin": 0.5966703295707703, |
|
"rewards/teacher_margin": 0.7489583492279053, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 12.75, |
|
"learning_rate": 4.711392821427515e-06, |
|
"logits/chosen": -3.6087615489959717, |
|
"logits/rejected": -3.622082233428955, |
|
"logps/chosen": -233.5066680908203, |
|
"logps/rejected": -160.3419647216797, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": 1.0799269676208496, |
|
"rewards/diff": -0.27179113030433655, |
|
"rewards/diff_abs": 0.9324356913566589, |
|
"rewards/rejected": 0.15380141139030457, |
|
"rewards/student_margin": 0.9261256456375122, |
|
"rewards/teacher_margin": 1.1979167461395264, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 4.6606928499702905e-06, |
|
"logits/chosen": -3.5973472595214844, |
|
"logits/rejected": -3.656515598297119, |
|
"logps/chosen": -237.35546875, |
|
"logps/rejected": -227.3077392578125, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.372194766998291, |
|
"rewards/diff": -0.45501255989074707, |
|
"rewards/diff_abs": 1.0200004577636719, |
|
"rewards/rejected": 0.9282490611076355, |
|
"rewards/student_margin": 0.4439457952976227, |
|
"rewards/teacher_margin": 0.8989583849906921, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 10.5, |
|
"learning_rate": 4.606212375632682e-06, |
|
"logits/chosen": -3.341809034347534, |
|
"logits/rejected": -3.4072697162628174, |
|
"logps/chosen": -242.65316772460938, |
|
"logps/rejected": -186.21214294433594, |
|
"loss": 0.5484, |
|
"rewards/accuracies": 0.7999999523162842, |
|
"rewards/chosen": 1.5010632276535034, |
|
"rewards/diff": 0.02491099201142788, |
|
"rewards/diff_abs": 1.0156395435333252, |
|
"rewards/rejected": 0.4521939158439636, |
|
"rewards/student_margin": 1.048869252204895, |
|
"rewards/teacher_margin": 1.023958444595337, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 12.9375, |
|
"learning_rate": 4.5480467213524935e-06, |
|
"logits/chosen": -3.4449222087860107, |
|
"logits/rejected": -3.4908764362335205, |
|
"logps/chosen": -260.27532958984375, |
|
"logps/rejected": -249.1790313720703, |
|
"loss": 0.5548, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 1.5822021961212158, |
|
"rewards/diff": -0.133940190076828, |
|
"rewards/diff_abs": 0.9377338290214539, |
|
"rewards/rejected": 0.8869755864143372, |
|
"rewards/student_margin": 0.6952265501022339, |
|
"rewards/teacher_margin": 0.8291667699813843, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 10.75, |
|
"learning_rate": 4.4862976579221605e-06, |
|
"logits/chosen": -3.4081084728240967, |
|
"logits/rejected": -3.435927152633667, |
|
"logps/chosen": -305.90277099609375, |
|
"logps/rejected": -222.0186767578125, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.6999999284744263, |
|
"rewards/chosen": 1.8166110515594482, |
|
"rewards/diff": -0.12463061511516571, |
|
"rewards/diff_abs": 1.1340343952178955, |
|
"rewards/rejected": 0.781866729259491, |
|
"rewards/student_margin": 1.034744381904602, |
|
"rewards/teacher_margin": 1.1593749523162842, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 4.421073225923276e-06, |
|
"logits/chosen": -3.4236435890197754, |
|
"logits/rejected": -3.5582706928253174, |
|
"logps/chosen": -304.5841064453125, |
|
"logps/rejected": -224.82040405273438, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.731606125831604, |
|
"rewards/diff": 0.09823840111494064, |
|
"rewards/diff_abs": 1.1416826248168945, |
|
"rewards/rejected": 0.6896177530288696, |
|
"rewards/student_margin": 1.0419883728027344, |
|
"rewards/teacher_margin": 0.9437500238418579, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 4.3524875466910634e-06, |
|
"logits/chosen": -3.3874142169952393, |
|
"logits/rejected": -3.38875150680542, |
|
"logps/chosen": -248.728271484375, |
|
"logps/rejected": -241.2711181640625, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.1090809106826782, |
|
"rewards/diff": -0.14672747254371643, |
|
"rewards/diff_abs": 0.8784782290458679, |
|
"rewards/rejected": 0.7037249803543091, |
|
"rewards/student_margin": 0.405355840921402, |
|
"rewards/teacher_margin": 0.5520833730697632, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 10.5, |
|
"learning_rate": 4.280660622639513e-06, |
|
"logits/chosen": -3.518489122390747, |
|
"logits/rejected": -3.5266849994659424, |
|
"logps/chosen": -238.49270629882812, |
|
"logps/rejected": -191.0264129638672, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.8666666150093079, |
|
"rewards/chosen": 1.5766558647155762, |
|
"rewards/diff": 0.2415703982114792, |
|
"rewards/diff_abs": 0.9685176014900208, |
|
"rewards/rejected": 0.47050219774246216, |
|
"rewards/student_margin": 1.1061537265777588, |
|
"rewards/teacher_margin": 0.8645833730697632, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 10.375, |
|
"learning_rate": 4.205718127296574e-06, |
|
"logits/chosen": -3.5537657737731934, |
|
"logits/rejected": -3.529198169708252, |
|
"logps/chosen": -241.38253784179688, |
|
"logps/rejected": -211.21163940429688, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": 1.566563367843628, |
|
"rewards/diff": -0.13755005598068237, |
|
"rewards/diff_abs": 1.2915265560150146, |
|
"rewards/rejected": 0.8463010787963867, |
|
"rewards/student_margin": 0.7202624678611755, |
|
"rewards/teacher_margin": 0.8578125238418579, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 4.127791185416747e-06, |
|
"logits/chosen": -3.4216790199279785, |
|
"logits/rejected": -3.4342334270477295, |
|
"logps/chosen": -219.7965087890625, |
|
"logps/rejected": -173.47998046875, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": 1.3700611591339111, |
|
"rewards/diff": -0.2189801037311554, |
|
"rewards/diff_abs": 1.011496901512146, |
|
"rewards/rejected": 0.6609162092208862, |
|
"rewards/student_margin": 0.7091449499130249, |
|
"rewards/teacher_margin": 0.9281250834465027, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 9.25, |
|
"learning_rate": 4.047016143555834e-06, |
|
"logits/chosen": -3.4285099506378174, |
|
"logits/rejected": -3.44201397895813, |
|
"logps/chosen": -247.718994140625, |
|
"logps/rejected": -208.1968231201172, |
|
"loss": 0.5411, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": 1.704085350036621, |
|
"rewards/diff": 0.11051769554615021, |
|
"rewards/diff_abs": 0.9950829744338989, |
|
"rewards/rejected": 0.6805468201637268, |
|
"rewards/student_margin": 1.023538589477539, |
|
"rewards/teacher_margin": 0.91302090883255, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 11.375, |
|
"learning_rate": 3.9635343315092374e-06, |
|
"logits/chosen": -3.350679874420166, |
|
"logits/rejected": -3.487694263458252, |
|
"logps/chosen": -243.7193603515625, |
|
"logps/rejected": -210.34561157226562, |
|
"loss": 0.558, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": 1.3691414594650269, |
|
"rewards/diff": -0.047634802758693695, |
|
"rewards/diff_abs": 1.2383002042770386, |
|
"rewards/rejected": 0.43865126371383667, |
|
"rewards/student_margin": 0.9304901957511902, |
|
"rewards/teacher_margin": 0.9781249165534973, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 11.0, |
|
"learning_rate": 3.877491815031241e-06, |
|
"logits/chosen": -3.520355701446533, |
|
"logits/rejected": -3.64158296585083, |
|
"logps/chosen": -258.4951171875, |
|
"logps/rejected": -180.27655029296875, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.8666666150093079, |
|
"rewards/chosen": 1.5588480234146118, |
|
"rewards/diff": 0.2940705418586731, |
|
"rewards/diff_abs": 0.8084346055984497, |
|
"rewards/rejected": 0.40748587250709534, |
|
"rewards/student_margin": 1.1513621807098389, |
|
"rewards/teacher_margin": 0.8572916984558105, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 11.625, |
|
"learning_rate": 3.789039140267903e-06, |
|
"logits/chosen": -3.6287574768066406, |
|
"logits/rejected": -3.6443278789520264, |
|
"logps/chosen": -239.03488159179688, |
|
"logps/rejected": -204.2160186767578, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": 1.4275104999542236, |
|
"rewards/diff": 0.07205963134765625, |
|
"rewards/diff_abs": 1.0453150272369385, |
|
"rewards/rejected": 0.3346175253391266, |
|
"rewards/student_margin": 1.0928928852081299, |
|
"rewards/teacher_margin": 1.0208333730697632, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 3.6983310703507475e-06, |
|
"logits/chosen": -3.4879977703094482, |
|
"logits/rejected": -3.631270170211792, |
|
"logps/chosen": -316.2113342285156, |
|
"logps/rejected": -292.9886474609375, |
|
"loss": 0.5119, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": 1.9592492580413818, |
|
"rewards/diff": 0.0617034025490284, |
|
"rewards/diff_abs": 1.0368849039077759, |
|
"rewards/rejected": 1.1829627752304077, |
|
"rewards/student_margin": 0.7762867212295532, |
|
"rewards/teacher_margin": 0.7145833969116211, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 11.625, |
|
"learning_rate": 3.6055263146121062e-06, |
|
"logits/chosen": -3.4843573570251465, |
|
"logits/rejected": -3.5558838844299316, |
|
"logps/chosen": -243.1865234375, |
|
"logps/rejected": -191.44906616210938, |
|
"loss": 0.5281, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": 1.671415090560913, |
|
"rewards/diff": 0.22389063239097595, |
|
"rewards/diff_abs": 1.1865875720977783, |
|
"rewards/rejected": 0.6318994760513306, |
|
"rewards/student_margin": 1.0395156145095825, |
|
"rewards/teacher_margin": 0.815625011920929, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 11.75, |
|
"learning_rate": 3.5107872508959144e-06, |
|
"logits/chosen": -3.551055908203125, |
|
"logits/rejected": -3.672009229660034, |
|
"logps/chosen": -303.6122741699219, |
|
"logps/rejected": -230.38363647460938, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": 1.3571428060531616, |
|
"rewards/diff": 0.1063896045088768, |
|
"rewards/diff_abs": 1.227370023727417, |
|
"rewards/rejected": 0.45752400159835815, |
|
"rewards/student_margin": 0.8996188044548035, |
|
"rewards/teacher_margin": 0.7932292222976685, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 10.875, |
|
"learning_rate": 3.414279641449809e-06, |
|
"logits/chosen": -3.435415744781494, |
|
"logits/rejected": -3.4730231761932373, |
|
"logps/chosen": -295.2155456542969, |
|
"logps/rejected": -237.608642578125, |
|
"loss": 0.5138, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": 1.6991815567016602, |
|
"rewards/diff": -0.06112980842590332, |
|
"rewards/diff_abs": 1.0851820707321167, |
|
"rewards/rejected": 0.8478114008903503, |
|
"rewards/student_margin": 0.8513702154159546, |
|
"rewards/teacher_margin": 0.9125000238418579, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 10.25, |
|
"learning_rate": 3.3161723428956356e-06, |
|
"logits/chosen": -3.3455491065979004, |
|
"logits/rejected": -3.498779296875, |
|
"logps/chosen": -304.9415283203125, |
|
"logps/rejected": -242.94873046875, |
|
"loss": 0.5174, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.997698426246643, |
|
"rewards/diff": 0.04048812389373779, |
|
"rewards/diff_abs": 1.1324741840362549, |
|
"rewards/rejected": 0.8811686635017395, |
|
"rewards/student_margin": 1.1165297031402588, |
|
"rewards/teacher_margin": 1.0760416984558105, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 10.625, |
|
"learning_rate": 3.216637010785813e-06, |
|
"logits/chosen": -3.564321994781494, |
|
"logits/rejected": -3.5550827980041504, |
|
"logps/chosen": -323.22161865234375, |
|
"logps/rejected": -285.3416442871094, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.76666659116745, |
|
"rewards/chosen": 2.0031332969665527, |
|
"rewards/diff": 0.2937852442264557, |
|
"rewards/diff_abs": 1.2637544870376587, |
|
"rewards/rejected": 0.8124731183052063, |
|
"rewards/student_margin": 1.1906602382659912, |
|
"rewards/teacher_margin": 0.8968750238418579, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 12.9375, |
|
"learning_rate": 3.115847799262494e-06, |
|
"logits/chosen": -3.467402696609497, |
|
"logits/rejected": -3.590373992919922, |
|
"logps/chosen": -257.94512939453125, |
|
"logps/rejected": -220.92965698242188, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": 1.5651861429214478, |
|
"rewards/diff": 0.25105172395706177, |
|
"rewards/diff_abs": 0.9998427629470825, |
|
"rewards/rejected": 0.43444690108299255, |
|
"rewards/student_margin": 1.1307392120361328, |
|
"rewards/teacher_margin": 0.879687488079071, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 10.25, |
|
"learning_rate": 3.0139810563450094e-06, |
|
"logits/chosen": -3.6093788146972656, |
|
"logits/rejected": -3.6794228553771973, |
|
"logps/chosen": -293.86090087890625, |
|
"logps/rejected": -235.68692016601562, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": 1.6038920879364014, |
|
"rewards/diff": 0.06115199252963066, |
|
"rewards/diff_abs": 0.9691001772880554, |
|
"rewards/rejected": 0.7916983366012573, |
|
"rewards/student_margin": 0.8121936917304993, |
|
"rewards/teacher_margin": 0.7510417103767395, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 2.911215015378752e-06, |
|
"logits/chosen": -3.5684292316436768, |
|
"logits/rejected": -3.6296639442443848, |
|
"logps/chosen": -225.4886016845703, |
|
"logps/rejected": -186.40719604492188, |
|
"loss": 0.5008, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": 1.4102319478988647, |
|
"rewards/diff": 0.22086882591247559, |
|
"rewards/diff_abs": 1.04868483543396, |
|
"rewards/rejected": 0.43415483832359314, |
|
"rewards/student_margin": 0.9760771989822388, |
|
"rewards/teacher_margin": 0.7552083730697632, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 10.875, |
|
"learning_rate": 2.8077294831853547e-06, |
|
"logits/chosen": -3.450024127960205, |
|
"logits/rejected": -3.508530378341675, |
|
"logps/chosen": -287.51263427734375, |
|
"logps/rejected": -215.53939819335938, |
|
"loss": 0.5224, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 1.462050199508667, |
|
"rewards/diff": -0.2909145951271057, |
|
"rewards/diff_abs": 1.0944832563400269, |
|
"rewards/rejected": 0.7868188619613647, |
|
"rewards/student_margin": 0.6752313375473022, |
|
"rewards/teacher_margin": 0.9661458134651184, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 2.703705525459806e-06, |
|
"logits/chosen": -3.5202553272247314, |
|
"logits/rejected": -3.5470759868621826, |
|
"logps/chosen": -221.18173217773438, |
|
"logps/rejected": -204.56344604492188, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 1.5964858531951904, |
|
"rewards/diff": 0.1700429618358612, |
|
"rewards/diff_abs": 0.6282828450202942, |
|
"rewards/rejected": 0.6587344408035278, |
|
"rewards/student_margin": 0.9377514123916626, |
|
"rewards/teacher_margin": 0.767708420753479, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 11.5, |
|
"learning_rate": 2.599325149964946e-06, |
|
"logits/chosen": -3.427098512649536, |
|
"logits/rejected": -3.5964770317077637, |
|
"logps/chosen": -338.41900634765625, |
|
"logps/rejected": -305.21978759765625, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.5666666626930237, |
|
"rewards/chosen": 1.980444312095642, |
|
"rewards/diff": -0.2440481185913086, |
|
"rewards/diff_abs": 0.9519003033638, |
|
"rewards/rejected": 1.4953259229660034, |
|
"rewards/student_margin": 0.485118567943573, |
|
"rewards/teacher_margin": 0.7291667461395264, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 10.875, |
|
"learning_rate": 2.4947709880776607e-06, |
|
"logits/chosen": -3.465344190597534, |
|
"logits/rejected": -3.593451738357544, |
|
"logps/chosen": -249.97262573242188, |
|
"logps/rejected": -215.36184692382812, |
|
"loss": 0.5113, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": 1.3433548212051392, |
|
"rewards/diff": 0.33395522832870483, |
|
"rewards/diff_abs": 1.4617677927017212, |
|
"rewards/rejected": 0.1708579957485199, |
|
"rewards/student_margin": 1.172497034072876, |
|
"rewards/teacher_margin": 0.8385416865348816, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 10.5, |
|
"learning_rate": 2.3902259752439462e-06, |
|
"logits/chosen": -3.506533145904541, |
|
"logits/rejected": -3.5754833221435547, |
|
"logps/chosen": -280.00299072265625, |
|
"logps/rejected": -243.15451049804688, |
|
"loss": 0.5074, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.4691600799560547, |
|
"rewards/diff": -0.009477054700255394, |
|
"rewards/diff_abs": 1.2979676723480225, |
|
"rewards/rejected": 0.6395747661590576, |
|
"rewards/student_margin": 0.8295854330062866, |
|
"rewards/teacher_margin": 0.839062511920929, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 11.125, |
|
"learning_rate": 2.2858730309019594e-06, |
|
"logits/chosen": -3.401517868041992, |
|
"logits/rejected": -3.449411392211914, |
|
"logps/chosen": -333.2916564941406, |
|
"logps/rejected": -242.51858520507812, |
|
"loss": 0.5146, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 2.0638175010681152, |
|
"rewards/diff": 0.23633404076099396, |
|
"rewards/diff_abs": 1.1042144298553467, |
|
"rewards/rejected": 0.9806085824966431, |
|
"rewards/student_margin": 1.0832091569900513, |
|
"rewards/teacher_margin": 0.846875011920929, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 11.25, |
|
"learning_rate": 2.181894738433076e-06, |
|
"logits/chosen": -3.5467307567596436, |
|
"logits/rejected": -3.588332414627075, |
|
"logps/chosen": -248.4571990966797, |
|
"logps/rejected": -221.55154418945312, |
|
"loss": 0.5411, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": 1.626908540725708, |
|
"rewards/diff": -0.0030008137691766024, |
|
"rewards/diff_abs": 1.0150421857833862, |
|
"rewards/rejected": 0.7426697015762329, |
|
"rewards/student_margin": 0.8842388391494751, |
|
"rewards/teacher_margin": 0.8872395753860474, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 10.625, |
|
"learning_rate": 2.078473025700937e-06, |
|
"logits/chosen": -3.5422046184539795, |
|
"logits/rejected": -3.618915557861328, |
|
"logps/chosen": -197.5839385986328, |
|
"logps/rejected": -168.53799438476562, |
|
"loss": 0.5448, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": 0.9767719507217407, |
|
"rewards/diff": 0.020980846136808395, |
|
"rewards/diff_abs": 1.2862763404846191, |
|
"rewards/rejected": 0.22818705439567566, |
|
"rewards/student_margin": 0.748585045337677, |
|
"rewards/teacher_margin": 0.7276042103767395, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 1.975788846737431e-06, |
|
"logits/chosen": -3.4971141815185547, |
|
"logits/rejected": -3.526686191558838, |
|
"logps/chosen": -224.8160400390625, |
|
"logps/rejected": -224.65371704101562, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": 1.1530336141586304, |
|
"rewards/diff": -0.030303645879030228, |
|
"rewards/diff_abs": 1.0633232593536377, |
|
"rewards/rejected": 0.4067746698856354, |
|
"rewards/student_margin": 0.7462589144706726, |
|
"rewards/teacher_margin": 0.7765625715255737, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 1.8740218651325714e-06, |
|
"logits/chosen": -3.4748759269714355, |
|
"logits/rejected": -3.4663357734680176, |
|
"logps/chosen": -258.1708679199219, |
|
"logps/rejected": -236.91549682617188, |
|
"loss": 0.5224, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": 1.9041097164154053, |
|
"rewards/diff": 0.3186507225036621, |
|
"rewards/diff_abs": 1.0768160820007324, |
|
"rewards/rejected": 0.7318129539489746, |
|
"rewards/student_margin": 1.1722967624664307, |
|
"rewards/teacher_margin": 0.853645920753479, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 1.7733501396822178e-06, |
|
"logits/chosen": -3.5963053703308105, |
|
"logits/rejected": -3.566746234893799, |
|
"logps/chosen": -200.7073211669922, |
|
"logps/rejected": -181.52761840820312, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": 1.2579277753829956, |
|
"rewards/diff": -0.2456444799900055, |
|
"rewards/diff_abs": 1.066627025604248, |
|
"rewards/rejected": 0.5113847851753235, |
|
"rewards/student_margin": 0.7465430498123169, |
|
"rewards/teacher_margin": 0.9921875, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 1.6739498128436563e-06, |
|
"logits/chosen": -3.5266900062561035, |
|
"logits/rejected": -3.5792396068573, |
|
"logps/chosen": -277.3493957519531, |
|
"logps/rejected": -250.41488647460938, |
|
"loss": 0.51, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.8075897693634033, |
|
"rewards/diff": 0.4050876498222351, |
|
"rewards/diff_abs": 1.2073490619659424, |
|
"rewards/rejected": 0.4259396195411682, |
|
"rewards/student_margin": 1.3816502094268799, |
|
"rewards/teacher_margin": 0.9765625, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 1.5759948025441535e-06, |
|
"logits/chosen": -3.3835601806640625, |
|
"logits/rejected": -3.446404218673706, |
|
"logps/chosen": -268.1842041015625, |
|
"logps/rejected": -229.45700073242188, |
|
"loss": 0.5225, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.4978923797607422, |
|
"rewards/diff": 0.04186774417757988, |
|
"rewards/diff_abs": 1.2465837001800537, |
|
"rewards/rejected": 0.4945663511753082, |
|
"rewards/student_margin": 1.003326177597046, |
|
"rewards/teacher_margin": 0.9614583849906921, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 10.0, |
|
"learning_rate": 1.479656497881698e-06, |
|
"logits/chosen": -3.572722911834717, |
|
"logits/rejected": -3.628993511199951, |
|
"logps/chosen": -231.67037963867188, |
|
"logps/rejected": -189.6853790283203, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 1.3352339267730713, |
|
"rewards/diff": -0.23537194728851318, |
|
"rewards/diff_abs": 1.2142590284347534, |
|
"rewards/rejected": 0.7659183740615845, |
|
"rewards/student_margin": 0.5693155527114868, |
|
"rewards/teacher_margin": 0.8046875, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 1.3851034592503648e-06, |
|
"logits/chosen": -3.4025959968566895, |
|
"logits/rejected": -3.5293147563934326, |
|
"logps/chosen": -274.0171203613281, |
|
"logps/rejected": -199.73716735839844, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.7333332300186157, |
|
"rewards/chosen": 1.475367784500122, |
|
"rewards/diff": 0.08657832443714142, |
|
"rewards/diff_abs": 0.861635684967041, |
|
"rewards/rejected": 0.5617061257362366, |
|
"rewards/student_margin": 0.9136616587638855, |
|
"rewards/teacher_margin": 0.82708340883255, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 11.75, |
|
"learning_rate": 1.2925011234149859e-06, |
|
"logits/chosen": -3.494055986404419, |
|
"logits/rejected": -3.6171557903289795, |
|
"logps/chosen": -205.4471435546875, |
|
"logps/rejected": -157.2217559814453, |
|
"loss": 0.5149, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.3053886890411377, |
|
"rewards/diff": 0.1055004820227623, |
|
"rewards/diff_abs": 1.2906330823898315, |
|
"rewards/rejected": 0.30822157859802246, |
|
"rewards/student_margin": 0.9971672296524048, |
|
"rewards/teacher_margin": 0.8916667699813843, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 1.2020115140511436e-06, |
|
"logits/chosen": -3.38506817817688, |
|
"logits/rejected": -3.3986282348632812, |
|
"logps/chosen": -287.0667419433594, |
|
"logps/rejected": -257.8066711425781, |
|
"loss": 0.5156, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 1.4152857065200806, |
|
"rewards/diff": 0.06253819167613983, |
|
"rewards/diff_abs": 0.9119114875793457, |
|
"rewards/rejected": 0.5600391626358032, |
|
"rewards/student_margin": 0.8552465438842773, |
|
"rewards/teacher_margin": 0.7927082777023315, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 10.375, |
|
"learning_rate": 1.11379295825695e-06, |
|
"logits/chosen": -3.4194672107696533, |
|
"logits/rejected": -3.4630534648895264, |
|
"logps/chosen": -275.80841064453125, |
|
"logps/rejected": -247.9615478515625, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.5666666626930237, |
|
"rewards/chosen": 1.539294958114624, |
|
"rewards/diff": -0.13074719905853271, |
|
"rewards/diff_abs": 0.9170882105827332, |
|
"rewards/rejected": 0.9658753275871277, |
|
"rewards/student_margin": 0.5734195113182068, |
|
"rewards/teacher_margin": 0.7041667699813843, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 10.875, |
|
"learning_rate": 1.0279998095326188e-06, |
|
"logits/chosen": -3.5342392921447754, |
|
"logits/rejected": -3.6398627758026123, |
|
"logps/chosen": -282.4989013671875, |
|
"logps/rejected": -232.01602172851562, |
|
"loss": 0.5212, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": 1.5444934368133545, |
|
"rewards/diff": 0.07039527595043182, |
|
"rewards/diff_abs": 0.9651015996932983, |
|
"rewards/rejected": 0.6813898682594299, |
|
"rewards/student_margin": 0.8631036877632141, |
|
"rewards/teacher_margin": 0.7927082777023315, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 10.375, |
|
"learning_rate": 9.447821777125376e-07, |
|
"logits/chosen": -3.4949746131896973, |
|
"logits/rejected": -3.4841065406799316, |
|
"logps/chosen": -235.8585968017578, |
|
"logps/rejected": -223.1814422607422, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": 1.090267539024353, |
|
"rewards/diff": -0.26063305139541626, |
|
"rewards/diff_abs": 1.1585631370544434, |
|
"rewards/rejected": 0.42069220542907715, |
|
"rewards/student_margin": 0.6695753335952759, |
|
"rewards/teacher_margin": 0.9302083849906921, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 13.0625, |
|
"learning_rate": 8.642856663223537e-07, |
|
"logits/chosen": -3.6274445056915283, |
|
"logits/rejected": -3.7008399963378906, |
|
"logps/chosen": -279.4967346191406, |
|
"logps/rejected": -193.52825927734375, |
|
"loss": 0.5387, |
|
"rewards/accuracies": 0.8333331942558289, |
|
"rewards/chosen": 1.603075623512268, |
|
"rewards/diff": 0.050136499106884, |
|
"rewards/diff_abs": 0.9624601602554321, |
|
"rewards/rejected": 0.5263765454292297, |
|
"rewards/student_margin": 1.0766990184783936, |
|
"rewards/teacher_margin": 1.0265624523162842, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 9.0625, |
|
"learning_rate": 7.866511178206202e-07, |
|
"logits/chosen": -3.556497097015381, |
|
"logits/rejected": -3.509038209915161, |
|
"logps/chosen": -290.5392150878906, |
|
"logps/rejected": -260.15875244140625, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.5666666626930237, |
|
"rewards/chosen": 1.7650150060653687, |
|
"rewards/diff": -0.24563904106616974, |
|
"rewards/diff_abs": 1.3142454624176025, |
|
"rewards/rejected": 1.0887789726257324, |
|
"rewards/student_margin": 0.6762360334396362, |
|
"rewards/teacher_margin": 0.921875, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 11.375, |
|
"learning_rate": 7.120143671707535e-07, |
|
"logits/chosen": -3.6382040977478027, |
|
"logits/rejected": -3.5810635089874268, |
|
"logps/chosen": -239.7833709716797, |
|
"logps/rejected": -191.7135772705078, |
|
"loss": 0.5104, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.411780595779419, |
|
"rewards/diff": 0.06282065808773041, |
|
"rewards/diff_abs": 0.9871824383735657, |
|
"rewards/rejected": 0.6317722797393799, |
|
"rewards/student_margin": 0.7800081968307495, |
|
"rewards/teacher_margin": 0.7171874642372131, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 10.0, |
|
"learning_rate": 6.405060041744557e-07, |
|
"logits/chosen": -3.4055404663085938, |
|
"logits/rejected": -3.4413161277770996, |
|
"logps/chosen": -315.9834899902344, |
|
"logps/rejected": -280.46771240234375, |
|
"loss": 0.5225, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.993194580078125, |
|
"rewards/diff": 0.17093998193740845, |
|
"rewards/diff_abs": 1.2821754217147827, |
|
"rewards/rejected": 0.9248586893081665, |
|
"rewards/student_margin": 1.068335771560669, |
|
"rewards/teacher_margin": 0.8973957896232605, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 11.375, |
|
"learning_rate": 5.72251144982447e-07, |
|
"logits/chosen": -3.526531219482422, |
|
"logits/rejected": -3.4491629600524902, |
|
"logps/chosen": -256.53570556640625, |
|
"logps/rejected": -279.9180603027344, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.8302761316299438, |
|
"rewards/diff": 0.47802895307540894, |
|
"rewards/diff_abs": 1.4019181728363037, |
|
"rewards/rejected": 0.6298513412475586, |
|
"rewards/student_margin": 1.2004249095916748, |
|
"rewards/teacher_margin": 0.7223958969116211, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 11.0, |
|
"learning_rate": 5.07369213182295e-07, |
|
"logits/chosen": -3.4488792419433594, |
|
"logits/rejected": -3.5185768604278564, |
|
"logps/chosen": -257.1033630371094, |
|
"logps/rejected": -192.66726684570312, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.6999999284744263, |
|
"rewards/chosen": 1.1292977333068848, |
|
"rewards/diff": 0.002992980182170868, |
|
"rewards/diff_abs": 1.323104977607727, |
|
"rewards/rejected": 0.12526309490203857, |
|
"rewards/student_margin": 1.0040346384048462, |
|
"rewards/teacher_margin": 1.0010416507720947, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 4.4597373084635717e-07, |
|
"logits/chosen": -3.419471263885498, |
|
"logits/rejected": -3.40906023979187, |
|
"logps/chosen": -296.2270812988281, |
|
"logps/rejected": -242.5465850830078, |
|
"loss": 0.508, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 1.3991469144821167, |
|
"rewards/diff": -0.30492842197418213, |
|
"rewards/diff_abs": 1.2077829837799072, |
|
"rewards/rejected": 0.8592837452888489, |
|
"rewards/student_margin": 0.5398632884025574, |
|
"rewards/teacher_margin": 0.8447917103767395, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 3.88172119905435e-07, |
|
"logits/chosen": -3.573878526687622, |
|
"logits/rejected": -3.4745190143585205, |
|
"logps/chosen": -265.7789001464844, |
|
"logps/rejected": -231.770263671875, |
|
"loss": 0.5098, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.3960720300674438, |
|
"rewards/diff": 0.12062199413776398, |
|
"rewards/diff_abs": 0.9348724484443665, |
|
"rewards/rejected": 0.4093042314052582, |
|
"rewards/student_margin": 0.9867678880691528, |
|
"rewards/teacher_margin": 0.86614590883255, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 8.875, |
|
"learning_rate": 3.3406551419567584e-07, |
|
"logits/chosen": -3.4966206550598145, |
|
"logits/rejected": -3.4546685218811035, |
|
"logps/chosen": -286.70538330078125, |
|
"logps/rejected": -290.0686950683594, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.6999999284744263, |
|
"rewards/chosen": 1.7340021133422852, |
|
"rewards/diff": 0.5937216281890869, |
|
"rewards/diff_abs": 1.2892600297927856, |
|
"rewards/rejected": 0.528822124004364, |
|
"rewards/student_margin": 1.2051799297332764, |
|
"rewards/teacher_margin": 0.6114583611488342, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 10.0, |
|
"learning_rate": 2.837485825075728e-07, |
|
"logits/chosen": -3.5864462852478027, |
|
"logits/rejected": -3.6643550395965576, |
|
"logps/chosen": -302.582763671875, |
|
"logps/rejected": -229.8857879638672, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 1.3810118436813354, |
|
"rewards/diff": -0.328029602766037, |
|
"rewards/diff_abs": 1.2970329523086548, |
|
"rewards/rejected": 0.7757080793380737, |
|
"rewards/student_margin": 0.6053037643432617, |
|
"rewards/teacher_margin": 0.9333332777023315, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 10.25, |
|
"learning_rate": 2.37309362946673e-07, |
|
"logits/chosen": -3.469447612762451, |
|
"logits/rejected": -3.529064655303955, |
|
"logps/chosen": -201.64187622070312, |
|
"logps/rejected": -166.51071166992188, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.9876018762588501, |
|
"rewards/diff": 0.09399458020925522, |
|
"rewards/diff_abs": 0.9936901330947876, |
|
"rewards/rejected": 0.07589896023273468, |
|
"rewards/student_margin": 0.9117029309272766, |
|
"rewards/teacher_margin": 0.8177083134651184, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 9.375, |
|
"learning_rate": 1.948291088958032e-07, |
|
"logits/chosen": -3.3895657062530518, |
|
"logits/rejected": -3.42724347114563, |
|
"logps/chosen": -260.0352783203125, |
|
"logps/rejected": -211.0215606689453, |
|
"loss": 0.5147, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": 1.2599786520004272, |
|
"rewards/diff": -0.1080915778875351, |
|
"rewards/diff_abs": 1.3488976955413818, |
|
"rewards/rejected": 0.6868201494216919, |
|
"rewards/student_margin": 0.5731583833694458, |
|
"rewards/teacher_margin": 0.6812499761581421, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 1.5638214684833923e-07, |
|
"logits/chosen": -3.3913490772247314, |
|
"logits/rejected": -3.495671510696411, |
|
"logps/chosen": -283.8644714355469, |
|
"logps/rejected": -207.0258026123047, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.7908437252044678, |
|
"rewards/diff": 0.14334459602832794, |
|
"rewards/diff_abs": 1.1933469772338867, |
|
"rewards/rejected": 0.6808325052261353, |
|
"rewards/student_margin": 1.1100112199783325, |
|
"rewards/teacher_margin": 0.9666666984558105, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 12.25, |
|
"learning_rate": 1.220357463612501e-07, |
|
"logits/chosen": -3.5331833362579346, |
|
"logits/rejected": -3.496367931365967, |
|
"logps/chosen": -264.0143127441406, |
|
"logps/rejected": -205.7065887451172, |
|
"loss": 0.5444, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.7263545989990234, |
|
"rewards/diff": 0.29247918725013733, |
|
"rewards/diff_abs": 0.9457036852836609, |
|
"rewards/rejected": 0.7515836358070374, |
|
"rewards/student_margin": 0.9747709035873413, |
|
"rewards/teacher_margin": 0.6822917461395264, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 10.6875, |
|
"learning_rate": 9.185000235546443e-08, |
|
"logits/chosen": -3.5394463539123535, |
|
"logits/rejected": -3.528214931488037, |
|
"logps/chosen": -222.8568572998047, |
|
"logps/rejected": -199.4870147705078, |
|
"loss": 0.5187, |
|
"rewards/accuracies": 0.5666666626930237, |
|
"rewards/chosen": 1.2547296285629272, |
|
"rewards/diff": -0.4252438545227051, |
|
"rewards/diff_abs": 0.9407827258110046, |
|
"rewards/rejected": 0.9872652292251587, |
|
"rewards/student_margin": 0.2674644887447357, |
|
"rewards/teacher_margin": 0.6927083730697632, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 11.5625, |
|
"learning_rate": 6.587772996949876e-08, |
|
"logits/chosen": -3.472136974334717, |
|
"logits/rejected": -3.594128370285034, |
|
"logps/chosen": -274.9361877441406, |
|
"logps/rejected": -187.9529266357422, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": 1.5798580646514893, |
|
"rewards/diff": 0.20931819081306458, |
|
"rewards/diff_abs": 0.9370753169059753, |
|
"rewards/rejected": 0.4444982409477234, |
|
"rewards/student_margin": 1.1353598833084106, |
|
"rewards/teacher_margin": 0.9260417819023132, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 11.375, |
|
"learning_rate": 4.416437215030628e-08, |
|
"logits/chosen": -3.366868257522583, |
|
"logits/rejected": -3.4336013793945312, |
|
"logps/chosen": -232.9638214111328, |
|
"logps/rejected": -209.1346893310547, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": 1.4733285903930664, |
|
"rewards/diff": -0.1160399541258812, |
|
"rewards/diff_abs": 1.4289867877960205, |
|
"rewards/rejected": 0.5935351252555847, |
|
"rewards/student_margin": 0.8797934651374817, |
|
"rewards/teacher_margin": 0.9958333969116211, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 11.3125, |
|
"learning_rate": 2.6747920143047056e-08, |
|
"logits/chosen": -3.585693836212158, |
|
"logits/rejected": -3.666484832763672, |
|
"logps/chosen": -243.569091796875, |
|
"logps/rejected": -184.44293212890625, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.76666659116745, |
|
"rewards/chosen": 1.4393314123153687, |
|
"rewards/diff": 0.24042055010795593, |
|
"rewards/diff_abs": 1.126199722290039, |
|
"rewards/rejected": 0.07599426060914993, |
|
"rewards/student_margin": 1.3633372783660889, |
|
"rewards/teacher_margin": 1.1229166984558105, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 12.0, |
|
"learning_rate": 1.3658847018884758e-08, |
|
"logits/chosen": -3.3958117961883545, |
|
"logits/rejected": -3.488321304321289, |
|
"logps/chosen": -304.1349792480469, |
|
"logps/rejected": -259.19927978515625, |
|
"loss": 0.5219, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.6452767848968506, |
|
"rewards/diff": -0.22879931330680847, |
|
"rewards/diff_abs": 1.1713745594024658, |
|
"rewards/rejected": 1.1782429218292236, |
|
"rewards/student_margin": 0.4670340418815613, |
|
"rewards/teacher_margin": 0.6958333849906921, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 4.920054357119841e-09, |
|
"logits/chosen": -3.4455044269561768, |
|
"logits/rejected": -3.4982807636260986, |
|
"logps/chosen": -252.8186798095703, |
|
"logps/rejected": -198.8025665283203, |
|
"loss": 0.5123, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.7279932498931885, |
|
"rewards/diff": 0.12250219285488129, |
|
"rewards/diff_abs": 0.8293051719665527, |
|
"rewards/rejected": 0.713824450969696, |
|
"rewards/student_margin": 1.0141689777374268, |
|
"rewards/teacher_margin": 0.8916667699813843, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 10.875, |
|
"learning_rate": 5.468321749468875e-10, |
|
"logits/chosen": -3.456815242767334, |
|
"logits/rejected": -3.5720372200012207, |
|
"logps/chosen": -234.10720825195312, |
|
"logps/rejected": -200.9365692138672, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.9176043272018433, |
|
"rewards/diff": -0.2213120013475418, |
|
"rewards/diff_abs": 0.8564842343330383, |
|
"rewards/rejected": 0.30974966287612915, |
|
"rewards/student_margin": 0.6078547239303589, |
|
"rewards/teacher_margin": 0.8291667103767395, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 835, |
|
"total_flos": 0.0, |
|
"train_loss": 0.54411713648699, |
|
"train_runtime": 5965.6032, |
|
"train_samples_per_second": 26.864, |
|
"train_steps_per_second": 0.14 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 835, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000000000000000000000000000000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 3, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|