|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.517799352750809, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.000000000000001e-07, |
|
"logits/chosen": -2.0985755920410156, |
|
"logits/rejected": -1.9598942995071411, |
|
"logps/chosen": -282.9971618652344, |
|
"logps/rejected": -239.9343719482422, |
|
"loss": 0.6951, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.008073234930634499, |
|
"rewards/margins": -0.0036141639575362206, |
|
"rewards/rejected": -0.004459070973098278, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"logits/chosen": -2.0734448432922363, |
|
"logits/rejected": -2.004692316055298, |
|
"logps/chosen": -277.91009521484375, |
|
"logps/rejected": -271.27777099609375, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0035435440950095654, |
|
"rewards/margins": -0.0011311531998217106, |
|
"rewards/rejected": 0.004674696363508701, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.5e-06, |
|
"logits/chosen": -2.2034449577331543, |
|
"logits/rejected": -2.15450382232666, |
|
"logps/chosen": -272.84222412109375, |
|
"logps/rejected": -296.7918701171875, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.011506916023790836, |
|
"rewards/margins": -0.0003993515856564045, |
|
"rewards/rejected": 0.011906265281140804, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -2.265322685241699, |
|
"logits/rejected": -2.2147812843322754, |
|
"logps/chosen": -371.73626708984375, |
|
"logps/rejected": -411.03802490234375, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00851450115442276, |
|
"rewards/margins": 0.0020747678354382515, |
|
"rewards/rejected": 0.0064397333189845085, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -2.126523971557617, |
|
"logits/rejected": -2.257094383239746, |
|
"logps/chosen": -232.20245361328125, |
|
"logps/rejected": -273.501953125, |
|
"loss": 0.6971, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.012802458368241787, |
|
"rewards/margins": -0.007561136037111282, |
|
"rewards/rejected": -0.005241322796791792, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3e-06, |
|
"logits/chosen": -2.3206775188446045, |
|
"logits/rejected": -2.236947774887085, |
|
"logps/chosen": -282.0924072265625, |
|
"logps/rejected": -330.6745910644531, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.012886857613921165, |
|
"rewards/margins": 0.0036454680375754833, |
|
"rewards/rejected": 0.00924139004200697, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.5000000000000004e-06, |
|
"logits/chosen": -2.068502426147461, |
|
"logits/rejected": -2.1196892261505127, |
|
"logps/chosen": -270.6734313964844, |
|
"logps/rejected": -337.42877197265625, |
|
"loss": 0.6981, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.01318061351776123, |
|
"rewards/margins": -0.009621287696063519, |
|
"rewards/rejected": -0.003559327684342861, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -2.100869655609131, |
|
"logits/rejected": -2.2541885375976562, |
|
"logps/chosen": -310.18951416015625, |
|
"logps/rejected": -404.02984619140625, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0011783126974478364, |
|
"rewards/margins": 0.0021168000530451536, |
|
"rewards/rejected": -0.0009384873555973172, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.5e-06, |
|
"logits/chosen": -2.0913190841674805, |
|
"logits/rejected": -2.1440823078155518, |
|
"logps/chosen": -293.1015625, |
|
"logps/rejected": -351.9281005859375, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.0038339614402502775, |
|
"rewards/margins": -0.0011605499312281609, |
|
"rewards/rejected": 0.004994511604309082, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -2.306243419647217, |
|
"logits/rejected": -2.3045802116394043, |
|
"logps/chosen": -386.077392578125, |
|
"logps/rejected": -367.2294921875, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0069270143285393715, |
|
"rewards/margins": 0.010493995621800423, |
|
"rewards/rejected": -0.01742100901901722, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.500000000000001e-06, |
|
"logits/chosen": -2.136928081512451, |
|
"logits/rejected": -2.2102103233337402, |
|
"logps/chosen": -302.4460754394531, |
|
"logps/rejected": -337.15631103515625, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.006688881199806929, |
|
"rewards/margins": 0.004262590315192938, |
|
"rewards/rejected": -0.010951472446322441, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6e-06, |
|
"logits/chosen": -2.064110279083252, |
|
"logits/rejected": -2.200680732727051, |
|
"logps/chosen": -320.602294921875, |
|
"logps/rejected": -369.6840515136719, |
|
"loss": 0.6988, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0003773693460971117, |
|
"rewards/margins": -0.0108009809628129, |
|
"rewards/rejected": 0.011178349144756794, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"logits/chosen": -2.348961353302002, |
|
"logits/rejected": -2.3161768913269043, |
|
"logps/chosen": -400.9246826171875, |
|
"logps/rejected": -472.69769287109375, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0075957290828228, |
|
"rewards/margins": 0.017815779894590378, |
|
"rewards/rejected": -0.010220050811767578, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.000000000000001e-06, |
|
"logits/chosen": -2.412529468536377, |
|
"logits/rejected": -2.233689785003662, |
|
"logps/chosen": -422.3519287109375, |
|
"logps/rejected": -368.8478088378906, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0051600453443825245, |
|
"rewards/margins": -0.0017115597147494555, |
|
"rewards/rejected": 0.006871605291962624, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.5e-06, |
|
"logits/chosen": -2.0220694541931152, |
|
"logits/rejected": -2.0454282760620117, |
|
"logps/chosen": -315.8769836425781, |
|
"logps/rejected": -314.20269775390625, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -4.372652620077133e-05, |
|
"rewards/margins": -0.00020327605307102203, |
|
"rewards/rejected": 0.000159549992531538, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-06, |
|
"logits/chosen": -2.0351786613464355, |
|
"logits/rejected": -1.877925992012024, |
|
"logps/chosen": -383.6861572265625, |
|
"logps/rejected": -276.95050048828125, |
|
"loss": 0.6963, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0005419491790235043, |
|
"rewards/margins": -0.005987692158669233, |
|
"rewards/rejected": 0.005445742513984442, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.500000000000002e-06, |
|
"logits/chosen": -2.0652737617492676, |
|
"logits/rejected": -2.2235422134399414, |
|
"logps/chosen": -343.7192077636719, |
|
"logps/rejected": -360.5483703613281, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.007448338903486729, |
|
"rewards/margins": 0.015540864318609238, |
|
"rewards/rejected": -0.00809252168983221, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9e-06, |
|
"logits/chosen": -2.2705795764923096, |
|
"logits/rejected": -2.159031867980957, |
|
"logps/chosen": -335.9582824707031, |
|
"logps/rejected": -337.2821044921875, |
|
"loss": 0.6976, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.010970616713166237, |
|
"rewards/margins": -0.008028840646147728, |
|
"rewards/rejected": -0.0029417751356959343, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.5e-06, |
|
"logits/chosen": -1.9825160503387451, |
|
"logits/rejected": -2.1957268714904785, |
|
"logps/chosen": -275.072509765625, |
|
"logps/rejected": -364.77215576171875, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0009894607355818152, |
|
"rewards/margins": 0.002422523219138384, |
|
"rewards/rejected": -0.0034119843039661646, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1e-05, |
|
"logits/chosen": -2.0461435317993164, |
|
"logits/rejected": -2.114314556121826, |
|
"logps/chosen": -280.7231140136719, |
|
"logps/rejected": -340.37872314453125, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0057319882325828075, |
|
"rewards/margins": 0.0008945947047322989, |
|
"rewards/rejected": 0.004837393760681152, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.05e-05, |
|
"logits/chosen": -2.2721879482269287, |
|
"logits/rejected": -2.31955885887146, |
|
"logps/chosen": -350.15765380859375, |
|
"logps/rejected": -346.72943115234375, |
|
"loss": 0.6961, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.01111826952546835, |
|
"rewards/margins": -0.005553150083869696, |
|
"rewards/rejected": -0.0055651189759373665, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"logits/chosen": -2.297549247741699, |
|
"logits/rejected": -2.258702516555786, |
|
"logps/chosen": -354.07342529296875, |
|
"logps/rejected": -381.96453857421875, |
|
"loss": 0.7021, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.010824179276823997, |
|
"rewards/margins": -0.01675737090408802, |
|
"rewards/rejected": 0.005933189764618874, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1500000000000002e-05, |
|
"logits/chosen": -2.095402240753174, |
|
"logits/rejected": -2.2284393310546875, |
|
"logps/chosen": -367.1632385253906, |
|
"logps/rejected": -395.11016845703125, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.00539558008313179, |
|
"rewards/margins": 0.018006421625614166, |
|
"rewards/rejected": -0.023401999846100807, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2e-05, |
|
"logits/chosen": -2.058290719985962, |
|
"logits/rejected": -2.142444133758545, |
|
"logps/chosen": -208.247314453125, |
|
"logps/rejected": -261.3697204589844, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0028132672887295485, |
|
"rewards/margins": 0.01700596883893013, |
|
"rewards/rejected": -0.0198192335665226, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.25e-05, |
|
"logits/chosen": -1.9966950416564941, |
|
"logits/rejected": -2.0200271606445312, |
|
"logps/chosen": -390.6391906738281, |
|
"logps/rejected": -335.07867431640625, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.008169196546077728, |
|
"rewards/margins": 0.007835723459720612, |
|
"rewards/rejected": -0.01600492000579834, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"logits/chosen": -2.1417901515960693, |
|
"logits/rejected": -2.102891683578491, |
|
"logps/chosen": -334.50872802734375, |
|
"logps/rejected": -364.5052795410156, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.019382527098059654, |
|
"rewards/margins": 0.01085577066987753, |
|
"rewards/rejected": -0.03023829497396946, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"logits/chosen": -2.243198871612549, |
|
"logits/rejected": -2.3117640018463135, |
|
"logps/chosen": -343.4504699707031, |
|
"logps/rejected": -356.1696472167969, |
|
"loss": 0.7073, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.03033299744129181, |
|
"rewards/margins": -0.027142930775880814, |
|
"rewards/rejected": -0.0031900645699352026, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"logits/chosen": -2.0717036724090576, |
|
"logits/rejected": -2.2167444229125977, |
|
"logps/chosen": -270.87353515625, |
|
"logps/rejected": -327.87640380859375, |
|
"loss": 0.6986, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.032239750027656555, |
|
"rewards/margins": -0.010378909297287464, |
|
"rewards/rejected": -0.021860837936401367, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.45e-05, |
|
"logits/chosen": -2.2460880279541016, |
|
"logits/rejected": -2.2309041023254395, |
|
"logps/chosen": -435.2510986328125, |
|
"logps/rejected": -461.74859619140625, |
|
"loss": 0.6958, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.018374010920524597, |
|
"rewards/margins": -0.004351234529167414, |
|
"rewards/rejected": -0.01402278058230877, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5e-05, |
|
"logits/chosen": -2.228811740875244, |
|
"logits/rejected": -2.293030261993408, |
|
"logps/chosen": -382.5074768066406, |
|
"logps/rejected": -371.8452453613281, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.004188417922705412, |
|
"rewards/margins": 0.01916835457086563, |
|
"rewards/rejected": -0.014979935251176357, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.55e-05, |
|
"logits/chosen": -1.9169931411743164, |
|
"logits/rejected": -2.1672849655151367, |
|
"logps/chosen": -270.2207946777344, |
|
"logps/rejected": -279.6405944824219, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.002476263325661421, |
|
"rewards/margins": 0.003316545393317938, |
|
"rewards/rejected": -0.0008402818348258734, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"logits/chosen": -2.349118232727051, |
|
"logits/rejected": -2.260673999786377, |
|
"logps/chosen": -375.1436462402344, |
|
"logps/rejected": -423.8641357421875, |
|
"loss": 0.6963, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.018851473927497864, |
|
"rewards/margins": -0.004983377177268267, |
|
"rewards/rejected": -0.013868091627955437, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.65e-05, |
|
"logits/chosen": -2.253589630126953, |
|
"logits/rejected": -2.3596742153167725, |
|
"logps/chosen": -454.2779846191406, |
|
"logps/rejected": -503.1221008300781, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.004412556067109108, |
|
"rewards/margins": 0.02018122747540474, |
|
"rewards/rejected": -0.024593783542513847, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"logits/chosen": -2.041515588760376, |
|
"logits/rejected": -2.1577486991882324, |
|
"logps/chosen": -338.1881103515625, |
|
"logps/rejected": -387.30023193359375, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.011815071105957031, |
|
"rewards/margins": 0.009705852717161179, |
|
"rewards/rejected": -0.02152092382311821, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.75e-05, |
|
"logits/chosen": -2.2442219257354736, |
|
"logits/rejected": -2.3945209980010986, |
|
"logps/chosen": -293.7795715332031, |
|
"logps/rejected": -396.3448181152344, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.004475045017898083, |
|
"rewards/margins": 0.015541339293122292, |
|
"rewards/rejected": -0.0200163833796978, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8e-05, |
|
"logits/chosen": -2.0793919563293457, |
|
"logits/rejected": -2.1432011127471924, |
|
"logps/chosen": -322.59869384765625, |
|
"logps/rejected": -437.3168029785156, |
|
"loss": 0.6776, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.006924772635102272, |
|
"rewards/margins": 0.03214583545923233, |
|
"rewards/rejected": -0.02522106282413006, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.85e-05, |
|
"logits/chosen": -2.155754804611206, |
|
"logits/rejected": -2.2335851192474365, |
|
"logps/chosen": -295.4434814453125, |
|
"logps/rejected": -359.8757019042969, |
|
"loss": 0.7043, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.023294713348150253, |
|
"rewards/margins": -0.02007477357983589, |
|
"rewards/rejected": -0.0032199383713304996, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9e-05, |
|
"logits/chosen": -2.2595698833465576, |
|
"logits/rejected": -2.064648151397705, |
|
"logps/chosen": -310.83416748046875, |
|
"logps/rejected": -272.6891784667969, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.003548526205122471, |
|
"rewards/margins": 0.058505721390247345, |
|
"rewards/rejected": -0.06205424666404724, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9500000000000003e-05, |
|
"logits/chosen": -2.247586250305176, |
|
"logits/rejected": -2.2411181926727295, |
|
"logps/chosen": -410.89801025390625, |
|
"logps/rejected": -397.5184631347656, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.023591995239257812, |
|
"rewards/margins": 0.0011774520389735699, |
|
"rewards/rejected": -0.024769451469182968, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2e-05, |
|
"logits/chosen": -2.0857701301574707, |
|
"logits/rejected": -2.3974575996398926, |
|
"logps/chosen": -298.82373046875, |
|
"logps/rejected": -322.1784973144531, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.012818144634366035, |
|
"rewards/margins": 0.05216258019208908, |
|
"rewards/rejected": -0.0393444299697876, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.05e-05, |
|
"logits/chosen": -2.0326108932495117, |
|
"logits/rejected": -2.021967887878418, |
|
"logps/chosen": -322.1849365234375, |
|
"logps/rejected": -286.97393798828125, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.03254096582531929, |
|
"rewards/margins": 0.01258254237473011, |
|
"rewards/rejected": -0.04512350261211395, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1e-05, |
|
"logits/chosen": -2.1398110389709473, |
|
"logits/rejected": -2.1065280437469482, |
|
"logps/chosen": -315.64190673828125, |
|
"logps/rejected": -273.958740234375, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.06157093122601509, |
|
"rewards/margins": 0.055743757635354996, |
|
"rewards/rejected": -0.11731469631195068, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.15e-05, |
|
"logits/chosen": -2.027090549468994, |
|
"logits/rejected": -2.1431775093078613, |
|
"logps/chosen": -328.4605407714844, |
|
"logps/rejected": -336.6142272949219, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.002649687696248293, |
|
"rewards/margins": 0.030248070135712624, |
|
"rewards/rejected": -0.02759838104248047, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"logits/chosen": -2.1685423851013184, |
|
"logits/rejected": -2.3537817001342773, |
|
"logps/chosen": -280.93017578125, |
|
"logps/rejected": -430.02398681640625, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.009914875030517578, |
|
"rewards/margins": 0.031635358929634094, |
|
"rewards/rejected": -0.04155023396015167, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.25e-05, |
|
"logits/chosen": -2.026183605194092, |
|
"logits/rejected": -2.2760047912597656, |
|
"logps/chosen": -327.8951416015625, |
|
"logps/rejected": -326.314697265625, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.003114223014563322, |
|
"rewards/margins": 0.017296195030212402, |
|
"rewards/rejected": -0.020410416647791862, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"logits/chosen": -2.2608642578125, |
|
"logits/rejected": -2.2612316608428955, |
|
"logps/chosen": -313.09063720703125, |
|
"logps/rejected": -299.6613464355469, |
|
"loss": 0.6969, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0639270767569542, |
|
"rewards/margins": -0.005310798529535532, |
|
"rewards/rejected": -0.0586162805557251, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.35e-05, |
|
"logits/chosen": -2.0354762077331543, |
|
"logits/rejected": -2.2385144233703613, |
|
"logps/chosen": -361.6187744140625, |
|
"logps/rejected": -416.43804931640625, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0550750270485878, |
|
"rewards/margins": 0.021474791690707207, |
|
"rewards/rejected": -0.07654982060194016, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4e-05, |
|
"logits/chosen": -2.1052396297454834, |
|
"logits/rejected": -2.1244544982910156, |
|
"logps/chosen": -401.3201904296875, |
|
"logps/rejected": -394.404052734375, |
|
"loss": 0.6713, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04391036182641983, |
|
"rewards/margins": 0.048601724207401276, |
|
"rewards/rejected": -0.0925120860338211, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.45e-05, |
|
"logits/chosen": -2.325778007507324, |
|
"logits/rejected": -2.1452741622924805, |
|
"logps/chosen": -359.43255615234375, |
|
"logps/rejected": -393.9988708496094, |
|
"loss": 0.6997, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.03692295402288437, |
|
"rewards/margins": -0.005499981343746185, |
|
"rewards/rejected": -0.031422972679138184, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5e-05, |
|
"logits/chosen": -2.0367650985717773, |
|
"logits/rejected": -2.193774700164795, |
|
"logps/chosen": -318.42266845703125, |
|
"logps/rejected": -370.5718994140625, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.03530995920300484, |
|
"rewards/margins": 0.06544995307922363, |
|
"rewards/rejected": -0.10075991600751877, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5500000000000003e-05, |
|
"logits/chosen": -2.1022160053253174, |
|
"logits/rejected": -2.0698752403259277, |
|
"logps/chosen": -338.8807678222656, |
|
"logps/rejected": -333.04351806640625, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04516604542732239, |
|
"rewards/margins": 0.047384001314640045, |
|
"rewards/rejected": -0.09255003929138184, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"logits/chosen": -2.1273610591888428, |
|
"logits/rejected": -2.2891712188720703, |
|
"logps/chosen": -307.1384582519531, |
|
"logps/rejected": -314.24676513671875, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.024128681048750877, |
|
"rewards/margins": 0.040785644203424454, |
|
"rewards/rejected": -0.06491431593894958, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6500000000000004e-05, |
|
"logits/chosen": -1.951267957687378, |
|
"logits/rejected": -2.171773672103882, |
|
"logps/chosen": -313.5145568847656, |
|
"logps/rejected": -345.32452392578125, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.06882210075855255, |
|
"rewards/margins": 0.04536902531981468, |
|
"rewards/rejected": -0.11419112980365753, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"logits/chosen": -2.318854808807373, |
|
"logits/rejected": -2.4153428077697754, |
|
"logps/chosen": -407.51080322265625, |
|
"logps/rejected": -407.7454833984375, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.14593330025672913, |
|
"rewards/margins": 0.0101944450289011, |
|
"rewards/rejected": -0.15612773597240448, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"logits/chosen": -2.251836061477661, |
|
"logits/rejected": -2.0312557220458984, |
|
"logps/chosen": -257.6128234863281, |
|
"logps/rejected": -260.79705810546875, |
|
"loss": 0.7024, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.12117181718349457, |
|
"rewards/margins": -0.015745995566248894, |
|
"rewards/rejected": -0.10542581230401993, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"logits/chosen": -2.3783812522888184, |
|
"logits/rejected": -2.2356433868408203, |
|
"logps/chosen": -322.89599609375, |
|
"logps/rejected": -305.649658203125, |
|
"loss": 0.7044, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0736481174826622, |
|
"rewards/margins": -0.007364703342318535, |
|
"rewards/rejected": -0.06628341972827911, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8499999999999998e-05, |
|
"logits/chosen": -2.0815675258636475, |
|
"logits/rejected": -1.8037395477294922, |
|
"logps/chosen": -341.66839599609375, |
|
"logps/rejected": -266.3909912109375, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.028729846701025963, |
|
"rewards/margins": 0.06697390228509903, |
|
"rewards/rejected": -0.09570374339818954, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9e-05, |
|
"logits/chosen": -2.123415946960449, |
|
"logits/rejected": -2.154893398284912, |
|
"logps/chosen": -304.90008544921875, |
|
"logps/rejected": -325.108154296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.11998450756072998, |
|
"rewards/margins": 0.007698964327573776, |
|
"rewards/rejected": -0.12768347561359406, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.95e-05, |
|
"logits/chosen": -2.086604356765747, |
|
"logits/rejected": -2.0624561309814453, |
|
"logps/chosen": -294.25244140625, |
|
"logps/rejected": -324.137939453125, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.06285426020622253, |
|
"rewards/margins": 0.06548047065734863, |
|
"rewards/rejected": -0.12833473086357117, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3e-05, |
|
"logits/chosen": -2.2143869400024414, |
|
"logits/rejected": -2.3379101753234863, |
|
"logps/chosen": -283.2222900390625, |
|
"logps/rejected": -320.791748046875, |
|
"loss": 0.657, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06308362632989883, |
|
"rewards/margins": 0.09713932871818542, |
|
"rewards/rejected": -0.16022296249866486, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.05e-05, |
|
"logits/chosen": -2.117171287536621, |
|
"logits/rejected": -2.3281807899475098, |
|
"logps/chosen": -299.3207702636719, |
|
"logps/rejected": -364.2285461425781, |
|
"loss": 0.7127, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.15081080794334412, |
|
"rewards/margins": -0.02138001285493374, |
|
"rewards/rejected": -0.12943080067634583, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1e-05, |
|
"logits/chosen": -1.974008321762085, |
|
"logits/rejected": -2.116246223449707, |
|
"logps/chosen": -263.60394287109375, |
|
"logps/rejected": -367.9918212890625, |
|
"loss": 0.6806, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1498243808746338, |
|
"rewards/margins": 0.032494522631168365, |
|
"rewards/rejected": -0.18231889605522156, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.15e-05, |
|
"logits/chosen": -2.0844931602478027, |
|
"logits/rejected": -2.224573850631714, |
|
"logps/chosen": -280.8338928222656, |
|
"logps/rejected": -339.91009521484375, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1308109611272812, |
|
"rewards/margins": 0.016523031517863274, |
|
"rewards/rejected": -0.14733397960662842, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"logits/chosen": -2.2176733016967773, |
|
"logits/rejected": -2.2587060928344727, |
|
"logps/chosen": -252.23390197753906, |
|
"logps/rejected": -273.116943359375, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.08397925645112991, |
|
"rewards/margins": 0.031244704499840736, |
|
"rewards/rejected": -0.1152239516377449, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"logits/chosen": -2.1103031635284424, |
|
"logits/rejected": -2.1916050910949707, |
|
"logps/chosen": -253.43565368652344, |
|
"logps/rejected": -299.1423645019531, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11171821504831314, |
|
"rewards/margins": 0.028400154784321785, |
|
"rewards/rejected": -0.14011836051940918, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3e-05, |
|
"logits/chosen": -2.138770580291748, |
|
"logits/rejected": -2.277637481689453, |
|
"logps/chosen": -361.4617919921875, |
|
"logps/rejected": -364.7515869140625, |
|
"loss": 0.6562, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.13355040550231934, |
|
"rewards/margins": 0.10390853881835938, |
|
"rewards/rejected": -0.23745892941951752, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.35e-05, |
|
"logits/chosen": -2.256972312927246, |
|
"logits/rejected": -2.155823230743408, |
|
"logps/chosen": -385.70989990234375, |
|
"logps/rejected": -342.7967529296875, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.20078128576278687, |
|
"rewards/margins": 0.030581658706068993, |
|
"rewards/rejected": -0.2313629388809204, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"logits/chosen": -2.1459546089172363, |
|
"logits/rejected": -2.092371940612793, |
|
"logps/chosen": -302.3841247558594, |
|
"logps/rejected": -248.30654907226562, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.13762035965919495, |
|
"rewards/margins": 0.019860554486513138, |
|
"rewards/rejected": -0.15748091042041779, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.45e-05, |
|
"logits/chosen": -1.9295188188552856, |
|
"logits/rejected": -2.1819865703582764, |
|
"logps/chosen": -197.07498168945312, |
|
"logps/rejected": -277.8927917480469, |
|
"loss": 0.7116, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.21833539009094238, |
|
"rewards/margins": -0.016281111165881157, |
|
"rewards/rejected": -0.20205429196357727, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.5e-05, |
|
"logits/chosen": -2.0420405864715576, |
|
"logits/rejected": -2.095397710800171, |
|
"logps/chosen": -440.4430236816406, |
|
"logps/rejected": -404.73297119140625, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.17053675651550293, |
|
"rewards/margins": 0.025177769362926483, |
|
"rewards/rejected": -0.19571453332901, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.55e-05, |
|
"logits/chosen": -1.8378528356552124, |
|
"logits/rejected": -2.188674211502075, |
|
"logps/chosen": -271.0667724609375, |
|
"logps/rejected": -360.34454345703125, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.08768844604492188, |
|
"rewards/margins": 0.06176728755235672, |
|
"rewards/rejected": -0.1494557410478592, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6e-05, |
|
"logits/chosen": -2.31483793258667, |
|
"logits/rejected": -2.28462290763855, |
|
"logps/chosen": -386.14312744140625, |
|
"logps/rejected": -423.07281494140625, |
|
"loss": 0.7184, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.18145808577537537, |
|
"rewards/margins": -0.015067771077156067, |
|
"rewards/rejected": -0.1663903295993805, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.65e-05, |
|
"logits/chosen": -2.0742580890655518, |
|
"logits/rejected": -2.2153687477111816, |
|
"logps/chosen": -255.38543701171875, |
|
"logps/rejected": -290.1117248535156, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0764242559671402, |
|
"rewards/margins": 0.01691494509577751, |
|
"rewards/rejected": -0.0933392122387886, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.7e-05, |
|
"logits/chosen": -2.0832855701446533, |
|
"logits/rejected": -2.060133934020996, |
|
"logps/chosen": -340.09197998046875, |
|
"logps/rejected": -342.4574890136719, |
|
"loss": 0.6582, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.12881942093372345, |
|
"rewards/margins": 0.10232281684875488, |
|
"rewards/rejected": -0.23114225268363953, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"logits/chosen": -2.045681953430176, |
|
"logits/rejected": -2.041499137878418, |
|
"logps/chosen": -399.8717956542969, |
|
"logps/rejected": -384.0579528808594, |
|
"loss": 0.6529, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1383177787065506, |
|
"rewards/margins": 0.0989096462726593, |
|
"rewards/rejected": -0.2372274249792099, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.8e-05, |
|
"logits/chosen": -2.0136969089508057, |
|
"logits/rejected": -2.131852149963379, |
|
"logps/chosen": -246.70101928710938, |
|
"logps/rejected": -256.7235412597656, |
|
"loss": 0.6548, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20958754420280457, |
|
"rewards/margins": 0.1072821319103241, |
|
"rewards/rejected": -0.31686967611312866, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.85e-05, |
|
"logits/chosen": -1.8942384719848633, |
|
"logits/rejected": -1.7946527004241943, |
|
"logps/chosen": -363.3137512207031, |
|
"logps/rejected": -288.844482421875, |
|
"loss": 0.7086, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2392420619726181, |
|
"rewards/margins": -0.015016615390777588, |
|
"rewards/rejected": -0.2242254614830017, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"logits/chosen": -2.284573793411255, |
|
"logits/rejected": -2.2139668464660645, |
|
"logps/chosen": -291.585205078125, |
|
"logps/rejected": -311.04986572265625, |
|
"loss": 0.6962, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.24729153513908386, |
|
"rewards/margins": 0.011042074300348759, |
|
"rewards/rejected": -0.25833362340927124, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9500000000000005e-05, |
|
"logits/chosen": -2.126842737197876, |
|
"logits/rejected": -2.178943634033203, |
|
"logps/chosen": -289.07049560546875, |
|
"logps/rejected": -267.1867980957031, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09771183133125305, |
|
"rewards/margins": 0.04596526175737381, |
|
"rewards/rejected": -0.14367708563804626, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4e-05, |
|
"logits/chosen": -1.9899426698684692, |
|
"logits/rejected": -2.218869209289551, |
|
"logps/chosen": -348.4210205078125, |
|
"logps/rejected": -453.83306884765625, |
|
"loss": 0.6183, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.17673715949058533, |
|
"rewards/margins": 0.17254139482975006, |
|
"rewards/rejected": -0.3492785692214966, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.05e-05, |
|
"logits/chosen": -2.3394789695739746, |
|
"logits/rejected": -2.3765251636505127, |
|
"logps/chosen": -356.1443786621094, |
|
"logps/rejected": -352.0921936035156, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.30028024315834045, |
|
"rewards/margins": 0.05001110956072807, |
|
"rewards/rejected": -0.35029137134552, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1e-05, |
|
"logits/chosen": -2.136016607284546, |
|
"logits/rejected": -2.1530215740203857, |
|
"logps/chosen": -241.19703674316406, |
|
"logps/rejected": -251.56166076660156, |
|
"loss": 0.7461, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.3470645248889923, |
|
"rewards/margins": -0.07802443206310272, |
|
"rewards/rejected": -0.2690401077270508, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.15e-05, |
|
"logits/chosen": -2.1615593433380127, |
|
"logits/rejected": -2.184434413909912, |
|
"logps/chosen": -338.65057373046875, |
|
"logps/rejected": -285.83734130859375, |
|
"loss": 0.734, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.3146396279335022, |
|
"rewards/margins": -0.06066913902759552, |
|
"rewards/rejected": -0.2539704740047455, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.2e-05, |
|
"logits/chosen": -2.1108598709106445, |
|
"logits/rejected": -2.383390426635742, |
|
"logps/chosen": -285.7464599609375, |
|
"logps/rejected": -336.65997314453125, |
|
"loss": 0.6534, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.30576157569885254, |
|
"rewards/margins": 0.13044245541095734, |
|
"rewards/rejected": -0.43620407581329346, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.25e-05, |
|
"logits/chosen": -2.185880422592163, |
|
"logits/rejected": -2.2634472846984863, |
|
"logps/chosen": -382.79132080078125, |
|
"logps/rejected": -426.94805908203125, |
|
"loss": 0.6376, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2730083465576172, |
|
"rewards/margins": 0.15455365180969238, |
|
"rewards/rejected": -0.4275619387626648, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.3e-05, |
|
"logits/chosen": -2.200338363647461, |
|
"logits/rejected": -2.3442678451538086, |
|
"logps/chosen": -295.3491516113281, |
|
"logps/rejected": -292.9728698730469, |
|
"loss": 0.6193, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1620425134897232, |
|
"rewards/margins": 0.1742367446422577, |
|
"rewards/rejected": -0.3362792432308197, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.35e-05, |
|
"logits/chosen": -2.085541248321533, |
|
"logits/rejected": -2.1654982566833496, |
|
"logps/chosen": -313.8997497558594, |
|
"logps/rejected": -406.6915588378906, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.18983058631420135, |
|
"rewards/margins": 0.09621434658765793, |
|
"rewards/rejected": -0.2860449254512787, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"logits/chosen": -2.0523123741149902, |
|
"logits/rejected": -2.1804168224334717, |
|
"logps/chosen": -278.9588928222656, |
|
"logps/rejected": -358.6983642578125, |
|
"loss": 0.7034, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.369529664516449, |
|
"rewards/margins": 0.06673409789800644, |
|
"rewards/rejected": -0.43626368045806885, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4500000000000004e-05, |
|
"logits/chosen": -1.9767179489135742, |
|
"logits/rejected": -2.073478937149048, |
|
"logps/chosen": -312.8780212402344, |
|
"logps/rejected": -334.3851623535156, |
|
"loss": 0.721, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.41693034768104553, |
|
"rewards/margins": -0.03711947053670883, |
|
"rewards/rejected": -0.3798108398914337, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5e-05, |
|
"logits/chosen": -2.34897780418396, |
|
"logits/rejected": -2.1732659339904785, |
|
"logps/chosen": -365.05712890625, |
|
"logps/rejected": -338.4190673828125, |
|
"loss": 0.6986, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4183480143547058, |
|
"rewards/margins": 0.03731951862573624, |
|
"rewards/rejected": -0.45566752552986145, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.55e-05, |
|
"logits/chosen": -2.2497520446777344, |
|
"logits/rejected": -2.2454562187194824, |
|
"logps/chosen": -326.4650573730469, |
|
"logps/rejected": -414.0813903808594, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4516112208366394, |
|
"rewards/margins": 0.08267778903245926, |
|
"rewards/rejected": -0.5342890024185181, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.600000000000001e-05, |
|
"logits/chosen": -2.0850160121917725, |
|
"logits/rejected": -2.2057738304138184, |
|
"logps/chosen": -356.4827575683594, |
|
"logps/rejected": -372.5768127441406, |
|
"loss": 0.5613, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.45258861780166626, |
|
"rewards/margins": 0.37770116329193115, |
|
"rewards/rejected": -0.8302898406982422, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.6500000000000005e-05, |
|
"logits/chosen": -2.3158974647521973, |
|
"logits/rejected": -2.1951651573181152, |
|
"logps/chosen": -314.29595947265625, |
|
"logps/rejected": -327.9600524902344, |
|
"loss": 0.8837, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.591391384601593, |
|
"rewards/margins": -0.23991218209266663, |
|
"rewards/rejected": -0.3514792025089264, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.7e-05, |
|
"logits/chosen": -2.166097640991211, |
|
"logits/rejected": -2.2614049911499023, |
|
"logps/chosen": -359.9385986328125, |
|
"logps/rejected": -416.6658020019531, |
|
"loss": 0.6978, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5130646824836731, |
|
"rewards/margins": 0.03734045475721359, |
|
"rewards/rejected": -0.5504050850868225, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.75e-05, |
|
"logits/chosen": -2.368900775909424, |
|
"logits/rejected": -2.303640365600586, |
|
"logps/chosen": -379.1260986328125, |
|
"logps/rejected": -359.70281982421875, |
|
"loss": 0.7422, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5878180265426636, |
|
"rewards/margins": -0.014551635831594467, |
|
"rewards/rejected": -0.5732664465904236, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8e-05, |
|
"logits/chosen": -1.9750244617462158, |
|
"logits/rejected": -2.13342547416687, |
|
"logps/chosen": -309.6315002441406, |
|
"logps/rejected": -290.8506164550781, |
|
"loss": 0.706, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5889778733253479, |
|
"rewards/margins": 0.024408388882875443, |
|
"rewards/rejected": -0.6133862733840942, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.85e-05, |
|
"logits/chosen": -2.128643035888672, |
|
"logits/rejected": -2.0525574684143066, |
|
"logps/chosen": -413.3954772949219, |
|
"logps/rejected": -345.6094055175781, |
|
"loss": 0.7972, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7196497917175293, |
|
"rewards/margins": -0.1434842050075531, |
|
"rewards/rejected": -0.5761655569076538, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9e-05, |
|
"logits/chosen": -2.2135891914367676, |
|
"logits/rejected": -2.2574219703674316, |
|
"logps/chosen": -370.8169250488281, |
|
"logps/rejected": -344.8519592285156, |
|
"loss": 0.7296, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5168295502662659, |
|
"rewards/margins": -0.034182578325271606, |
|
"rewards/rejected": -0.48264697194099426, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9500000000000004e-05, |
|
"logits/chosen": -2.0821609497070312, |
|
"logits/rejected": -2.0011813640594482, |
|
"logps/chosen": -249.1553955078125, |
|
"logps/rejected": -292.9418029785156, |
|
"loss": 0.6621, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.31434231996536255, |
|
"rewards/margins": 0.10270004719495773, |
|
"rewards/rejected": -0.41704243421554565, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-05, |
|
"logits/chosen": -2.0994839668273926, |
|
"logits/rejected": -2.1740150451660156, |
|
"logps/chosen": -377.63885498046875, |
|
"logps/rejected": -479.538818359375, |
|
"loss": 0.6675, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.41925686597824097, |
|
"rewards/margins": 0.06821132451295853, |
|
"rewards/rejected": -0.4874681532382965, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999983511654996e-05, |
|
"logits/chosen": -2.200211524963379, |
|
"logits/rejected": -2.1883296966552734, |
|
"logps/chosen": -384.2947998046875, |
|
"logps/rejected": -448.62957763671875, |
|
"loss": 0.6957, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5274229049682617, |
|
"rewards/margins": 0.025556959211826324, |
|
"rewards/rejected": -0.5529798865318298, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9999340468374787e-05, |
|
"logits/chosen": -2.13332462310791, |
|
"logits/rejected": -2.218052387237549, |
|
"logps/chosen": -311.7938537597656, |
|
"logps/rejected": -272.7942199707031, |
|
"loss": 0.6103, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3114677965641022, |
|
"rewards/margins": 0.20093847811222076, |
|
"rewards/rejected": -0.5124062895774841, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.99985160619992e-05, |
|
"logits/chosen": -2.151207447052002, |
|
"logits/rejected": -2.073565721511841, |
|
"logps/chosen": -324.60223388671875, |
|
"logps/rejected": -350.4017028808594, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4631275534629822, |
|
"rewards/margins": 0.19337055087089539, |
|
"rewards/rejected": -0.6564981341362, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.99973619082977e-05, |
|
"logits/chosen": -2.077446699142456, |
|
"logits/rejected": -2.1615357398986816, |
|
"logps/chosen": -347.063232421875, |
|
"logps/rejected": -355.1671142578125, |
|
"loss": 0.6216, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.387503445148468, |
|
"rewards/margins": 0.18972592055797577, |
|
"rewards/rejected": -0.577229380607605, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9995878022494335e-05, |
|
"logits/chosen": -2.173962354660034, |
|
"logits/rejected": -2.1823246479034424, |
|
"logps/chosen": -378.25152587890625, |
|
"logps/rejected": -369.76983642578125, |
|
"loss": 0.7478, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5083476901054382, |
|
"rewards/margins": -0.091352179646492, |
|
"rewards/rejected": -0.41699549555778503, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9994064424162575e-05, |
|
"logits/chosen": -2.2684097290039062, |
|
"logits/rejected": -2.341747760772705, |
|
"logps/chosen": -422.6853332519531, |
|
"logps/rejected": -422.57330322265625, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5471891760826111, |
|
"rewards/margins": 0.16060353815555573, |
|
"rewards/rejected": -0.7077926993370056, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9991921137225e-05, |
|
"logits/chosen": -2.0649406909942627, |
|
"logits/rejected": -2.0103793144226074, |
|
"logps/chosen": -398.6349182128906, |
|
"logps/rejected": -321.7645263671875, |
|
"loss": 0.7777, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5084972381591797, |
|
"rewards/margins": -0.11370338499546051, |
|
"rewards/rejected": -0.39479386806488037, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998944818995302e-05, |
|
"logits/chosen": -2.1160833835601807, |
|
"logits/rejected": -2.2650928497314453, |
|
"logps/chosen": -319.59326171875, |
|
"logps/rejected": -399.0347900390625, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.46814414858818054, |
|
"rewards/margins": 0.12615980207920074, |
|
"rewards/rejected": -0.5943039059638977, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998664561496647e-05, |
|
"logits/chosen": -2.0261175632476807, |
|
"logits/rejected": -2.01719331741333, |
|
"logps/chosen": -352.3540344238281, |
|
"logps/rejected": -404.3323669433594, |
|
"loss": 0.5917, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5237185955047607, |
|
"rewards/margins": 0.2665461599826813, |
|
"rewards/rejected": -0.7902647256851196, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998351344923322e-05, |
|
"logits/chosen": -2.1265554428100586, |
|
"logits/rejected": -2.188615322113037, |
|
"logps/chosen": -362.6216125488281, |
|
"logps/rejected": -339.3355407714844, |
|
"loss": 0.7484, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.6888318657875061, |
|
"rewards/margins": -0.090579092502594, |
|
"rewards/rejected": -0.5982527732849121, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998005173406865e-05, |
|
"logits/chosen": -2.379279136657715, |
|
"logits/rejected": -2.329848527908325, |
|
"logps/chosen": -325.26385498046875, |
|
"logps/rejected": -324.79656982421875, |
|
"loss": 0.7852, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8365094065666199, |
|
"rewards/margins": -0.1392899453639984, |
|
"rewards/rejected": -0.6972194910049438, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.997626051513512e-05, |
|
"logits/chosen": -2.126467227935791, |
|
"logits/rejected": -2.1980350017547607, |
|
"logps/chosen": -376.349609375, |
|
"logps/rejected": -436.8643798828125, |
|
"loss": 0.6107, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.49128276109695435, |
|
"rewards/margins": 0.21428784728050232, |
|
"rewards/rejected": -0.705570638179779, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.997213984244138e-05, |
|
"logits/chosen": -2.1278860569000244, |
|
"logits/rejected": -2.218194007873535, |
|
"logps/chosen": -235.55670166015625, |
|
"logps/rejected": -324.1790466308594, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.6700268387794495, |
|
"rewards/margins": 0.23135630786418915, |
|
"rewards/rejected": -0.9013831615447998, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996768977034188e-05, |
|
"logits/chosen": -2.236452579498291, |
|
"logits/rejected": -2.300806999206543, |
|
"logps/chosen": -308.35467529296875, |
|
"logps/rejected": -374.0247497558594, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6465363502502441, |
|
"rewards/margins": 0.05209742486476898, |
|
"rewards/rejected": -0.6986337900161743, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996291035753608e-05, |
|
"logits/chosen": -2.234069347381592, |
|
"logits/rejected": -2.244938611984253, |
|
"logps/chosen": -533.840576171875, |
|
"logps/rejected": -479.3876953125, |
|
"loss": 0.644, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6587188243865967, |
|
"rewards/margins": 0.14327090978622437, |
|
"rewards/rejected": -0.801989734172821, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995780166706767e-05, |
|
"logits/chosen": -2.2996180057525635, |
|
"logits/rejected": -2.1304776668548584, |
|
"logps/chosen": -336.5658264160156, |
|
"logps/rejected": -290.9165954589844, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6112417578697205, |
|
"rewards/margins": 0.08546795696020126, |
|
"rewards/rejected": -0.6967097520828247, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995236376632373e-05, |
|
"logits/chosen": -2.143672466278076, |
|
"logits/rejected": -2.0870895385742188, |
|
"logps/chosen": -285.63385009765625, |
|
"logps/rejected": -269.767822265625, |
|
"loss": 0.6417, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6124986410140991, |
|
"rewards/margins": 0.17768412828445435, |
|
"rewards/rejected": -0.7901827096939087, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.994659672703383e-05, |
|
"logits/chosen": -2.0322999954223633, |
|
"logits/rejected": -2.1645522117614746, |
|
"logps/chosen": -294.4024963378906, |
|
"logps/rejected": -434.4449768066406, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.6845369338989258, |
|
"rewards/margins": 0.3629693388938904, |
|
"rewards/rejected": -1.047506332397461, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.994050062526915e-05, |
|
"logits/chosen": -2.268840789794922, |
|
"logits/rejected": -2.210455894470215, |
|
"logps/chosen": -454.2048034667969, |
|
"logps/rejected": -401.5218200683594, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7424222826957703, |
|
"rewards/margins": 0.03279120847582817, |
|
"rewards/rejected": -0.7752134799957275, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993407554144136e-05, |
|
"logits/chosen": -2.1254310607910156, |
|
"logits/rejected": -2.2457275390625, |
|
"logps/chosen": -263.01318359375, |
|
"logps/rejected": -279.0079040527344, |
|
"loss": 0.6246, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8383140563964844, |
|
"rewards/margins": 0.3438203036785126, |
|
"rewards/rejected": -1.1821343898773193, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9927321560301686e-05, |
|
"logits/chosen": -1.9596202373504639, |
|
"logits/rejected": -1.9732022285461426, |
|
"logps/chosen": -333.146240234375, |
|
"logps/rejected": -317.2575988769531, |
|
"loss": 0.6247, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6822391152381897, |
|
"rewards/margins": 0.18031413853168488, |
|
"rewards/rejected": -0.8625531196594238, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.992023877093969e-05, |
|
"logits/chosen": -2.2412517070770264, |
|
"logits/rejected": -2.2476582527160645, |
|
"logps/chosen": -270.1220703125, |
|
"logps/rejected": -297.88116455078125, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1133708953857422, |
|
"rewards/margins": 0.07820569723844528, |
|
"rewards/rejected": -1.1915764808654785, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.991282726678215e-05, |
|
"logits/chosen": -2.1082093715667725, |
|
"logits/rejected": -2.260173797607422, |
|
"logps/chosen": -342.9111633300781, |
|
"logps/rejected": -425.59710693359375, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.1128913164138794, |
|
"rewards/margins": 0.2950645089149475, |
|
"rewards/rejected": -1.4079556465148926, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990508714559182e-05, |
|
"logits/chosen": -1.9671399593353271, |
|
"logits/rejected": -2.216414213180542, |
|
"logps/chosen": -371.3886413574219, |
|
"logps/rejected": -412.00323486328125, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6249299049377441, |
|
"rewards/margins": 0.6542832255363464, |
|
"rewards/rejected": -2.2792131900787354, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989701850946613e-05, |
|
"logits/chosen": -1.9954829216003418, |
|
"logits/rejected": -2.0708837509155273, |
|
"logps/chosen": -309.5416259765625, |
|
"logps/rejected": -369.55902099609375, |
|
"loss": 0.6247, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3325786590576172, |
|
"rewards/margins": 0.2878706455230713, |
|
"rewards/rejected": -1.6204493045806885, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988862146483585e-05, |
|
"logits/chosen": -2.04398775100708, |
|
"logits/rejected": -2.3061468601226807, |
|
"logps/chosen": -311.40283203125, |
|
"logps/rejected": -330.2856750488281, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9415925741195679, |
|
"rewards/margins": 0.5770285725593567, |
|
"rewards/rejected": -1.5186210870742798, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987989612246368e-05, |
|
"logits/chosen": -2.1247596740722656, |
|
"logits/rejected": -2.293691396713257, |
|
"logps/chosen": -415.2500915527344, |
|
"logps/rejected": -361.0720520019531, |
|
"loss": 0.5442, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9840108752250671, |
|
"rewards/margins": 0.4814227521419525, |
|
"rewards/rejected": -1.4654337167739868, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9870842597442755e-05, |
|
"logits/chosen": -2.21590518951416, |
|
"logits/rejected": -2.1943631172180176, |
|
"logps/chosen": -387.32720947265625, |
|
"logps/rejected": -422.17059326171875, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0035918951034546, |
|
"rewards/margins": 0.6130063533782959, |
|
"rewards/rejected": -1.61659836769104, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9861461009195224e-05, |
|
"logits/chosen": -2.2312891483306885, |
|
"logits/rejected": -2.3044235706329346, |
|
"logps/chosen": -297.28729248046875, |
|
"logps/rejected": -300.91070556640625, |
|
"loss": 0.8, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.2972065210342407, |
|
"rewards/margins": -0.13482597470283508, |
|
"rewards/rejected": -1.162380576133728, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9851751481470565e-05, |
|
"logits/chosen": -2.3871798515319824, |
|
"logits/rejected": -2.3541009426116943, |
|
"logps/chosen": -389.8529357910156, |
|
"logps/rejected": -395.540283203125, |
|
"loss": 0.7455, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -1.4040565490722656, |
|
"rewards/margins": -0.02014276757836342, |
|
"rewards/rejected": -1.3839137554168701, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.984171414234401e-05, |
|
"logits/chosen": -2.3224058151245117, |
|
"logits/rejected": -2.5203804969787598, |
|
"logps/chosen": -278.0612487792969, |
|
"logps/rejected": -292.171142578125, |
|
"loss": 0.698, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2112613916397095, |
|
"rewards/margins": 0.22594159841537476, |
|
"rewards/rejected": -1.437203049659729, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.983134912421485e-05, |
|
"logits/chosen": -2.1884591579437256, |
|
"logits/rejected": -2.0368237495422363, |
|
"logps/chosen": -277.64117431640625, |
|
"logps/rejected": -261.66094970703125, |
|
"loss": 0.5748, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.172703742980957, |
|
"rewards/margins": 0.33002516627311707, |
|
"rewards/rejected": -1.502728819847107, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.982065656380468e-05, |
|
"logits/chosen": -2.079421281814575, |
|
"logits/rejected": -2.2217986583709717, |
|
"logps/chosen": -295.58087158203125, |
|
"logps/rejected": -291.7632751464844, |
|
"loss": 0.5565, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9564344882965088, |
|
"rewards/margins": 0.4054575562477112, |
|
"rewards/rejected": -1.3618921041488647, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9809636602155604e-05, |
|
"logits/chosen": -2.1835222244262695, |
|
"logits/rejected": -2.2144436836242676, |
|
"logps/chosen": -248.64321899414062, |
|
"logps/rejected": -231.23239135742188, |
|
"loss": 0.6353, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2899762392044067, |
|
"rewards/margins": 0.29608777165412903, |
|
"rewards/rejected": -1.586064100265503, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9798289384628355e-05, |
|
"logits/chosen": -2.047929048538208, |
|
"logits/rejected": -2.020115852355957, |
|
"logps/chosen": -270.7432556152344, |
|
"logps/rejected": -295.75714111328125, |
|
"loss": 0.7271, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.225740671157837, |
|
"rewards/margins": 0.14127963781356812, |
|
"rewards/rejected": -1.3670202493667603, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.978661506090042e-05, |
|
"logits/chosen": -2.268289089202881, |
|
"logits/rejected": -2.264258623123169, |
|
"logps/chosen": -335.73406982421875, |
|
"logps/rejected": -326.88641357421875, |
|
"loss": 0.8802, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.066948890686035, |
|
"rewards/margins": -0.09082351624965668, |
|
"rewards/rejected": -1.9761252403259277, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9774613784964e-05, |
|
"logits/chosen": -2.366272449493408, |
|
"logits/rejected": -2.413400888442993, |
|
"logps/chosen": -275.4363708496094, |
|
"logps/rejected": -316.3116149902344, |
|
"loss": 0.705, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.7615418434143066, |
|
"rewards/margins": 0.17158068716526031, |
|
"rewards/rejected": -1.9331226348876953, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9762285715124054e-05, |
|
"logits/chosen": -2.370572328567505, |
|
"logits/rejected": -2.273383617401123, |
|
"logps/chosen": -342.66046142578125, |
|
"logps/rejected": -399.414794921875, |
|
"loss": 1.0301, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.7256594896316528, |
|
"rewards/margins": -0.3764263093471527, |
|
"rewards/rejected": -1.3492331504821777, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.974963101399614e-05, |
|
"logits/chosen": -2.196343421936035, |
|
"logits/rejected": -2.460721969604492, |
|
"logps/chosen": -255.3898162841797, |
|
"logps/rejected": -318.3489990234375, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9483575820922852, |
|
"rewards/margins": 0.5218918919563293, |
|
"rewards/rejected": -1.4702494144439697, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.973664984850435e-05, |
|
"logits/chosen": -2.305603265762329, |
|
"logits/rejected": -2.2728540897369385, |
|
"logps/chosen": -351.310791015625, |
|
"logps/rejected": -322.8355712890625, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.272456169128418, |
|
"rewards/margins": 0.04384595528244972, |
|
"rewards/rejected": -1.3163020610809326, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9723342389879e-05, |
|
"logits/chosen": -2.463696241378784, |
|
"logits/rejected": -2.424192190170288, |
|
"logps/chosen": -487.9200439453125, |
|
"logps/rejected": -463.71844482421875, |
|
"loss": 0.5906, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.156785011291504, |
|
"rewards/margins": 0.42006048560142517, |
|
"rewards/rejected": -1.5768455266952515, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.970970881365449e-05, |
|
"logits/chosen": -2.1991195678710938, |
|
"logits/rejected": -2.2735817432403564, |
|
"logps/chosen": -333.13214111328125, |
|
"logps/rejected": -371.25213623046875, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0519524812698364, |
|
"rewards/margins": 0.3118639290332794, |
|
"rewards/rejected": -1.363816261291504, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9695749299666894e-05, |
|
"logits/chosen": -2.0732052326202393, |
|
"logits/rejected": -2.0948421955108643, |
|
"logps/chosen": -355.09381103515625, |
|
"logps/rejected": -370.39007568359375, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0995197296142578, |
|
"rewards/margins": 0.12801453471183777, |
|
"rewards/rejected": -1.227534294128418, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9681464032051635e-05, |
|
"logits/chosen": -2.281567335128784, |
|
"logits/rejected": -2.19057559967041, |
|
"logps/chosen": -407.2377624511719, |
|
"logps/rejected": -354.5672302246094, |
|
"loss": 0.8131, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.355703592300415, |
|
"rewards/margins": -0.11926855146884918, |
|
"rewards/rejected": -1.2364351749420166, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.966685319924106e-05, |
|
"logits/chosen": -2.3482041358947754, |
|
"logits/rejected": -2.267275094985962, |
|
"logps/chosen": -445.82208251953125, |
|
"logps/rejected": -458.0115051269531, |
|
"loss": 0.7112, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0608644485473633, |
|
"rewards/margins": 0.03552216291427612, |
|
"rewards/rejected": -1.0963865518569946, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.965191699396191e-05, |
|
"logits/chosen": -2.1460695266723633, |
|
"logits/rejected": -2.340147018432617, |
|
"logps/chosen": -305.9909362792969, |
|
"logps/rejected": -318.31256103515625, |
|
"loss": 0.7068, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9504708647727966, |
|
"rewards/margins": 0.06828048825263977, |
|
"rewards/rejected": -1.0187513828277588, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963665561323286e-05, |
|
"logits/chosen": -2.2726097106933594, |
|
"logits/rejected": -2.2365224361419678, |
|
"logps/chosen": -287.2446594238281, |
|
"logps/rejected": -313.90203857421875, |
|
"loss": 0.8693, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.1069557666778564, |
|
"rewards/margins": -0.2487190216779709, |
|
"rewards/rejected": -0.8582366704940796, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.962106925836183e-05, |
|
"logits/chosen": -2.1673455238342285, |
|
"logits/rejected": -2.1659748554229736, |
|
"logps/chosen": -363.5993957519531, |
|
"logps/rejected": -345.0345153808594, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8967840075492859, |
|
"rewards/margins": 0.05857213959097862, |
|
"rewards/rejected": -0.9553561210632324, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9605158134943356e-05, |
|
"logits/chosen": -2.167635679244995, |
|
"logits/rejected": -2.10685396194458, |
|
"logps/chosen": -278.79949951171875, |
|
"logps/rejected": -243.8157501220703, |
|
"loss": 0.8424, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.8083875179290771, |
|
"rewards/margins": -0.2297360599040985, |
|
"rewards/rejected": -0.5786514282226562, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9588922452855935e-05, |
|
"logits/chosen": -1.9295530319213867, |
|
"logits/rejected": -2.084747314453125, |
|
"logps/chosen": -350.78887939453125, |
|
"logps/rejected": -416.31549072265625, |
|
"loss": 0.6965, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.61838698387146, |
|
"rewards/margins": 0.09959676116704941, |
|
"rewards/rejected": -0.7179837226867676, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9572362426259176e-05, |
|
"logits/chosen": -2.1817588806152344, |
|
"logits/rejected": -2.141252040863037, |
|
"logps/chosen": -347.57403564453125, |
|
"logps/rejected": -378.94281005859375, |
|
"loss": 0.6127, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6715837121009827, |
|
"rewards/margins": 0.36849769949913025, |
|
"rewards/rejected": -1.04008150100708, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.955547827359103e-05, |
|
"logits/chosen": -2.249030590057373, |
|
"logits/rejected": -1.901309609413147, |
|
"logps/chosen": -358.05859375, |
|
"logps/rejected": -263.93365478515625, |
|
"loss": 0.7291, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.6999510526657104, |
|
"rewards/margins": -0.013744346797466278, |
|
"rewards/rejected": -0.6862066984176636, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.953827021756489e-05, |
|
"logits/chosen": -1.9777555465698242, |
|
"logits/rejected": -1.9771215915679932, |
|
"logps/chosen": -374.28192138671875, |
|
"logps/rejected": -446.4751892089844, |
|
"loss": 0.7191, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6933267116546631, |
|
"rewards/margins": 0.10663817822933197, |
|
"rewards/rejected": -0.7999648451805115, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.952073848516663e-05, |
|
"logits/chosen": -2.353224515914917, |
|
"logits/rejected": -2.316944122314453, |
|
"logps/chosen": -409.6300048828125, |
|
"logps/rejected": -406.71124267578125, |
|
"loss": 0.7778, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6479594707489014, |
|
"rewards/margins": -0.08428651094436646, |
|
"rewards/rejected": -0.5636729598045349, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9502883307651674e-05, |
|
"logits/chosen": -1.9488294124603271, |
|
"logits/rejected": -1.9255703687667847, |
|
"logps/chosen": -289.2491760253906, |
|
"logps/rejected": -412.5265808105469, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4605676531791687, |
|
"rewards/margins": 0.313413143157959, |
|
"rewards/rejected": -0.7739807367324829, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9484704920541856e-05, |
|
"logits/chosen": -1.9965554475784302, |
|
"logits/rejected": -2.217256546020508, |
|
"logps/chosen": -285.490966796875, |
|
"logps/rejected": -356.04559326171875, |
|
"loss": 0.7259, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5536916851997375, |
|
"rewards/margins": 0.03260548785328865, |
|
"rewards/rejected": -0.5862972140312195, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9466203563622424e-05, |
|
"logits/chosen": -2.1669509410858154, |
|
"logits/rejected": -2.293706178665161, |
|
"logps/chosen": -394.7683410644531, |
|
"logps/rejected": -457.5030822753906, |
|
"loss": 0.7323, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6224880814552307, |
|
"rewards/margins": 0.012505665421485901, |
|
"rewards/rejected": -0.6349937915802002, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.944737948093876e-05, |
|
"logits/chosen": -1.9717931747436523, |
|
"logits/rejected": -2.068146228790283, |
|
"logps/chosen": -258.3878173828125, |
|
"logps/rejected": -263.28363037109375, |
|
"loss": 0.6178, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.25086289644241333, |
|
"rewards/margins": 0.19410811364650726, |
|
"rewards/rejected": -0.4449709951877594, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.942823292079325e-05, |
|
"logits/chosen": -2.1289565563201904, |
|
"logits/rejected": -2.1196882724761963, |
|
"logps/chosen": -301.2019958496094, |
|
"logps/rejected": -265.33685302734375, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6575217843055725, |
|
"rewards/margins": 0.06726698577404022, |
|
"rewards/rejected": -0.7247887849807739, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.940876413574195e-05, |
|
"logits/chosen": -2.016998291015625, |
|
"logits/rejected": -2.271897077560425, |
|
"logps/chosen": -302.09454345703125, |
|
"logps/rejected": -424.992919921875, |
|
"loss": 0.7552, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4835960865020752, |
|
"rewards/margins": -0.06482114642858505, |
|
"rewards/rejected": -0.41877493262290955, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.938897338259132e-05, |
|
"logits/chosen": -2.087447166442871, |
|
"logits/rejected": -1.9530307054519653, |
|
"logps/chosen": -336.6803894042969, |
|
"logps/rejected": -300.6052551269531, |
|
"loss": 0.727, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.46374762058258057, |
|
"rewards/margins": -0.05146384611725807, |
|
"rewards/rejected": -0.4122838079929352, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.936886092239475e-05, |
|
"logits/chosen": -2.277801990509033, |
|
"logits/rejected": -2.1965315341949463, |
|
"logps/chosen": -356.08038330078125, |
|
"logps/rejected": -355.2462158203125, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.35021650791168213, |
|
"rewards/margins": 0.04458609223365784, |
|
"rewards/rejected": -0.39480262994766235, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.93484270204492e-05, |
|
"logits/chosen": -2.132495403289795, |
|
"logits/rejected": -2.1767892837524414, |
|
"logps/chosen": -416.3873291015625, |
|
"logps/rejected": -446.61444091796875, |
|
"loss": 0.6713, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3273276686668396, |
|
"rewards/margins": 0.06858228892087936, |
|
"rewards/rejected": -0.39590996503829956, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.932767194629164e-05, |
|
"logits/chosen": -1.9876537322998047, |
|
"logits/rejected": -2.0633606910705566, |
|
"logps/chosen": -398.9766845703125, |
|
"logps/rejected": -377.8792419433594, |
|
"loss": 0.7513, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.6136964559555054, |
|
"rewards/margins": -0.046500250697135925, |
|
"rewards/rejected": -0.567196249961853, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.930659597369554e-05, |
|
"logits/chosen": -2.009962320327759, |
|
"logits/rejected": -2.057422399520874, |
|
"logps/chosen": -303.35882568359375, |
|
"logps/rejected": -332.4872131347656, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.36901330947875977, |
|
"rewards/margins": 0.15158693492412567, |
|
"rewards/rejected": -0.5206002593040466, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.928519938066722e-05, |
|
"logits/chosen": -1.9507710933685303, |
|
"logits/rejected": -1.9371974468231201, |
|
"logps/chosen": -350.26031494140625, |
|
"logps/rejected": -332.67572021484375, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.47843092679977417, |
|
"rewards/margins": 0.03055078350007534, |
|
"rewards/rejected": -0.5089817047119141, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.926348244944221e-05, |
|
"logits/chosen": -1.8575907945632935, |
|
"logits/rejected": -1.8456588983535767, |
|
"logps/chosen": -298.292236328125, |
|
"logps/rejected": -305.32904052734375, |
|
"loss": 0.6269, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5008257031440735, |
|
"rewards/margins": 0.18142402172088623, |
|
"rewards/rejected": -0.6822497248649597, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9241445466481504e-05, |
|
"logits/chosen": -1.9923934936523438, |
|
"logits/rejected": -2.1334385871887207, |
|
"logps/chosen": -278.231689453125, |
|
"logps/rejected": -368.37152099609375, |
|
"loss": 0.7279, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.43066713213920593, |
|
"rewards/margins": -0.028812985867261887, |
|
"rewards/rejected": -0.40185415744781494, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.921908872246782e-05, |
|
"logits/chosen": -2.099191665649414, |
|
"logits/rejected": -2.299363136291504, |
|
"logps/chosen": -298.4676208496094, |
|
"logps/rejected": -369.09075927734375, |
|
"loss": 0.5683, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.42483800649642944, |
|
"rewards/margins": 0.3080124258995056, |
|
"rewards/rejected": -0.7328504323959351, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.91964125123017e-05, |
|
"logits/chosen": -2.2204477787017822, |
|
"logits/rejected": -2.0866146087646484, |
|
"logps/chosen": -417.0639343261719, |
|
"logps/rejected": -406.372802734375, |
|
"loss": 0.7542, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6449086666107178, |
|
"rewards/margins": -0.07101374119520187, |
|
"rewards/rejected": -0.5738948583602905, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9173417135097715e-05, |
|
"logits/chosen": -2.208749294281006, |
|
"logits/rejected": -2.0630850791931152, |
|
"logps/chosen": -286.49749755859375, |
|
"logps/rejected": -274.74822998046875, |
|
"loss": 0.7148, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.36861342191696167, |
|
"rewards/margins": -0.0281071700155735, |
|
"rewards/rejected": -0.3405062258243561, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9150102894180415e-05, |
|
"logits/chosen": -1.9276704788208008, |
|
"logits/rejected": -1.7694021463394165, |
|
"logps/chosen": -305.76702880859375, |
|
"logps/rejected": -299.9190368652344, |
|
"loss": 0.7089, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5562857985496521, |
|
"rewards/margins": 0.01368020474910736, |
|
"rewards/rejected": -0.5699659585952759, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.91264700970804e-05, |
|
"logits/chosen": -2.1296586990356445, |
|
"logits/rejected": -2.081202983856201, |
|
"logps/chosen": -253.95947265625, |
|
"logps/rejected": -282.4194641113281, |
|
"loss": 0.6342, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3127891719341278, |
|
"rewards/margins": 0.14373984932899475, |
|
"rewards/rejected": -0.45652902126312256, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.910251905553025e-05, |
|
"logits/chosen": -2.120836019515991, |
|
"logits/rejected": -2.152477979660034, |
|
"logps/chosen": -461.200927734375, |
|
"logps/rejected": -470.2550048828125, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.39715662598609924, |
|
"rewards/margins": 0.12175296247005463, |
|
"rewards/rejected": -0.5189095735549927, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9078250085460384e-05, |
|
"logits/chosen": -2.0472662448883057, |
|
"logits/rejected": -2.0290732383728027, |
|
"logps/chosen": -368.0150451660156, |
|
"logps/rejected": -276.0652770996094, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5132391452789307, |
|
"rewards/margins": 0.08550245314836502, |
|
"rewards/rejected": -0.5987416505813599, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.905366350699493e-05, |
|
"logits/chosen": -2.0048584938049316, |
|
"logits/rejected": -2.020286798477173, |
|
"logps/chosen": -341.3944396972656, |
|
"logps/rejected": -444.3612060546875, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.48147350549697876, |
|
"rewards/margins": 0.06385768949985504, |
|
"rewards/rejected": -0.5453312397003174, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.902875964444746e-05, |
|
"logits/chosen": -1.9281437397003174, |
|
"logits/rejected": -2.0846328735351562, |
|
"logps/chosen": -397.08453369140625, |
|
"logps/rejected": -412.8231201171875, |
|
"loss": 0.7184, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5112196207046509, |
|
"rewards/margins": 0.010270453989505768, |
|
"rewards/rejected": -0.5214900374412537, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9003538826316795e-05, |
|
"logits/chosen": -1.9262962341308594, |
|
"logits/rejected": -1.8548663854599, |
|
"logps/chosen": -326.5889587402344, |
|
"logps/rejected": -336.98443603515625, |
|
"loss": 0.6411, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.37680521607398987, |
|
"rewards/margins": 0.14640317857265472, |
|
"rewards/rejected": -0.5232084393501282, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.897800138528253e-05, |
|
"logits/chosen": -2.234349250793457, |
|
"logits/rejected": -2.1834311485290527, |
|
"logps/chosen": -302.0099182128906, |
|
"logps/rejected": -293.316162109375, |
|
"loss": 0.7266, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.42696690559387207, |
|
"rewards/margins": -0.030406557023525238, |
|
"rewards/rejected": -0.39656031131744385, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8952147658200806e-05, |
|
"logits/chosen": -1.9105005264282227, |
|
"logits/rejected": -2.006873607635498, |
|
"logps/chosen": -307.2244873046875, |
|
"logps/rejected": -327.2491455078125, |
|
"loss": 0.6388, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4698525667190552, |
|
"rewards/margins": 0.18418261408805847, |
|
"rewards/rejected": -0.6540351510047913, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.892597798609976e-05, |
|
"logits/chosen": -1.8944015502929688, |
|
"logits/rejected": -1.8353430032730103, |
|
"logps/chosen": -372.86474609375, |
|
"logps/rejected": -328.3533020019531, |
|
"loss": 0.7458, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5451046228408813, |
|
"rewards/margins": -0.07491657137870789, |
|
"rewards/rejected": -0.4701881408691406, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.889949271417504e-05, |
|
"logits/chosen": -2.0069048404693604, |
|
"logits/rejected": -2.1132149696350098, |
|
"logps/chosen": -313.0219421386719, |
|
"logps/rejected": -377.8055114746094, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4439217150211334, |
|
"rewards/margins": 0.06591986864805222, |
|
"rewards/rejected": -0.5098415613174438, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.88726921917853e-05, |
|
"logits/chosen": -1.847022294998169, |
|
"logits/rejected": -1.861661434173584, |
|
"logps/chosen": -222.52427673339844, |
|
"logps/rejected": -241.1929931640625, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.39014291763305664, |
|
"rewards/margins": 0.09657852351665497, |
|
"rewards/rejected": -0.486721396446228, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.884557677244754e-05, |
|
"logits/chosen": -1.9963531494140625, |
|
"logits/rejected": -2.0941479206085205, |
|
"logps/chosen": -274.606689453125, |
|
"logps/rejected": -268.11395263671875, |
|
"loss": 0.6133, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4282204508781433, |
|
"rewards/margins": 0.23392558097839355, |
|
"rewards/rejected": -0.6621460318565369, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.881814681383248e-05, |
|
"logits/chosen": -1.8474693298339844, |
|
"logits/rejected": -2.122403144836426, |
|
"logps/chosen": -254.52745056152344, |
|
"logps/rejected": -355.0078430175781, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.3664317727088928, |
|
"rewards/margins": 0.07878479361534119, |
|
"rewards/rejected": -0.4452165961265564, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.879040267775981e-05, |
|
"logits/chosen": -1.891446828842163, |
|
"logits/rejected": -1.794939637184143, |
|
"logps/chosen": -383.87451171875, |
|
"logps/rejected": -405.0089416503906, |
|
"loss": 0.7267, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.592932403087616, |
|
"rewards/margins": -0.023355990648269653, |
|
"rewards/rejected": -0.5695763826370239, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8762344730193445e-05, |
|
"logits/chosen": -1.923872709274292, |
|
"logits/rejected": -2.09379243850708, |
|
"logps/chosen": -251.91183471679688, |
|
"logps/rejected": -267.0522155761719, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5020468831062317, |
|
"rewards/margins": 0.21398763358592987, |
|
"rewards/rejected": -0.7160345315933228, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.873397334123667e-05, |
|
"logits/chosen": -1.7248388528823853, |
|
"logits/rejected": -2.1145920753479004, |
|
"logps/chosen": -270.976806640625, |
|
"logps/rejected": -385.2688293457031, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5947083234786987, |
|
"rewards/margins": 0.23461143672466278, |
|
"rewards/rejected": -0.8293198347091675, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8705288885127295e-05, |
|
"logits/chosen": -2.289656162261963, |
|
"logits/rejected": -2.2240102291107178, |
|
"logps/chosen": -408.82403564453125, |
|
"logps/rejected": -361.85711669921875, |
|
"loss": 0.7461, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6849729418754578, |
|
"rewards/margins": -0.0317959301173687, |
|
"rewards/rejected": -0.6531770825386047, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.867629174023268e-05, |
|
"logits/chosen": -2.281062602996826, |
|
"logits/rejected": -2.0911028385162354, |
|
"logps/chosen": -390.974365234375, |
|
"logps/rejected": -376.9208984375, |
|
"loss": 0.7288, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5994336009025574, |
|
"rewards/margins": -0.05144501477479935, |
|
"rewards/rejected": -0.5479886531829834, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.864698228904478e-05, |
|
"logits/chosen": -1.8639394044876099, |
|
"logits/rejected": -1.969814658164978, |
|
"logps/chosen": -390.7828674316406, |
|
"logps/rejected": -316.2579345703125, |
|
"loss": 0.7376, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6630164384841919, |
|
"rewards/margins": -0.02051009237766266, |
|
"rewards/rejected": -0.6425063610076904, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.861736091817506e-05, |
|
"logits/chosen": -2.084822654724121, |
|
"logits/rejected": -1.8905832767486572, |
|
"logps/chosen": -373.1632080078125, |
|
"logps/rejected": -256.5694274902344, |
|
"loss": 0.6745, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6736202239990234, |
|
"rewards/margins": 0.06500263512134552, |
|
"rewards/rejected": -0.738622784614563, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.858742801834942e-05, |
|
"logits/chosen": -2.0825746059417725, |
|
"logits/rejected": -1.8865240812301636, |
|
"logps/chosen": -371.4250793457031, |
|
"logps/rejected": -295.2694396972656, |
|
"loss": 0.7168, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5424689650535583, |
|
"rewards/margins": -0.019147779792547226, |
|
"rewards/rejected": -0.5233211517333984, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.855718398440307e-05, |
|
"logits/chosen": -2.1740431785583496, |
|
"logits/rejected": -1.8387389183044434, |
|
"logps/chosen": -307.1974182128906, |
|
"logps/rejected": -293.7174987792969, |
|
"loss": 0.6362, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6124351620674133, |
|
"rewards/margins": 0.22457748651504517, |
|
"rewards/rejected": -0.8370125889778137, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.852662921527522e-05, |
|
"logits/chosen": -2.0800061225891113, |
|
"logits/rejected": -2.2033514976501465, |
|
"logps/chosen": -314.9759216308594, |
|
"logps/rejected": -373.83087158203125, |
|
"loss": 0.6388, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7755237221717834, |
|
"rewards/margins": 0.17267946898937225, |
|
"rewards/rejected": -0.9482032060623169, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8495764114003966e-05, |
|
"logits/chosen": -2.0974619388580322, |
|
"logits/rejected": -2.1461567878723145, |
|
"logps/chosen": -360.7484436035156, |
|
"logps/rejected": -402.9318542480469, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7390251159667969, |
|
"rewards/margins": 0.220990851521492, |
|
"rewards/rejected": -0.9600158929824829, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8464589087720846e-05, |
|
"logits/chosen": -2.0587756633758545, |
|
"logits/rejected": -1.9800175428390503, |
|
"logps/chosen": -287.4089050292969, |
|
"logps/rejected": -284.52667236328125, |
|
"loss": 0.7495, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.6148634552955627, |
|
"rewards/margins": -0.05161774903535843, |
|
"rewards/rejected": -0.5632455945014954, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8433104547645527e-05, |
|
"logits/chosen": -2.166761875152588, |
|
"logits/rejected": -2.1352829933166504, |
|
"logps/chosen": -270.46527099609375, |
|
"logps/rejected": -276.6797180175781, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7134184241294861, |
|
"rewards/margins": 0.1845559924840927, |
|
"rewards/rejected": -0.89797443151474, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.840131090908038e-05, |
|
"logits/chosen": -2.013166904449463, |
|
"logits/rejected": -2.0285515785217285, |
|
"logps/chosen": -250.39393615722656, |
|
"logps/rejected": -247.9554443359375, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5990790128707886, |
|
"rewards/margins": 0.09994714707136154, |
|
"rewards/rejected": -0.6990260481834412, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8369208591404997e-05, |
|
"logits/chosen": -2.070176124572754, |
|
"logits/rejected": -2.184037446975708, |
|
"logps/chosen": -290.0277099609375, |
|
"logps/rejected": -408.5084228515625, |
|
"loss": 0.7326, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8427464962005615, |
|
"rewards/margins": -0.015421424061059952, |
|
"rewards/rejected": -0.8273251056671143, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.833679801807064e-05, |
|
"logits/chosen": -1.9874109029769897, |
|
"logits/rejected": -2.082557201385498, |
|
"logps/chosen": -298.5231628417969, |
|
"logps/rejected": -384.05853271484375, |
|
"loss": 0.6293, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9263674020767212, |
|
"rewards/margins": 0.19633004069328308, |
|
"rewards/rejected": -1.1226974725723267, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8304079616594686e-05, |
|
"logits/chosen": -2.2528278827667236, |
|
"logits/rejected": -2.320962905883789, |
|
"logps/chosen": -476.85968017578125, |
|
"logps/rejected": -352.87298583984375, |
|
"loss": 0.7128, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9438900947570801, |
|
"rewards/margins": 0.07938252389431, |
|
"rewards/rejected": -1.0232725143432617, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8271053818554965e-05, |
|
"logits/chosen": -2.0623865127563477, |
|
"logits/rejected": -2.1591854095458984, |
|
"logps/chosen": -272.2887878417969, |
|
"logps/rejected": -340.0892639160156, |
|
"loss": 0.726, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6589000821113586, |
|
"rewards/margins": -0.008545447140932083, |
|
"rewards/rejected": -0.6503546833992004, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.823772105958408e-05, |
|
"logits/chosen": -2.2021212577819824, |
|
"logits/rejected": -2.2701587677001953, |
|
"logps/chosen": -332.5159912109375, |
|
"logps/rejected": -383.0360412597656, |
|
"loss": 0.5916, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8989717960357666, |
|
"rewards/margins": 0.3648967444896698, |
|
"rewards/rejected": -1.2638685703277588, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.820408177936365e-05, |
|
"logits/chosen": -2.264617443084717, |
|
"logits/rejected": -2.322549343109131, |
|
"logps/chosen": -434.6703796386719, |
|
"logps/rejected": -497.743408203125, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8982381820678711, |
|
"rewards/margins": 0.15654003620147705, |
|
"rewards/rejected": -1.0547782182693481, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.817013642161853e-05, |
|
"logits/chosen": -2.034374237060547, |
|
"logits/rejected": -1.8990505933761597, |
|
"logps/chosen": -335.35137939453125, |
|
"logps/rejected": -308.3555603027344, |
|
"loss": 0.7744, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9858347773551941, |
|
"rewards/margins": -0.07680069655179977, |
|
"rewards/rejected": -0.9090341925621033, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.813588543411093e-05, |
|
"logits/chosen": -2.0272958278656006, |
|
"logits/rejected": -2.040910243988037, |
|
"logps/chosen": -263.2490234375, |
|
"logps/rejected": -330.78326416015625, |
|
"loss": 0.611, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.806855320930481, |
|
"rewards/margins": 0.22991889715194702, |
|
"rewards/rejected": -1.0367741584777832, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.810132926863454e-05, |
|
"logits/chosen": -1.954245924949646, |
|
"logits/rejected": -2.222029685974121, |
|
"logps/chosen": -344.208984375, |
|
"logps/rejected": -378.04827880859375, |
|
"loss": 0.5867, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7623510360717773, |
|
"rewards/margins": 0.3259902894496918, |
|
"rewards/rejected": -1.088341236114502, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.806646838100852e-05, |
|
"logits/chosen": -1.9601213932037354, |
|
"logits/rejected": -1.9726929664611816, |
|
"logps/chosen": -363.0174255371094, |
|
"logps/rejected": -362.3369140625, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.85859614610672, |
|
"rewards/margins": 0.1350986659526825, |
|
"rewards/rejected": -0.9936947822570801, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.803130323107157e-05, |
|
"logits/chosen": -2.3228697776794434, |
|
"logits/rejected": -2.4339702129364014, |
|
"logps/chosen": -412.57208251953125, |
|
"logps/rejected": -507.545166015625, |
|
"loss": 0.6152, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8819563984870911, |
|
"rewards/margins": 0.24080882966518402, |
|
"rewards/rejected": -1.1227651834487915, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7995834282675764e-05, |
|
"logits/chosen": -1.9379347562789917, |
|
"logits/rejected": -1.9101741313934326, |
|
"logps/chosen": -336.1128234863281, |
|
"logps/rejected": -344.0860595703125, |
|
"loss": 0.6526, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7465636730194092, |
|
"rewards/margins": 0.16460736095905304, |
|
"rewards/rejected": -0.9111709594726562, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.796006200368054e-05, |
|
"logits/chosen": -1.9772237539291382, |
|
"logits/rejected": -2.0779454708099365, |
|
"logps/chosen": -300.3962097167969, |
|
"logps/rejected": -379.80230712890625, |
|
"loss": 0.6292, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6897981762886047, |
|
"rewards/margins": 0.20077620446681976, |
|
"rewards/rejected": -0.8905743360519409, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.79239868659464e-05, |
|
"logits/chosen": -2.019927978515625, |
|
"logits/rejected": -2.08197283744812, |
|
"logps/chosen": -249.34629821777344, |
|
"logps/rejected": -299.1027526855469, |
|
"loss": 0.6968, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8061387538909912, |
|
"rewards/margins": 0.06293849647045135, |
|
"rewards/rejected": -0.8690773248672485, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.788760934532883e-05, |
|
"logits/chosen": -2.124903678894043, |
|
"logits/rejected": -2.041473865509033, |
|
"logps/chosen": -268.1578674316406, |
|
"logps/rejected": -329.6964111328125, |
|
"loss": 0.7397, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9142539501190186, |
|
"rewards/margins": 0.021660268306732178, |
|
"rewards/rejected": -0.935914158821106, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.785092992167192e-05, |
|
"logits/chosen": -2.1390061378479004, |
|
"logits/rejected": -2.125261068344116, |
|
"logps/chosen": -268.4858703613281, |
|
"logps/rejected": -305.736083984375, |
|
"loss": 0.6402, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7436612248420715, |
|
"rewards/margins": 0.18916505575180054, |
|
"rewards/rejected": -0.9328262209892273, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.781394907880204e-05, |
|
"logits/chosen": -2.0572445392608643, |
|
"logits/rejected": -2.2498245239257812, |
|
"logps/chosen": -288.4680480957031, |
|
"logps/rejected": -313.41729736328125, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6026442646980286, |
|
"rewards/margins": 0.3183574676513672, |
|
"rewards/rejected": -0.9210017919540405, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.777666730452151e-05, |
|
"logits/chosen": -1.8694506883621216, |
|
"logits/rejected": -2.019477367401123, |
|
"logps/chosen": -265.6673889160156, |
|
"logps/rejected": -356.0314025878906, |
|
"loss": 0.5947, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7154449820518494, |
|
"rewards/margins": 0.4170495271682739, |
|
"rewards/rejected": -1.1324944496154785, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7739085090602145e-05, |
|
"logits/chosen": -2.254331111907959, |
|
"logits/rejected": -2.3572165966033936, |
|
"logps/chosen": -305.95074462890625, |
|
"logps/rejected": -340.34893798828125, |
|
"loss": 0.7385, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7424416542053223, |
|
"rewards/margins": 0.05813989043235779, |
|
"rewards/rejected": -0.8005815148353577, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.770120293277875e-05, |
|
"logits/chosen": -2.0773184299468994, |
|
"logits/rejected": -2.0856757164001465, |
|
"logps/chosen": -351.6342468261719, |
|
"logps/rejected": -304.958251953125, |
|
"loss": 0.5738, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8276374936103821, |
|
"rewards/margins": 0.34980258345603943, |
|
"rewards/rejected": -1.1774399280548096, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.76630213307426e-05, |
|
"logits/chosen": -1.996044635772705, |
|
"logits/rejected": -2.1620826721191406, |
|
"logps/chosen": -286.35565185546875, |
|
"logps/rejected": -381.3570861816406, |
|
"loss": 0.8382, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.9452384114265442, |
|
"rewards/margins": -0.1867210865020752, |
|
"rewards/rejected": -0.7585172057151794, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.762454078813483e-05, |
|
"logits/chosen": -1.959717035293579, |
|
"logits/rejected": -2.038839340209961, |
|
"logps/chosen": -327.7545471191406, |
|
"logps/rejected": -336.3837585449219, |
|
"loss": 0.7531, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8737196326255798, |
|
"rewards/margins": -0.06513047218322754, |
|
"rewards/rejected": -0.8085891008377075, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.758576181253981e-05, |
|
"logits/chosen": -2.221623659133911, |
|
"logits/rejected": -2.0508334636688232, |
|
"logps/chosen": -380.0113830566406, |
|
"logps/rejected": -305.5745544433594, |
|
"loss": 0.6948, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.907548189163208, |
|
"rewards/margins": 0.19952180981636047, |
|
"rewards/rejected": -1.107069969177246, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.754668491547845e-05, |
|
"logits/chosen": -2.2138311862945557, |
|
"logits/rejected": -1.9480187892913818, |
|
"logps/chosen": -353.3214111328125, |
|
"logps/rejected": -298.512939453125, |
|
"loss": 0.6672, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7623633146286011, |
|
"rewards/margins": 0.12673720717430115, |
|
"rewards/rejected": -0.8891006112098694, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.750731061240143e-05, |
|
"logits/chosen": -2.000711679458618, |
|
"logits/rejected": -2.172668933868408, |
|
"logps/chosen": -283.4869384765625, |
|
"logps/rejected": -291.0225830078125, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.46767231822013855, |
|
"rewards/margins": 0.11719869822263718, |
|
"rewards/rejected": -0.5848710536956787, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.746763942268243e-05, |
|
"logits/chosen": -1.973673701286316, |
|
"logits/rejected": -2.03965163230896, |
|
"logps/chosen": -380.54974365234375, |
|
"logps/rejected": -390.66119384765625, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5797699093818665, |
|
"rewards/margins": 0.08640918880701065, |
|
"rewards/rejected": -0.6661791205406189, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.742767186961125e-05, |
|
"logits/chosen": -2.1579782962799072, |
|
"logits/rejected": -2.12062668800354, |
|
"logps/chosen": -371.7868957519531, |
|
"logps/rejected": -279.1838684082031, |
|
"loss": 0.7117, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.43902456760406494, |
|
"rewards/margins": 0.0770123153924942, |
|
"rewards/rejected": -0.5160369277000427, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7387408480386945e-05, |
|
"logits/chosen": -2.1720871925354004, |
|
"logits/rejected": -2.2602319717407227, |
|
"logps/chosen": -340.2154235839844, |
|
"logps/rejected": -415.40625, |
|
"loss": 0.6475, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4649474322795868, |
|
"rewards/margins": 0.14594462513923645, |
|
"rewards/rejected": -0.6108919978141785, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7346849786110834e-05, |
|
"logits/chosen": -1.956856608390808, |
|
"logits/rejected": -2.2350995540618896, |
|
"logps/chosen": -277.75689697265625, |
|
"logps/rejected": -373.5628967285156, |
|
"loss": 0.6256, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.30955129861831665, |
|
"rewards/margins": 0.2196839451789856, |
|
"rewards/rejected": -0.529235303401947, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7305996321779516e-05, |
|
"logits/chosen": -1.9420428276062012, |
|
"logits/rejected": -2.040480613708496, |
|
"logps/chosen": -330.3153991699219, |
|
"logps/rejected": -419.3126220703125, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7027783393859863, |
|
"rewards/margins": 0.27350372076034546, |
|
"rewards/rejected": -0.976282000541687, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.726484862627779e-05, |
|
"logits/chosen": -2.0949978828430176, |
|
"logits/rejected": -2.049705743789673, |
|
"logps/chosen": -399.0694580078125, |
|
"logps/rejected": -362.0954284667969, |
|
"loss": 0.7168, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.26283103227615356, |
|
"rewards/margins": 0.01691259816288948, |
|
"rewards/rejected": -0.27974364161491394, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.722340724237159e-05, |
|
"logits/chosen": -1.8245809078216553, |
|
"logits/rejected": -2.162997007369995, |
|
"logps/chosen": -250.3448486328125, |
|
"logps/rejected": -344.0015563964844, |
|
"loss": 0.6018, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.40172523260116577, |
|
"rewards/margins": 0.23018088936805725, |
|
"rewards/rejected": -0.6319061517715454, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.718167271670077e-05, |
|
"logits/chosen": -2.0060951709747314, |
|
"logits/rejected": -2.127163887023926, |
|
"logps/chosen": -301.98492431640625, |
|
"logps/rejected": -334.6331481933594, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17217136919498444, |
|
"rewards/margins": 0.36563175916671753, |
|
"rewards/rejected": -0.5378031730651855, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7139645599771956e-05, |
|
"logits/chosen": -2.229623794555664, |
|
"logits/rejected": -2.2746386528015137, |
|
"logps/chosen": -340.92559814453125, |
|
"logps/rejected": -386.106689453125, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6003557443618774, |
|
"rewards/margins": 0.10827502608299255, |
|
"rewards/rejected": -0.7086308002471924, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.709732644595122e-05, |
|
"logits/chosen": -2.075230360031128, |
|
"logits/rejected": -1.8063032627105713, |
|
"logps/chosen": -322.08856201171875, |
|
"logps/rejected": -251.28103637695312, |
|
"loss": 0.7255, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.5996315479278564, |
|
"rewards/margins": -0.027767587453126907, |
|
"rewards/rejected": -0.5718639492988586, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7054715813456795e-05, |
|
"logits/chosen": -2.074021816253662, |
|
"logits/rejected": -1.947763442993164, |
|
"logps/chosen": -357.5520324707031, |
|
"logps/rejected": -347.5523681640625, |
|
"loss": 0.648, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7392159700393677, |
|
"rewards/margins": 0.21554100513458252, |
|
"rewards/rejected": -0.954757034778595, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.701181426435175e-05, |
|
"logits/chosen": -1.9467442035675049, |
|
"logits/rejected": -1.974783182144165, |
|
"logps/chosen": -358.291748046875, |
|
"logps/rejected": -400.4483947753906, |
|
"loss": 0.7035, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.329414039850235, |
|
"rewards/margins": 0.15945447981357574, |
|
"rewards/rejected": -0.4888685643672943, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.69686223645365e-05, |
|
"logits/chosen": -2.1252236366271973, |
|
"logits/rejected": -2.0597469806671143, |
|
"logps/chosen": -314.242919921875, |
|
"logps/rejected": -304.12176513671875, |
|
"loss": 0.7022, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.42311131954193115, |
|
"rewards/margins": 0.1071557104587555, |
|
"rewards/rejected": -0.5302670001983643, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.692514068374142e-05, |
|
"logits/chosen": -2.0920791625976562, |
|
"logits/rejected": -2.227646827697754, |
|
"logps/chosen": -389.5137634277344, |
|
"logps/rejected": -376.8587341308594, |
|
"loss": 0.6463, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4674568772315979, |
|
"rewards/margins": 0.22089587152004242, |
|
"rewards/rejected": -0.6883527636528015, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6881369795519266e-05, |
|
"logits/chosen": -2.0833654403686523, |
|
"logits/rejected": -2.075024366378784, |
|
"logps/chosen": -489.9222412109375, |
|
"logps/rejected": -418.9756164550781, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.603772759437561, |
|
"rewards/margins": 0.2230098843574524, |
|
"rewards/rejected": -0.8267825841903687, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.683731027723764e-05, |
|
"logits/chosen": -2.1447250843048096, |
|
"logits/rejected": -2.3132801055908203, |
|
"logps/chosen": -311.90301513671875, |
|
"logps/rejected": -416.7982177734375, |
|
"loss": 0.72, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.117811679840088, |
|
"rewards/margins": 0.1118747889995575, |
|
"rewards/rejected": -1.2296864986419678, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.679296271007137e-05, |
|
"logits/chosen": -1.8714052438735962, |
|
"logits/rejected": -1.9733188152313232, |
|
"logps/chosen": -378.4604797363281, |
|
"logps/rejected": -340.4316101074219, |
|
"loss": 0.7227, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.46567368507385254, |
|
"rewards/margins": 0.10245680809020996, |
|
"rewards/rejected": -0.5681304931640625, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.674832767899486e-05, |
|
"logits/chosen": -2.0161495208740234, |
|
"logits/rejected": -2.013415813446045, |
|
"logps/chosen": -334.1321105957031, |
|
"logps/rejected": -480.6463928222656, |
|
"loss": 0.7282, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8040814399719238, |
|
"rewards/margins": 0.06382356584072113, |
|
"rewards/rejected": -0.8679050207138062, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6703405772774325e-05, |
|
"logits/chosen": -2.1323914527893066, |
|
"logits/rejected": -2.001523971557617, |
|
"logps/chosen": -299.8099670410156, |
|
"logps/rejected": -278.1839294433594, |
|
"loss": 0.6323, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.27376043796539307, |
|
"rewards/margins": 0.16408132016658783, |
|
"rewards/rejected": -0.4378418028354645, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.66581975839601e-05, |
|
"logits/chosen": -2.1282427310943604, |
|
"logits/rejected": -2.040194272994995, |
|
"logps/chosen": -349.28826904296875, |
|
"logps/rejected": -320.7705078125, |
|
"loss": 0.6147, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6112862229347229, |
|
"rewards/margins": 0.27279651165008545, |
|
"rewards/rejected": -0.8840827345848083, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.661270370887872e-05, |
|
"logits/chosen": -2.2946279048919678, |
|
"logits/rejected": -2.311314105987549, |
|
"logps/chosen": -298.4400634765625, |
|
"logps/rejected": -319.92828369140625, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5072231888771057, |
|
"rewards/margins": 0.09647183120250702, |
|
"rewards/rejected": -0.6036950349807739, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6566924747625176e-05, |
|
"logits/chosen": -2.1643552780151367, |
|
"logits/rejected": -2.13722562789917, |
|
"logps/chosen": -328.9122009277344, |
|
"logps/rejected": -411.6603088378906, |
|
"loss": 0.7268, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5001087188720703, |
|
"rewards/margins": 0.027627088129520416, |
|
"rewards/rejected": -0.527735710144043, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.652086130405492e-05, |
|
"logits/chosen": -2.0007681846618652, |
|
"logits/rejected": -2.173635959625244, |
|
"logps/chosen": -344.0201416015625, |
|
"logps/rejected": -489.677978515625, |
|
"loss": 0.5542, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.647387683391571, |
|
"rewards/margins": 0.5166620016098022, |
|
"rewards/rejected": -1.164049744606018, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.647451398577589e-05, |
|
"logits/chosen": -2.2768120765686035, |
|
"logits/rejected": -2.2112040519714355, |
|
"logps/chosen": -351.68658447265625, |
|
"logps/rejected": -304.02362060546875, |
|
"loss": 0.8001, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5356206297874451, |
|
"rewards/margins": -0.1319734901189804, |
|
"rewards/rejected": -0.40364715456962585, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6427883404140564e-05, |
|
"logits/chosen": -2.0852530002593994, |
|
"logits/rejected": -2.1016790866851807, |
|
"logps/chosen": -337.05499267578125, |
|
"logps/rejected": -388.4207763671875, |
|
"loss": 0.7004, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.922147274017334, |
|
"rewards/margins": 0.11045366525650024, |
|
"rewards/rejected": -1.0326008796691895, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638097017423783e-05, |
|
"logits/chosen": -2.044597625732422, |
|
"logits/rejected": -2.14152193069458, |
|
"logps/chosen": -334.3042297363281, |
|
"logps/rejected": -319.0080871582031, |
|
"loss": 0.5122, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.36447468400001526, |
|
"rewards/margins": 0.62729412317276, |
|
"rewards/rejected": -0.9917687773704529, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6333774914884897e-05, |
|
"logits/chosen": -2.073789119720459, |
|
"logits/rejected": -2.1668505668640137, |
|
"logps/chosen": -308.4037780761719, |
|
"logps/rejected": -298.2853698730469, |
|
"loss": 0.7838, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6153988242149353, |
|
"rewards/margins": -0.0456385537981987, |
|
"rewards/rejected": -0.569760262966156, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6286298248619144e-05, |
|
"logits/chosen": -2.121100902557373, |
|
"logits/rejected": -2.0319623947143555, |
|
"logps/chosen": -342.6925048828125, |
|
"logps/rejected": -355.6612243652344, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8498857617378235, |
|
"rewards/margins": 0.18424755334854126, |
|
"rewards/rejected": -1.0341331958770752, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.62385408016899e-05, |
|
"logits/chosen": -2.084768533706665, |
|
"logits/rejected": -2.15020489692688, |
|
"logps/chosen": -250.22640991210938, |
|
"logps/rejected": -272.5914001464844, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.3922913670539856, |
|
"rewards/margins": 0.36925047636032104, |
|
"rewards/rejected": -0.7615418434143066, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.619050320405017e-05, |
|
"logits/chosen": -2.3483760356903076, |
|
"logits/rejected": -2.152430772781372, |
|
"logps/chosen": -317.35296630859375, |
|
"logps/rejected": -290.811767578125, |
|
"loss": 0.6754, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.718281626701355, |
|
"rewards/margins": 0.11232803016901016, |
|
"rewards/rejected": -0.8306095600128174, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.614218608934834e-05, |
|
"logits/chosen": -2.1370747089385986, |
|
"logits/rejected": -2.1002144813537598, |
|
"logps/chosen": -395.0451965332031, |
|
"logps/rejected": -446.2868347167969, |
|
"loss": 0.6049, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.938556969165802, |
|
"rewards/margins": 0.3290979564189911, |
|
"rewards/rejected": -1.2676548957824707, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.60935900949198e-05, |
|
"logits/chosen": -1.9551351070404053, |
|
"logits/rejected": -1.9744064807891846, |
|
"logps/chosen": -372.6743469238281, |
|
"logps/rejected": -486.5342102050781, |
|
"loss": 0.7102, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7985714077949524, |
|
"rewards/margins": 0.2688364088535309, |
|
"rewards/rejected": -1.0674078464508057, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.6044715861778596e-05, |
|
"logits/chosen": -2.051593065261841, |
|
"logits/rejected": -2.114675521850586, |
|
"logps/chosen": -294.7435302734375, |
|
"logps/rejected": -326.8503112792969, |
|
"loss": 0.5986, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.621825098991394, |
|
"rewards/margins": 0.25512248277664185, |
|
"rewards/rejected": -0.8769477009773254, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5995564034608884e-05, |
|
"logits/chosen": -2.160278797149658, |
|
"logits/rejected": -2.1137940883636475, |
|
"logps/chosen": -393.886474609375, |
|
"logps/rejected": -350.8844299316406, |
|
"loss": 0.7498, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.8584200143814087, |
|
"rewards/margins": -0.06297742575407028, |
|
"rewards/rejected": -0.795442521572113, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5946135261756504e-05, |
|
"logits/chosen": -2.062591791152954, |
|
"logits/rejected": -2.16622257232666, |
|
"logps/chosen": -323.41131591796875, |
|
"logps/rejected": -332.9070739746094, |
|
"loss": 0.6248, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5620593428611755, |
|
"rewards/margins": 0.3896656036376953, |
|
"rewards/rejected": -0.9517249464988708, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5896430195220364e-05, |
|
"logits/chosen": -1.9204814434051514, |
|
"logits/rejected": -1.8729685544967651, |
|
"logps/chosen": -288.1690979003906, |
|
"logps/rejected": -313.9109802246094, |
|
"loss": 0.6466, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.849123477935791, |
|
"rewards/margins": 0.2756129801273346, |
|
"rewards/rejected": -1.1247365474700928, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.584644949064391e-05, |
|
"logits/chosen": -2.176421642303467, |
|
"logits/rejected": -2.2713022232055664, |
|
"logps/chosen": -273.307373046875, |
|
"logps/rejected": -273.297607421875, |
|
"loss": 0.7478, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.8873642086982727, |
|
"rewards/margins": -0.060915715992450714, |
|
"rewards/rejected": -0.8264484405517578, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.579619380730642e-05, |
|
"logits/chosen": -2.1005711555480957, |
|
"logits/rejected": -2.1496832370758057, |
|
"logps/chosen": -251.0666961669922, |
|
"logps/rejected": -300.4075012207031, |
|
"loss": 0.6459, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5288766622543335, |
|
"rewards/margins": 0.20617865025997162, |
|
"rewards/rejected": -0.7350552678108215, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.574566380811432e-05, |
|
"logits/chosen": -2.277989387512207, |
|
"logits/rejected": -2.2332725524902344, |
|
"logps/chosen": -357.2762145996094, |
|
"logps/rejected": -382.5482482910156, |
|
"loss": 0.7107, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6581230163574219, |
|
"rewards/margins": 0.030204597860574722, |
|
"rewards/rejected": -0.6883276700973511, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5694860159592465e-05, |
|
"logits/chosen": -1.9560626745224, |
|
"logits/rejected": -2.078892230987549, |
|
"logps/chosen": -314.22076416015625, |
|
"logps/rejected": -294.4151306152344, |
|
"loss": 0.6152, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6360582113265991, |
|
"rewards/margins": 0.2562226951122284, |
|
"rewards/rejected": -0.8922808766365051, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5643783531875323e-05, |
|
"logits/chosen": -2.0318408012390137, |
|
"logits/rejected": -2.1967966556549072, |
|
"logps/chosen": -251.95309448242188, |
|
"logps/rejected": -406.43310546875, |
|
"loss": 0.5652, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6249865293502808, |
|
"rewards/margins": 0.6104640960693359, |
|
"rewards/rejected": -1.2354506254196167, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.559243459869814e-05, |
|
"logits/chosen": -1.9235318899154663, |
|
"logits/rejected": -2.222059726715088, |
|
"logps/chosen": -282.896728515625, |
|
"logps/rejected": -374.2673645019531, |
|
"loss": 0.6222, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7379283905029297, |
|
"rewards/margins": 0.2646501362323761, |
|
"rewards/rejected": -1.0025784969329834, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5540814037388056e-05, |
|
"logits/chosen": -1.9744443893432617, |
|
"logits/rejected": -1.974491834640503, |
|
"logps/chosen": -377.78466796875, |
|
"logps/rejected": -405.6194152832031, |
|
"loss": 0.9336, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.8520271182060242, |
|
"rewards/margins": -0.32225731015205383, |
|
"rewards/rejected": -0.529769778251648, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5488922528855176e-05, |
|
"logits/chosen": -2.0435807704925537, |
|
"logits/rejected": -1.990431308746338, |
|
"logps/chosen": -306.81048583984375, |
|
"logps/rejected": -369.6317138671875, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.726284921169281, |
|
"rewards/margins": 0.3399674594402313, |
|
"rewards/rejected": -1.06625235080719, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.543676075758356e-05, |
|
"logits/chosen": -2.0265307426452637, |
|
"logits/rejected": -1.9903606176376343, |
|
"logps/chosen": -359.7049255371094, |
|
"logps/rejected": -348.72723388671875, |
|
"loss": 0.7122, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7237378358840942, |
|
"rewards/margins": 0.028506018221378326, |
|
"rewards/rejected": -0.7522438764572144, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.538432941162226e-05, |
|
"logits/chosen": -2.327871799468994, |
|
"logits/rejected": -2.218278408050537, |
|
"logps/chosen": -379.38812255859375, |
|
"logps/rejected": -380.68609619140625, |
|
"loss": 0.7641, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5538444519042969, |
|
"rewards/margins": -0.09269000589847565, |
|
"rewards/rejected": -0.46115440130233765, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5331629182576153e-05, |
|
"logits/chosen": -2.1100308895111084, |
|
"logits/rejected": -2.077500581741333, |
|
"logps/chosen": -265.77972412109375, |
|
"logps/rejected": -386.9795227050781, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6752620339393616, |
|
"rewards/margins": 0.13913662731647491, |
|
"rewards/rejected": -0.8143986463546753, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5278660765596884e-05, |
|
"logits/chosen": -2.0439562797546387, |
|
"logits/rejected": -2.036978244781494, |
|
"logps/chosen": -364.2665710449219, |
|
"logps/rejected": -373.9276123046875, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7833287119865417, |
|
"rewards/margins": 0.15880194306373596, |
|
"rewards/rejected": -0.9421306848526001, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.522542485937369e-05, |
|
"logits/chosen": -2.2488677501678467, |
|
"logits/rejected": -2.270367383956909, |
|
"logps/chosen": -329.8542175292969, |
|
"logps/rejected": -341.32464599609375, |
|
"loss": 0.6091, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5054783225059509, |
|
"rewards/margins": 0.29985660314559937, |
|
"rewards/rejected": -0.8053349256515503, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5171922166124154e-05, |
|
"logits/chosen": -2.2042911052703857, |
|
"logits/rejected": -2.2818048000335693, |
|
"logps/chosen": -345.41571044921875, |
|
"logps/rejected": -371.5228271484375, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5683087110519409, |
|
"rewards/margins": 0.31007736921310425, |
|
"rewards/rejected": -0.8783860802650452, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5118153391584974e-05, |
|
"logits/chosen": -2.283979892730713, |
|
"logits/rejected": -2.3613948822021484, |
|
"logps/chosen": -326.4739074707031, |
|
"logps/rejected": -346.43701171875, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5925798416137695, |
|
"rewards/margins": 0.04503173753619194, |
|
"rewards/rejected": -0.6376115679740906, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5064119245002626e-05, |
|
"logits/chosen": -1.9776369333267212, |
|
"logits/rejected": -2.186469793319702, |
|
"logps/chosen": -316.29217529296875, |
|
"logps/rejected": -339.24908447265625, |
|
"loss": 0.5385, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4294860363006592, |
|
"rewards/margins": 0.5443057417869568, |
|
"rewards/rejected": -0.973791778087616, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.500982043912404e-05, |
|
"logits/chosen": -1.8511924743652344, |
|
"logits/rejected": -1.7418498992919922, |
|
"logps/chosen": -221.07847595214844, |
|
"logps/rejected": -236.75009155273438, |
|
"loss": 0.8087, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.6640509963035583, |
|
"rewards/margins": -0.18910002708435059, |
|
"rewards/rejected": -0.47495099902153015, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.495525769018717e-05, |
|
"logits/chosen": -2.1778125762939453, |
|
"logits/rejected": -2.3195223808288574, |
|
"logps/chosen": -315.2249755859375, |
|
"logps/rejected": -350.7799987792969, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5389224290847778, |
|
"rewards/margins": 0.4431789517402649, |
|
"rewards/rejected": -0.9821013808250427, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.490043171791155e-05, |
|
"logits/chosen": -2.139204502105713, |
|
"logits/rejected": -2.0121872425079346, |
|
"logps/chosen": -411.3319396972656, |
|
"logps/rejected": -478.99822998046875, |
|
"loss": 0.542, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5370864272117615, |
|
"rewards/margins": 0.40471482276916504, |
|
"rewards/rejected": -0.9418012499809265, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.484534324548883e-05, |
|
"logits/chosen": -1.9133659601211548, |
|
"logits/rejected": -1.7547776699066162, |
|
"logps/chosen": -291.21502685546875, |
|
"logps/rejected": -296.52691650390625, |
|
"loss": 0.7422, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7586185932159424, |
|
"rewards/margins": 0.04123706370592117, |
|
"rewards/rejected": -0.7998557686805725, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4789992999573194e-05, |
|
"logits/chosen": -1.887819766998291, |
|
"logits/rejected": -2.074976682662964, |
|
"logps/chosen": -238.8917999267578, |
|
"logps/rejected": -290.9273681640625, |
|
"loss": 0.7374, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8456001281738281, |
|
"rewards/margins": 0.0228101909160614, |
|
"rewards/rejected": -0.8684103488922119, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.47343817102718e-05, |
|
"logits/chosen": -1.776401400566101, |
|
"logits/rejected": -1.8924405574798584, |
|
"logps/chosen": -291.739990234375, |
|
"logps/rejected": -354.3161926269531, |
|
"loss": 0.7368, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.637408971786499, |
|
"rewards/margins": -0.0014106258749961853, |
|
"rewards/rejected": -0.6359982490539551, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.467851011113515e-05, |
|
"logits/chosen": -2.391042470932007, |
|
"logits/rejected": -2.4325904846191406, |
|
"logps/chosen": -423.8915710449219, |
|
"logps/rejected": -501.38568115234375, |
|
"loss": 0.7102, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9706649780273438, |
|
"rewards/margins": 0.06408338248729706, |
|
"rewards/rejected": -1.0347484350204468, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4622378939147416e-05, |
|
"logits/chosen": -2.258568048477173, |
|
"logits/rejected": -2.2797646522521973, |
|
"logps/chosen": -324.49005126953125, |
|
"logps/rejected": -288.9884948730469, |
|
"loss": 0.6483, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.46772387623786926, |
|
"rewards/margins": 0.14248529076576233, |
|
"rewards/rejected": -0.6102092266082764, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.456598893471668e-05, |
|
"logits/chosen": -2.1972222328186035, |
|
"logits/rejected": -2.0766568183898926, |
|
"logps/chosen": -391.5879211425781, |
|
"logps/rejected": -395.998779296875, |
|
"loss": 0.6513, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5539337396621704, |
|
"rewards/margins": 0.13942018151283264, |
|
"rewards/rejected": -0.6933539509773254, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.450934084166524e-05, |
|
"logits/chosen": -2.177605152130127, |
|
"logits/rejected": -2.378321886062622, |
|
"logps/chosen": -396.26800537109375, |
|
"logps/rejected": -513.7933349609375, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5082396268844604, |
|
"rewards/margins": 0.6496228575706482, |
|
"rewards/rejected": -1.1578625440597534, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.445243540721972e-05, |
|
"logits/chosen": -2.266407012939453, |
|
"logits/rejected": -2.261568069458008, |
|
"logps/chosen": -301.5278015136719, |
|
"logps/rejected": -347.5808410644531, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9058988094329834, |
|
"rewards/margins": 0.060255490243434906, |
|
"rewards/rejected": -0.9661542773246765, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4395273382001286e-05, |
|
"logits/chosen": -1.9136242866516113, |
|
"logits/rejected": -2.1110732555389404, |
|
"logps/chosen": -214.5594024658203, |
|
"logps/rejected": -286.8577880859375, |
|
"loss": 0.6752, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7257100343704224, |
|
"rewards/margins": 0.190011665225029, |
|
"rewards/rejected": -0.9157217144966125, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.433785552001568e-05, |
|
"logits/chosen": -2.236163854598999, |
|
"logits/rejected": -2.3334312438964844, |
|
"logps/chosen": -378.4542541503906, |
|
"logps/rejected": -426.2008361816406, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7116307020187378, |
|
"rewards/margins": 0.14856423437595367, |
|
"rewards/rejected": -0.860194981098175, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.428018257864333e-05, |
|
"logits/chosen": -2.0460734367370605, |
|
"logits/rejected": -2.1440231800079346, |
|
"logps/chosen": -280.720703125, |
|
"logps/rejected": -361.7416687011719, |
|
"loss": 0.5533, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.24584394693374634, |
|
"rewards/margins": 0.45910245180130005, |
|
"rewards/rejected": -0.7049463987350464, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4222255318629294e-05, |
|
"logits/chosen": -1.7771077156066895, |
|
"logits/rejected": -2.246995210647583, |
|
"logps/chosen": -307.57232666015625, |
|
"logps/rejected": -455.2540283203125, |
|
"loss": 0.7022, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4229920506477356, |
|
"rewards/margins": 0.13145704567432404, |
|
"rewards/rejected": -0.5544491410255432, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4164074504073313e-05, |
|
"logits/chosen": -2.343817949295044, |
|
"logits/rejected": -2.2758233547210693, |
|
"logps/chosen": -376.11419677734375, |
|
"logps/rejected": -370.3990478515625, |
|
"loss": 0.6549, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.604290783405304, |
|
"rewards/margins": 0.13700008392333984, |
|
"rewards/rejected": -0.7412909269332886, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.410564090241966e-05, |
|
"logits/chosen": -1.545508623123169, |
|
"logits/rejected": -1.8171346187591553, |
|
"logps/chosen": -221.09950256347656, |
|
"logps/rejected": -334.39544677734375, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5746042132377625, |
|
"rewards/margins": 0.4071567952632904, |
|
"rewards/rejected": -0.9817609786987305, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4046955284447044e-05, |
|
"logits/chosen": -2.077967643737793, |
|
"logits/rejected": -2.2846693992614746, |
|
"logps/chosen": -324.32476806640625, |
|
"logps/rejected": -440.4974365234375, |
|
"loss": 0.7404, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6503464579582214, |
|
"rewards/margins": 0.04034698009490967, |
|
"rewards/rejected": -0.6906934976577759, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.398801842425842e-05, |
|
"logits/chosen": -2.1588635444641113, |
|
"logits/rejected": -2.1049904823303223, |
|
"logps/chosen": -406.24176025390625, |
|
"logps/rejected": -372.340087890625, |
|
"loss": 0.585, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7770822048187256, |
|
"rewards/margins": 0.31246596574783325, |
|
"rewards/rejected": -1.0895482301712036, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.392883109927083e-05, |
|
"logits/chosen": -2.0670382976531982, |
|
"logits/rejected": -1.8806589841842651, |
|
"logps/chosen": -401.965087890625, |
|
"logps/rejected": -393.48516845703125, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8736264705657959, |
|
"rewards/margins": 0.1775364875793457, |
|
"rewards/rejected": -1.0511629581451416, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.38693940902051e-05, |
|
"logits/chosen": -1.988671064376831, |
|
"logits/rejected": -1.9722682237625122, |
|
"logps/chosen": -387.1339111328125, |
|
"logps/rejected": -409.888916015625, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7169901132583618, |
|
"rewards/margins": 0.27162882685661316, |
|
"rewards/rejected": -0.9886189699172974, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3809708181075556e-05, |
|
"logits/chosen": -2.186702013015747, |
|
"logits/rejected": -2.0955049991607666, |
|
"logps/chosen": -395.58294677734375, |
|
"logps/rejected": -347.0909118652344, |
|
"loss": 0.8175, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9574447274208069, |
|
"rewards/margins": -0.11547727137804031, |
|
"rewards/rejected": -0.8419675230979919, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.374977415917969e-05, |
|
"logits/chosen": -2.026996612548828, |
|
"logits/rejected": -1.772495985031128, |
|
"logps/chosen": -379.002685546875, |
|
"logps/rejected": -381.895751953125, |
|
"loss": 0.561, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6855794191360474, |
|
"rewards/margins": 0.3581579923629761, |
|
"rewards/rejected": -1.0437374114990234, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3689592815087764e-05, |
|
"logits/chosen": -1.9657152891159058, |
|
"logits/rejected": -1.8686068058013916, |
|
"logps/chosen": -367.0695495605469, |
|
"logps/rejected": -369.2501525878906, |
|
"loss": 0.7995, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.0888875722885132, |
|
"rewards/margins": -0.10578227788209915, |
|
"rewards/rejected": -0.9831052422523499, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3629164942632386e-05, |
|
"logits/chosen": -1.7416181564331055, |
|
"logits/rejected": -1.8769294023513794, |
|
"logps/chosen": -255.93417358398438, |
|
"logps/rejected": -316.71331787109375, |
|
"loss": 0.5563, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.7211109399795532, |
|
"rewards/margins": 0.501852810382843, |
|
"rewards/rejected": -1.222963809967041, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3568491338898055e-05, |
|
"logits/chosen": -2.201251745223999, |
|
"logits/rejected": -2.0464892387390137, |
|
"logps/chosen": -273.45849609375, |
|
"logps/rejected": -306.01361083984375, |
|
"loss": 0.8629, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.645514965057373, |
|
"rewards/margins": -0.2321767956018448, |
|
"rewards/rejected": -0.41333818435668945, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.350757280421061e-05, |
|
"logits/chosen": -1.9395544528961182, |
|
"logits/rejected": -1.923555612564087, |
|
"logps/chosen": -401.73516845703125, |
|
"logps/rejected": -390.874267578125, |
|
"loss": 0.5915, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9947636723518372, |
|
"rewards/margins": 0.2778167724609375, |
|
"rewards/rejected": -1.2725805044174194, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.34464101421267e-05, |
|
"logits/chosen": -1.8383352756500244, |
|
"logits/rejected": -1.9566022157669067, |
|
"logps/chosen": -381.46258544921875, |
|
"logps/rejected": -383.20050048828125, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5637756586074829, |
|
"rewards/margins": 0.3097578287124634, |
|
"rewards/rejected": -0.8735334277153015, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.338500415942319e-05, |
|
"logits/chosen": -2.0855202674865723, |
|
"logits/rejected": -1.8852460384368896, |
|
"logps/chosen": -313.52093505859375, |
|
"logps/rejected": -327.4878234863281, |
|
"loss": 0.601, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6615315079689026, |
|
"rewards/margins": 0.2937285006046295, |
|
"rewards/rejected": -0.9552599787712097, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3323355666086506e-05, |
|
"logits/chosen": -2.2466461658477783, |
|
"logits/rejected": -2.194441556930542, |
|
"logps/chosen": -385.48297119140625, |
|
"logps/rejected": -358.59783935546875, |
|
"loss": 0.7045, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.8933424353599548, |
|
"rewards/margins": 0.060363732278347015, |
|
"rewards/rejected": -0.9537062048912048, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.326146547530196e-05, |
|
"logits/chosen": -1.9754979610443115, |
|
"logits/rejected": -2.028249502182007, |
|
"logps/chosen": -392.3201599121094, |
|
"logps/rejected": -451.3328552246094, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7265093922615051, |
|
"rewards/margins": 0.5438079833984375, |
|
"rewards/rejected": -1.2703173160552979, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3199334403442976e-05, |
|
"logits/chosen": -1.9868967533111572, |
|
"logits/rejected": -1.997020959854126, |
|
"logps/chosen": -324.3955078125, |
|
"logps/rejected": -345.5137634277344, |
|
"loss": 0.8214, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8653470873832703, |
|
"rewards/margins": -0.15076835453510284, |
|
"rewards/rejected": -0.7145787477493286, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.313696327006042e-05, |
|
"logits/chosen": -2.213704824447632, |
|
"logits/rejected": -2.111079692840576, |
|
"logps/chosen": -404.150634765625, |
|
"logps/rejected": -373.7305603027344, |
|
"loss": 0.7839, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9824564456939697, |
|
"rewards/margins": 0.003427162766456604, |
|
"rewards/rejected": -0.9858837127685547, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3074352897871686e-05, |
|
"logits/chosen": -1.8593621253967285, |
|
"logits/rejected": -2.1451752185821533, |
|
"logps/chosen": -312.2982482910156, |
|
"logps/rejected": -318.39642333984375, |
|
"loss": 0.6276, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7266647815704346, |
|
"rewards/margins": 0.3218391537666321, |
|
"rewards/rejected": -1.0485039949417114, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.301150411274992e-05, |
|
"logits/chosen": -1.75832200050354, |
|
"logits/rejected": -1.8182921409606934, |
|
"logps/chosen": -294.1172790527344, |
|
"logps/rejected": -396.57208251953125, |
|
"loss": 0.6559, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8868988752365112, |
|
"rewards/margins": 0.2270849496126175, |
|
"rewards/rejected": -1.1139838695526123, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.294841774371308e-05, |
|
"logits/chosen": -1.9476118087768555, |
|
"logits/rejected": -1.9689973592758179, |
|
"logps/chosen": -333.6837158203125, |
|
"logps/rejected": -371.0637512207031, |
|
"loss": 0.596, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.839272677898407, |
|
"rewards/margins": 0.3770362436771393, |
|
"rewards/rejected": -1.2163089513778687, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.288509462291302e-05, |
|
"logits/chosen": -2.073699474334717, |
|
"logits/rejected": -1.9370347261428833, |
|
"logps/chosen": -378.5625305175781, |
|
"logps/rejected": -395.1451416015625, |
|
"loss": 0.6107, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7811661958694458, |
|
"rewards/margins": 0.2788164019584656, |
|
"rewards/rejected": -1.0599825382232666, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2821535585624504e-05, |
|
"logits/chosen": -1.9244226217269897, |
|
"logits/rejected": -2.0157883167266846, |
|
"logps/chosen": -374.1487121582031, |
|
"logps/rejected": -391.6187438964844, |
|
"loss": 0.7111, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7071002721786499, |
|
"rewards/margins": 0.08099737763404846, |
|
"rewards/rejected": -0.7880975604057312, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2757741470234214e-05, |
|
"logits/chosen": -2.2348544597625732, |
|
"logits/rejected": -2.087557554244995, |
|
"logps/chosen": -326.5330810546875, |
|
"logps/rejected": -336.2727355957031, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4954121708869934, |
|
"rewards/margins": 0.1729755401611328, |
|
"rewards/rejected": -0.668387770652771, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.269371311822965e-05, |
|
"logits/chosen": -2.137007236480713, |
|
"logits/rejected": -2.230994701385498, |
|
"logps/chosen": -402.22711181640625, |
|
"logps/rejected": -455.44390869140625, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0957834720611572, |
|
"rewards/margins": 0.4251984655857086, |
|
"rewards/rejected": -1.520982027053833, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2629451374188055e-05, |
|
"logits/chosen": -2.0285918712615967, |
|
"logits/rejected": -2.096553087234497, |
|
"logps/chosen": -352.289306640625, |
|
"logps/rejected": -334.0386962890625, |
|
"loss": 0.7908, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6512686610221863, |
|
"rewards/margins": -0.014007307589054108, |
|
"rewards/rejected": -0.6372612714767456, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.256495708576527e-05, |
|
"logits/chosen": -2.0124435424804688, |
|
"logits/rejected": -2.203395128250122, |
|
"logps/chosen": -344.8824768066406, |
|
"logps/rejected": -407.80548095703125, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.666597843170166, |
|
"rewards/margins": 0.5224538445472717, |
|
"rewards/rejected": -1.1890517473220825, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.250023110368457e-05, |
|
"logits/chosen": -2.0517170429229736, |
|
"logits/rejected": -2.260333299636841, |
|
"logps/chosen": -319.9532470703125, |
|
"logps/rejected": -426.59197998046875, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0301696062088013, |
|
"rewards/margins": 0.4518246054649353, |
|
"rewards/rejected": -1.4819941520690918, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.243527428172541e-05, |
|
"logits/chosen": -1.7961515188217163, |
|
"logits/rejected": -2.0152459144592285, |
|
"logps/chosen": -338.9334411621094, |
|
"logps/rejected": -429.2481384277344, |
|
"loss": 0.6529, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7761648893356323, |
|
"rewards/margins": 0.3605046570301056, |
|
"rewards/rejected": -1.136669635772705, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.237008747671217e-05, |
|
"logits/chosen": -2.139997720718384, |
|
"logits/rejected": -2.038111686706543, |
|
"logps/chosen": -311.0231018066406, |
|
"logps/rejected": -327.61065673828125, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0172241926193237, |
|
"rewards/margins": 0.20826146006584167, |
|
"rewards/rejected": -1.2254855632781982, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2304671548502896e-05, |
|
"logits/chosen": -1.859197735786438, |
|
"logits/rejected": -2.0383810997009277, |
|
"logps/chosen": -319.2428894042969, |
|
"logps/rejected": -297.6011657714844, |
|
"loss": 0.7805, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.34115666151046753, |
|
"rewards/margins": -0.06757514923810959, |
|
"rewards/rejected": -0.27358150482177734, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.223902735997788e-05, |
|
"logits/chosen": -2.063511371612549, |
|
"logits/rejected": -2.0269269943237305, |
|
"logps/chosen": -329.1101379394531, |
|
"logps/rejected": -359.3594665527344, |
|
"loss": 0.5742, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6491307616233826, |
|
"rewards/margins": 0.3211410343647003, |
|
"rewards/rejected": -0.9702718257904053, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.217315577702836e-05, |
|
"logits/chosen": -2.3090927600860596, |
|
"logits/rejected": -2.2255845069885254, |
|
"logps/chosen": -438.4259338378906, |
|
"logps/rejected": -385.08880615234375, |
|
"loss": 0.7974, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6954346895217896, |
|
"rewards/margins": 0.034093111753463745, |
|
"rewards/rejected": -0.7295278310775757, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.2107057668545044e-05, |
|
"logits/chosen": -2.138420581817627, |
|
"logits/rejected": -2.3774194717407227, |
|
"logps/chosen": -241.03421020507812, |
|
"logps/rejected": -291.4215087890625, |
|
"loss": 0.5889, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7353395223617554, |
|
"rewards/margins": 0.4763728976249695, |
|
"rewards/rejected": -1.2117124795913696, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.204073390640666e-05, |
|
"logits/chosen": -2.095376968383789, |
|
"logits/rejected": -2.1427035331726074, |
|
"logps/chosen": -359.9441223144531, |
|
"logps/rejected": -460.4444580078125, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9195146560668945, |
|
"rewards/margins": 0.12283174693584442, |
|
"rewards/rejected": -1.042346477508545, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1974185365468467e-05, |
|
"logits/chosen": -2.082658052444458, |
|
"logits/rejected": -2.0861730575561523, |
|
"logps/chosen": -364.9388427734375, |
|
"logps/rejected": -403.16510009765625, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5946727991104126, |
|
"rewards/margins": 0.3608367443084717, |
|
"rewards/rejected": -0.955509603023529, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.19074129235507e-05, |
|
"logits/chosen": -2.088836431503296, |
|
"logits/rejected": -2.0834221839904785, |
|
"logps/chosen": -319.285888671875, |
|
"logps/rejected": -320.6772766113281, |
|
"loss": 0.8216, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.882214367389679, |
|
"rewards/margins": -0.12802943587303162, |
|
"rewards/rejected": -0.7541849613189697, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.184041746142702e-05, |
|
"logits/chosen": -2.211498498916626, |
|
"logits/rejected": -2.1443381309509277, |
|
"logps/chosen": -379.1222229003906, |
|
"logps/rejected": -416.7032470703125, |
|
"loss": 0.7431, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7400021553039551, |
|
"rewards/margins": -0.029454410076141357, |
|
"rewards/rejected": -0.7105477452278137, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.177319986281285e-05, |
|
"logits/chosen": -1.9428646564483643, |
|
"logits/rejected": -2.0589828491210938, |
|
"logps/chosen": -338.52618408203125, |
|
"logps/rejected": -396.5542297363281, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3697546124458313, |
|
"rewards/margins": 0.2811446785926819, |
|
"rewards/rejected": -0.6508992314338684, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.170576101435376e-05, |
|
"logits/chosen": -2.2952022552490234, |
|
"logits/rejected": -2.321949005126953, |
|
"logps/chosen": -275.3761901855469, |
|
"logps/rejected": -350.7015075683594, |
|
"loss": 0.6968, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3928203880786896, |
|
"rewards/margins": 0.07630396634340286, |
|
"rewards/rejected": -0.46912431716918945, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.163810180561376e-05, |
|
"logits/chosen": -1.8680933713912964, |
|
"logits/rejected": -2.0927417278289795, |
|
"logps/chosen": -303.8800354003906, |
|
"logps/rejected": -341.4588928222656, |
|
"loss": 0.7411, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6674574613571167, |
|
"rewards/margins": -0.02869322896003723, |
|
"rewards/rejected": -0.6387642621994019, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.157022312906352e-05, |
|
"logits/chosen": -1.946225881576538, |
|
"logits/rejected": -1.9390500783920288, |
|
"logps/chosen": -340.5247497558594, |
|
"logps/rejected": -309.8658447265625, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6571869850158691, |
|
"rewards/margins": 0.13291773200035095, |
|
"rewards/rejected": -0.7901047468185425, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.150212588006871e-05, |
|
"logits/chosen": -2.1470143795013428, |
|
"logits/rejected": -2.4466309547424316, |
|
"logps/chosen": -355.8979797363281, |
|
"logps/rejected": -360.3694763183594, |
|
"loss": 0.7295, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8183680772781372, |
|
"rewards/margins": -0.04079904779791832, |
|
"rewards/rejected": -0.7775689363479614, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.143381095687805e-05, |
|
"logits/chosen": -1.8094087839126587, |
|
"logits/rejected": -2.024905204772949, |
|
"logps/chosen": -291.0030517578125, |
|
"logps/rejected": -401.3894348144531, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.30385422706604004, |
|
"rewards/margins": 0.38516706228256226, |
|
"rewards/rejected": -0.6890213489532471, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.136527926061157e-05, |
|
"logits/chosen": -2.2869348526000977, |
|
"logits/rejected": -2.3848395347595215, |
|
"logps/chosen": -354.1495361328125, |
|
"logps/rejected": -422.5235900878906, |
|
"loss": 0.7545, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5594637393951416, |
|
"rewards/margins": -0.08133678883314133, |
|
"rewards/rejected": -0.4781269133090973, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.1296531695248666e-05, |
|
"logits/chosen": -2.1156575679779053, |
|
"logits/rejected": -2.0625457763671875, |
|
"logps/chosen": -420.004150390625, |
|
"logps/rejected": -347.8268737792969, |
|
"loss": 0.7191, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7436313033103943, |
|
"rewards/margins": 0.03417450189590454, |
|
"rewards/rejected": -0.777805745601654, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.1227569167616206e-05, |
|
"logits/chosen": -1.9652526378631592, |
|
"logits/rejected": -2.0387110710144043, |
|
"logps/chosen": -294.15997314453125, |
|
"logps/rejected": -333.1921081542969, |
|
"loss": 0.6429, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.547760009765625, |
|
"rewards/margins": 0.18485116958618164, |
|
"rewards/rejected": -0.7326111793518066, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.1158392587376536e-05, |
|
"logits/chosen": -1.9591008424758911, |
|
"logits/rejected": -1.9757970571517944, |
|
"logps/chosen": -308.64453125, |
|
"logps/rejected": -373.3291320800781, |
|
"loss": 0.7154, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3303202986717224, |
|
"rewards/margins": 0.0813257172703743, |
|
"rewards/rejected": -0.4116460382938385, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.108900286701552e-05, |
|
"logits/chosen": -2.0622799396514893, |
|
"logits/rejected": -2.0668537616729736, |
|
"logps/chosen": -209.19119262695312, |
|
"logps/rejected": -266.43145751953125, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.61204993724823, |
|
"rewards/margins": 0.28989237546920776, |
|
"rewards/rejected": -0.901942253112793, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.101940092183048e-05, |
|
"logits/chosen": -2.208672523498535, |
|
"logits/rejected": -2.2501301765441895, |
|
"logps/chosen": -449.76763916015625, |
|
"logps/rejected": -333.26617431640625, |
|
"loss": 0.735, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4438176155090332, |
|
"rewards/margins": 0.0284462571144104, |
|
"rewards/rejected": -0.472263902425766, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.0949587669918124e-05, |
|
"logits/chosen": -2.2545619010925293, |
|
"logits/rejected": -2.284487009048462, |
|
"logps/chosen": -368.6788635253906, |
|
"logps/rejected": -404.1095275878906, |
|
"loss": 0.6716, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6989068984985352, |
|
"rewards/margins": 0.13742834329605103, |
|
"rewards/rejected": -0.8363352417945862, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.087956403216243e-05, |
|
"logits/chosen": -2.098728895187378, |
|
"logits/rejected": -1.9426969289779663, |
|
"logps/chosen": -364.287109375, |
|
"logps/rejected": -345.9195556640625, |
|
"loss": 0.6768, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5762618780136108, |
|
"rewards/margins": 0.06659980118274689, |
|
"rewards/rejected": -0.6428617238998413, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0809330932222525e-05, |
|
"logits/chosen": -2.0413217544555664, |
|
"logits/rejected": -1.770898699760437, |
|
"logps/chosen": -338.1258850097656, |
|
"logps/rejected": -310.4683837890625, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.612076461315155, |
|
"rewards/margins": 0.09379884600639343, |
|
"rewards/rejected": -0.7058753371238708, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.073888929652048e-05, |
|
"logits/chosen": -1.8582487106323242, |
|
"logits/rejected": -1.9838684797286987, |
|
"logps/chosen": -279.77764892578125, |
|
"logps/rejected": -287.03314208984375, |
|
"loss": 0.6344, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.22681152820587158, |
|
"rewards/margins": 0.17675894498825073, |
|
"rewards/rejected": -0.4035705029964447, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.066824005422907e-05, |
|
"logits/chosen": -2.2785511016845703, |
|
"logits/rejected": -2.248875379562378, |
|
"logps/chosen": -282.7828369140625, |
|
"logps/rejected": -282.3833923339844, |
|
"loss": 0.6969, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5595721006393433, |
|
"rewards/margins": 0.007087539881467819, |
|
"rewards/rejected": -0.5666596293449402, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0597384137259576e-05, |
|
"logits/chosen": -1.8838849067687988, |
|
"logits/rejected": -1.992185115814209, |
|
"logps/chosen": -249.31353759765625, |
|
"logps/rejected": -291.52215576171875, |
|
"loss": 0.639, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.25019168853759766, |
|
"rewards/margins": 0.13961070775985718, |
|
"rewards/rejected": -0.3898024260997772, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.052632248024943e-05, |
|
"logits/chosen": -2.194199562072754, |
|
"logits/rejected": -2.2093465328216553, |
|
"logps/chosen": -359.925048828125, |
|
"logps/rejected": -341.787109375, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.44080251455307007, |
|
"rewards/margins": 0.10257270187139511, |
|
"rewards/rejected": -0.5433753132820129, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.045505602054994e-05, |
|
"logits/chosen": -1.9952166080474854, |
|
"logits/rejected": -1.9181216955184937, |
|
"logps/chosen": -272.50146484375, |
|
"logps/rejected": -296.2552795410156, |
|
"loss": 0.7549, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5810081958770752, |
|
"rewards/margins": -0.0541771724820137, |
|
"rewards/rejected": -0.5268309712409973, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0383585698213876e-05, |
|
"logits/chosen": -2.166259527206421, |
|
"logits/rejected": -1.9561712741851807, |
|
"logps/chosen": -392.21014404296875, |
|
"logps/rejected": -360.259033203125, |
|
"loss": 0.723, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7585784196853638, |
|
"rewards/margins": 0.024934954941272736, |
|
"rewards/rejected": -0.7835134267807007, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.03119124559831e-05, |
|
"logits/chosen": -2.117828845977783, |
|
"logits/rejected": -2.2696075439453125, |
|
"logps/chosen": -370.1112060546875, |
|
"logps/rejected": -357.4794921875, |
|
"loss": 0.7401, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7355208396911621, |
|
"rewards/margins": 0.01835598051548004, |
|
"rewards/rejected": -0.7538768649101257, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.024003723927614e-05, |
|
"logits/chosen": -2.15438175201416, |
|
"logits/rejected": -2.240237236022949, |
|
"logps/chosen": -291.9135437011719, |
|
"logps/rejected": -306.6573791503906, |
|
"loss": 0.6362, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6013116836547852, |
|
"rewards/margins": 0.29444190859794617, |
|
"rewards/rejected": -0.8957535028457642, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.016796099617569e-05, |
|
"logits/chosen": -2.123490571975708, |
|
"logits/rejected": -1.9996310472488403, |
|
"logps/chosen": -320.6844787597656, |
|
"logps/rejected": -328.69732666015625, |
|
"loss": 0.7201, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6908583045005798, |
|
"rewards/margins": 0.018049392849206924, |
|
"rewards/rejected": -0.7089077234268188, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.009568467741611e-05, |
|
"logits/chosen": -2.1538658142089844, |
|
"logits/rejected": -2.2801826000213623, |
|
"logps/chosen": -332.2469482421875, |
|
"logps/rejected": -398.9875793457031, |
|
"loss": 0.6108, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.35775309801101685, |
|
"rewards/margins": 0.2206544280052185, |
|
"rewards/rejected": -0.5784075260162354, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.0023209236370905e-05, |
|
"logits/chosen": -2.1057112216949463, |
|
"logits/rejected": -1.9648573398590088, |
|
"logps/chosen": -304.4049377441406, |
|
"logps/rejected": -331.1800842285156, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4884049594402313, |
|
"rewards/margins": 0.28548693656921387, |
|
"rewards/rejected": -0.7738919258117676, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9950535629040154e-05, |
|
"logits/chosen": -2.075382947921753, |
|
"logits/rejected": -2.061739206314087, |
|
"logps/chosen": -287.4841003417969, |
|
"logps/rejected": -279.5746765136719, |
|
"loss": 0.6667, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.42838478088378906, |
|
"rewards/margins": 0.09299005568027496, |
|
"rewards/rejected": -0.5213748216629028, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9877664814037844e-05, |
|
"logits/chosen": -2.041006088256836, |
|
"logits/rejected": -2.146519899368286, |
|
"logps/chosen": -234.5111083984375, |
|
"logps/rejected": -315.4188232421875, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.44257616996765137, |
|
"rewards/margins": 0.30319535732269287, |
|
"rewards/rejected": -0.745771586894989, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.98045977525793e-05, |
|
"logits/chosen": -1.9708685874938965, |
|
"logits/rejected": -2.119077444076538, |
|
"logps/chosen": -233.92739868164062, |
|
"logps/rejected": -251.74658203125, |
|
"loss": 0.7386, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.43048954010009766, |
|
"rewards/margins": 0.0007461756467819214, |
|
"rewards/rejected": -0.43123573064804077, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.973133540846844e-05, |
|
"logits/chosen": -2.2124104499816895, |
|
"logits/rejected": -2.3888068199157715, |
|
"logps/chosen": -378.053955078125, |
|
"logps/rejected": -432.74102783203125, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4846843481063843, |
|
"rewards/margins": 0.2898719012737274, |
|
"rewards/rejected": -0.7745562195777893, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.965787874808513e-05, |
|
"logits/chosen": -2.182685375213623, |
|
"logits/rejected": -2.245464324951172, |
|
"logps/chosen": -303.55377197265625, |
|
"logps/rejected": -304.9583435058594, |
|
"loss": 0.7466, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.72776859998703, |
|
"rewards/margins": -0.010826468467712402, |
|
"rewards/rejected": -0.7169421911239624, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.958422874037236e-05, |
|
"logits/chosen": -2.2944741249084473, |
|
"logits/rejected": -2.1541600227355957, |
|
"logps/chosen": -325.5491638183594, |
|
"logps/rejected": -359.56488037109375, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6380666494369507, |
|
"rewards/margins": 0.19594302773475647, |
|
"rewards/rejected": -0.8340096473693848, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.951038635682353e-05, |
|
"logits/chosen": -2.183659553527832, |
|
"logits/rejected": -2.309375762939453, |
|
"logps/chosen": -233.40463256835938, |
|
"logps/rejected": -258.43792724609375, |
|
"loss": 0.5859, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.34500959515571594, |
|
"rewards/margins": 0.3104623556137085, |
|
"rewards/rejected": -0.655471920967102, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.943635257146958e-05, |
|
"logits/chosen": -2.189570903778076, |
|
"logits/rejected": -2.307717800140381, |
|
"logps/chosen": -310.58837890625, |
|
"logps/rejected": -396.47412109375, |
|
"loss": 0.5968, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5562077760696411, |
|
"rewards/margins": 0.26935189962387085, |
|
"rewards/rejected": -0.825559675693512, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.936212836086621e-05, |
|
"logits/chosen": -2.087996244430542, |
|
"logits/rejected": -2.061500310897827, |
|
"logps/chosen": -320.0885009765625, |
|
"logps/rejected": -367.2314453125, |
|
"loss": 0.5962, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5524263381958008, |
|
"rewards/margins": 0.3210427165031433, |
|
"rewards/rejected": -0.8734689354896545, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.9287714704080916e-05, |
|
"logits/chosen": -2.2036032676696777, |
|
"logits/rejected": -2.247941017150879, |
|
"logps/chosen": -310.3907470703125, |
|
"logps/rejected": -373.12005615234375, |
|
"loss": 0.6413, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7271711230278015, |
|
"rewards/margins": 0.1365385353565216, |
|
"rewards/rejected": -0.8637096881866455, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.9213112582680136e-05, |
|
"logits/chosen": -2.071441888809204, |
|
"logits/rejected": -2.2175564765930176, |
|
"logps/chosen": -350.5240478515625, |
|
"logps/rejected": -344.10015869140625, |
|
"loss": 0.819, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7361069321632385, |
|
"rewards/margins": -0.15310856699943542, |
|
"rewards/rejected": -0.5829984545707703, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.913832298071629e-05, |
|
"logits/chosen": -2.137769937515259, |
|
"logits/rejected": -2.1863951683044434, |
|
"logps/chosen": -261.62445068359375, |
|
"logps/rejected": -287.54925537109375, |
|
"loss": 0.5843, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5611026287078857, |
|
"rewards/margins": 0.35456323623657227, |
|
"rewards/rejected": -0.915665864944458, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.906334688471479e-05, |
|
"logits/chosen": -2.322150230407715, |
|
"logits/rejected": -2.2364730834960938, |
|
"logps/chosen": -372.37115478515625, |
|
"logps/rejected": -441.963623046875, |
|
"loss": 0.7185, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.7501680850982666, |
|
"rewards/margins": 0.004912780597805977, |
|
"rewards/rejected": -0.7550809383392334, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8988185283661006e-05, |
|
"logits/chosen": -2.3357200622558594, |
|
"logits/rejected": -2.281803846359253, |
|
"logps/chosen": -349.5588073730469, |
|
"logps/rejected": -427.9027099609375, |
|
"loss": 0.6668, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6943049430847168, |
|
"rewards/margins": 0.08358533680438995, |
|
"rewards/rejected": -0.7778902649879456, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8912839168987286e-05, |
|
"logits/chosen": -2.0027129650115967, |
|
"logits/rejected": -2.201495885848999, |
|
"logps/chosen": -335.1200866699219, |
|
"logps/rejected": -356.2291259765625, |
|
"loss": 0.7201, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.825676441192627, |
|
"rewards/margins": 0.053414199501276016, |
|
"rewards/rejected": -0.8790906667709351, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.883730953455981e-05, |
|
"logits/chosen": -2.0758540630340576, |
|
"logits/rejected": -2.2228283882141113, |
|
"logps/chosen": -330.1047058105469, |
|
"logps/rejected": -359.3675842285156, |
|
"loss": 0.6519, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9475977420806885, |
|
"rewards/margins": 0.2040516585111618, |
|
"rewards/rejected": -1.1516493558883667, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.876159737666551e-05, |
|
"logits/chosen": -2.0982770919799805, |
|
"logits/rejected": -2.1729726791381836, |
|
"logps/chosen": -386.24560546875, |
|
"logps/rejected": -394.7810974121094, |
|
"loss": 0.7604, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8945655226707458, |
|
"rewards/margins": -0.06084037944674492, |
|
"rewards/rejected": -0.8337251543998718, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.868570369399894e-05, |
|
"logits/chosen": -2.116682767868042, |
|
"logits/rejected": -2.1849117279052734, |
|
"logps/chosen": -251.51715087890625, |
|
"logps/rejected": -259.1833801269531, |
|
"loss": 0.6078, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6918532252311707, |
|
"rewards/margins": 0.33846980333328247, |
|
"rewards/rejected": -1.0303230285644531, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.860962948764906e-05, |
|
"logits/chosen": -2.0886361598968506, |
|
"logits/rejected": -2.046086549758911, |
|
"logps/chosen": -292.8084411621094, |
|
"logps/rejected": -357.3808898925781, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5758143067359924, |
|
"rewards/margins": 0.24546638131141663, |
|
"rewards/rejected": -0.8212807178497314, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.85333757610861e-05, |
|
"logits/chosen": -2.0787835121154785, |
|
"logits/rejected": -2.094371795654297, |
|
"logps/chosen": -318.9442443847656, |
|
"logps/rejected": -354.12188720703125, |
|
"loss": 0.6207, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6455579996109009, |
|
"rewards/margins": 0.18885663151741028, |
|
"rewards/rejected": -0.8344146013259888, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.845694352014825e-05, |
|
"logits/chosen": -2.0175139904022217, |
|
"logits/rejected": -2.0191752910614014, |
|
"logps/chosen": -341.6969299316406, |
|
"logps/rejected": -383.448974609375, |
|
"loss": 0.7511, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.768385648727417, |
|
"rewards/margins": -0.034262366592884064, |
|
"rewards/rejected": -0.7341232895851135, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.838033377302844e-05, |
|
"logits/chosen": -2.143493413925171, |
|
"logits/rejected": -2.2795639038085938, |
|
"logps/chosen": -304.6917724609375, |
|
"logps/rejected": -347.99774169921875, |
|
"loss": 0.7008, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7183622717857361, |
|
"rewards/margins": 0.0656728744506836, |
|
"rewards/rejected": -0.7840351462364197, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.830354753026102e-05, |
|
"logits/chosen": -2.0630643367767334, |
|
"logits/rejected": -2.1921281814575195, |
|
"logps/chosen": -290.36041259765625, |
|
"logps/rejected": -370.9847717285156, |
|
"loss": 0.6672, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6518306732177734, |
|
"rewards/margins": 0.20937411487102509, |
|
"rewards/rejected": -0.8612047433853149, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8226585804708435e-05, |
|
"logits/chosen": -2.1670899391174316, |
|
"logits/rejected": -2.202104330062866, |
|
"logps/chosen": -399.4073486328125, |
|
"logps/rejected": -372.1466979980469, |
|
"loss": 0.7435, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7544846534729004, |
|
"rewards/margins": -0.017259221524000168, |
|
"rewards/rejected": -0.7372254133224487, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8149449611547886e-05, |
|
"logits/chosen": -2.1285555362701416, |
|
"logits/rejected": -2.118056297302246, |
|
"logps/chosen": -325.7785949707031, |
|
"logps/rejected": -354.8650817871094, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7010979056358337, |
|
"rewards/margins": 0.0963049978017807, |
|
"rewards/rejected": -0.7974028587341309, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.807213996825788e-05, |
|
"logits/chosen": -2.134826183319092, |
|
"logits/rejected": -2.105046272277832, |
|
"logps/chosen": -347.5355529785156, |
|
"logps/rejected": -370.0594482421875, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.640611469745636, |
|
"rewards/margins": 0.43629205226898193, |
|
"rewards/rejected": -1.0769035816192627, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7994657894604906e-05, |
|
"logits/chosen": -1.999627709388733, |
|
"logits/rejected": -2.0539674758911133, |
|
"logps/chosen": -324.84100341796875, |
|
"logps/rejected": -294.4129333496094, |
|
"loss": 0.5771, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6821228265762329, |
|
"rewards/margins": 0.3325861096382141, |
|
"rewards/rejected": -1.0147089958190918, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.791700441262987e-05, |
|
"logits/chosen": -2.2994258403778076, |
|
"logits/rejected": -2.4870550632476807, |
|
"logps/chosen": -308.85284423828125, |
|
"logps/rejected": -387.4725341796875, |
|
"loss": 0.5876, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8057706356048584, |
|
"rewards/margins": 0.3048417270183563, |
|
"rewards/rejected": -1.110612392425537, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.78391805466347e-05, |
|
"logits/chosen": -2.0464038848876953, |
|
"logits/rejected": -1.9356979131698608, |
|
"logps/chosen": -348.22613525390625, |
|
"logps/rejected": -336.2864685058594, |
|
"loss": 0.7458, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8788054585456848, |
|
"rewards/margins": 0.08573350310325623, |
|
"rewards/rejected": -0.9645389914512634, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7761187323168804e-05, |
|
"logits/chosen": -2.0775394439697266, |
|
"logits/rejected": -2.0725295543670654, |
|
"logps/chosen": -378.3978576660156, |
|
"logps/rejected": -367.72894287109375, |
|
"loss": 0.7507, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8533260822296143, |
|
"rewards/margins": -0.06236880645155907, |
|
"rewards/rejected": -0.7909572124481201, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7683025771015515e-05, |
|
"logits/chosen": -2.129138946533203, |
|
"logits/rejected": -2.243818521499634, |
|
"logps/chosen": -343.4689636230469, |
|
"logps/rejected": -372.4132995605469, |
|
"loss": 0.6416, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.645578145980835, |
|
"rewards/margins": 0.2473578155040741, |
|
"rewards/rejected": -0.8929359912872314, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.760469692117854e-05, |
|
"logits/chosen": -2.071223735809326, |
|
"logits/rejected": -1.9962562322616577, |
|
"logps/chosen": -256.22509765625, |
|
"logps/rejected": -263.2807312011719, |
|
"loss": 0.6128, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.746870219707489, |
|
"rewards/margins": 0.27368226647377014, |
|
"rewards/rejected": -1.0205525159835815, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.752620180686837e-05, |
|
"logits/chosen": -2.195363998413086, |
|
"logits/rejected": -2.3401169776916504, |
|
"logps/chosen": -329.45050048828125, |
|
"logps/rejected": -367.7939758300781, |
|
"loss": 0.6203, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8209664821624756, |
|
"rewards/margins": 0.39947718381881714, |
|
"rewards/rejected": -1.2204437255859375, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.744754146348862e-05, |
|
"logits/chosen": -2.3540046215057373, |
|
"logits/rejected": -2.102443218231201, |
|
"logps/chosen": -439.45501708984375, |
|
"logps/rejected": -325.82647705078125, |
|
"loss": 0.7747, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0380897521972656, |
|
"rewards/margins": -0.03507265821099281, |
|
"rewards/rejected": -1.0030171871185303, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.736871692862239e-05, |
|
"logits/chosen": -2.0107619762420654, |
|
"logits/rejected": -2.133056879043579, |
|
"logps/chosen": -280.13372802734375, |
|
"logps/rejected": -372.2601623535156, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8871760368347168, |
|
"rewards/margins": 0.42683565616607666, |
|
"rewards/rejected": -1.3140116930007935, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7289729242018586e-05, |
|
"logits/chosen": -2.216970682144165, |
|
"logits/rejected": -2.1814701557159424, |
|
"logps/chosen": -248.73269653320312, |
|
"logps/rejected": -246.78436279296875, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7154771089553833, |
|
"rewards/margins": 0.5849171280860901, |
|
"rewards/rejected": -1.3003942966461182, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.721057944557819e-05, |
|
"logits/chosen": -2.026475667953491, |
|
"logits/rejected": -2.0671896934509277, |
|
"logps/chosen": -298.8050842285156, |
|
"logps/rejected": -324.1427307128906, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.523551344871521, |
|
"rewards/margins": 0.30002254247665405, |
|
"rewards/rejected": -0.823573887348175, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.713126858334052e-05, |
|
"logits/chosen": -1.7506847381591797, |
|
"logits/rejected": -1.871716022491455, |
|
"logps/chosen": -272.1562194824219, |
|
"logps/rejected": -365.52752685546875, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7902772426605225, |
|
"rewards/margins": 0.6367801427841187, |
|
"rewards/rejected": -1.4270575046539307, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.705179770146946e-05, |
|
"logits/chosen": -2.215156316757202, |
|
"logits/rejected": -2.17736554145813, |
|
"logps/chosen": -338.9674987792969, |
|
"logps/rejected": -318.7024230957031, |
|
"loss": 0.7397, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.7532888650894165, |
|
"rewards/margins": -0.0309628713876009, |
|
"rewards/rejected": -0.7223260402679443, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.697216784823967e-05, |
|
"logits/chosen": -2.0116758346557617, |
|
"logits/rejected": -2.1581830978393555, |
|
"logps/chosen": -242.60418701171875, |
|
"logps/rejected": -268.9742431640625, |
|
"loss": 0.8924, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1447850465774536, |
|
"rewards/margins": -0.10007806122303009, |
|
"rewards/rejected": -1.0447068214416504, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.689238007402275e-05, |
|
"logits/chosen": -2.0335168838500977, |
|
"logits/rejected": -1.962857723236084, |
|
"logps/chosen": -239.37826538085938, |
|
"logps/rejected": -250.44223022460938, |
|
"loss": 0.7472, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.880851149559021, |
|
"rewards/margins": 0.022362351417541504, |
|
"rewards/rejected": -0.9032134413719177, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6812435431273374e-05, |
|
"logits/chosen": -2.365676164627075, |
|
"logits/rejected": -2.3459362983703613, |
|
"logps/chosen": -479.81549072265625, |
|
"logps/rejected": -578.3353271484375, |
|
"loss": 0.586, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9297402501106262, |
|
"rewards/margins": 0.4031886160373688, |
|
"rewards/rejected": -1.3329288959503174, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.673233497451541e-05, |
|
"logits/chosen": -2.2611470222473145, |
|
"logits/rejected": -2.1032679080963135, |
|
"logps/chosen": -301.1606140136719, |
|
"logps/rejected": -320.68115234375, |
|
"loss": 0.9109, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -1.3444305658340454, |
|
"rewards/margins": -0.23366227746009827, |
|
"rewards/rejected": -1.1107683181762695, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.665207976032804e-05, |
|
"logits/chosen": -2.068887948989868, |
|
"logits/rejected": -2.339625835418701, |
|
"logps/chosen": -389.18975830078125, |
|
"logps/rejected": -526.3670654296875, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.0946829319000244, |
|
"rewards/margins": 0.6931131482124329, |
|
"rewards/rejected": -1.7877960205078125, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.65716708473318e-05, |
|
"logits/chosen": -2.0643274784088135, |
|
"logits/rejected": -1.928961992263794, |
|
"logps/chosen": -350.87506103515625, |
|
"logps/rejected": -305.41510009765625, |
|
"loss": 0.8925, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.5230122804641724, |
|
"rewards/margins": -0.1989414244890213, |
|
"rewards/rejected": -1.324070930480957, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.64911092961746e-05, |
|
"logits/chosen": -1.9068621397018433, |
|
"logits/rejected": -2.146777629852295, |
|
"logps/chosen": -401.4383544921875, |
|
"logps/rejected": -405.74755859375, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1909916400909424, |
|
"rewards/margins": 0.21687617897987366, |
|
"rewards/rejected": -1.4078677892684937, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.641039616951776e-05, |
|
"logits/chosen": -1.996084451675415, |
|
"logits/rejected": -1.9715969562530518, |
|
"logps/chosen": -285.1630859375, |
|
"logps/rejected": -260.9218444824219, |
|
"loss": 0.7071, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1817179918289185, |
|
"rewards/margins": 0.04514620825648308, |
|
"rewards/rejected": -1.226864218711853, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.632953253202199e-05, |
|
"logits/chosen": -1.890580177307129, |
|
"logits/rejected": -2.0182905197143555, |
|
"logps/chosen": -329.42694091796875, |
|
"logps/rejected": -450.1892395019531, |
|
"loss": 0.6209, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0331366062164307, |
|
"rewards/margins": 0.35443419218063354, |
|
"rewards/rejected": -1.3875707387924194, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6248519450333315e-05, |
|
"logits/chosen": -2.3349204063415527, |
|
"logits/rejected": -2.1745338439941406, |
|
"logps/chosen": -383.373291015625, |
|
"logps/rejected": -421.9166259765625, |
|
"loss": 0.7308, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1230090856552124, |
|
"rewards/margins": 0.11969134211540222, |
|
"rewards/rejected": -1.2427003383636475, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6167357993069075e-05, |
|
"logits/chosen": -2.124786615371704, |
|
"logits/rejected": -2.2353270053863525, |
|
"logps/chosen": -398.7025146484375, |
|
"logps/rejected": -462.741455078125, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0880743265151978, |
|
"rewards/margins": 0.24901491403579712, |
|
"rewards/rejected": -1.3370893001556396, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.608604923080373e-05, |
|
"logits/chosen": -2.124338150024414, |
|
"logits/rejected": -1.9945569038391113, |
|
"logps/chosen": -410.68701171875, |
|
"logps/rejected": -351.4372863769531, |
|
"loss": 0.6415, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3134312629699707, |
|
"rewards/margins": 0.23311342298984528, |
|
"rewards/rejected": -1.5465446710586548, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6004594236054836e-05, |
|
"logits/chosen": -1.9870651960372925, |
|
"logits/rejected": -1.9105538129806519, |
|
"logps/chosen": -301.83245849609375, |
|
"logps/rejected": -309.3512268066406, |
|
"loss": 0.5724, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9611720442771912, |
|
"rewards/margins": 0.3760131001472473, |
|
"rewards/rejected": -1.337185263633728, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.592299408326883e-05, |
|
"logits/chosen": -2.201324462890625, |
|
"logits/rejected": -2.078688383102417, |
|
"logps/chosen": -331.58795166015625, |
|
"logps/rejected": -408.33770751953125, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2536283731460571, |
|
"rewards/margins": 0.36857056617736816, |
|
"rewards/rejected": -1.6221990585327148, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.584124984880689e-05, |
|
"logits/chosen": -1.9430594444274902, |
|
"logits/rejected": -2.1800172328948975, |
|
"logps/chosen": -255.10409545898438, |
|
"logps/rejected": -318.5274658203125, |
|
"loss": 0.4678, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.7731481194496155, |
|
"rewards/margins": 0.704475462436676, |
|
"rewards/rejected": -1.4776235818862915, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.575936261093073e-05, |
|
"logits/chosen": -2.026442766189575, |
|
"logits/rejected": -2.3520658016204834, |
|
"logps/chosen": -224.86386108398438, |
|
"logps/rejected": -284.865234375, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.988176703453064, |
|
"rewards/margins": 0.25633472204208374, |
|
"rewards/rejected": -1.2445114850997925, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5677333449788374e-05, |
|
"logits/chosen": -2.2224576473236084, |
|
"logits/rejected": -2.2077910900115967, |
|
"logps/chosen": -376.9631042480469, |
|
"logps/rejected": -320.0201721191406, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.145774006843567, |
|
"rewards/margins": 0.23624449968338013, |
|
"rewards/rejected": -1.3820184469223022, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.559516344739991e-05, |
|
"logits/chosen": -2.0415990352630615, |
|
"logits/rejected": -2.077162981033325, |
|
"logps/chosen": -276.7152404785156, |
|
"logps/rejected": -296.462646484375, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8756453394889832, |
|
"rewards/margins": 0.3228147625923157, |
|
"rewards/rejected": -1.1984599828720093, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.551285368764321e-05, |
|
"logits/chosen": -2.171372890472412, |
|
"logits/rejected": -2.1741371154785156, |
|
"logps/chosen": -273.3794860839844, |
|
"logps/rejected": -258.83477783203125, |
|
"loss": 0.7963, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3004168272018433, |
|
"rewards/margins": 0.03895503282546997, |
|
"rewards/rejected": -1.3393718004226685, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543040525623965e-05, |
|
"logits/chosen": -2.0618252754211426, |
|
"logits/rejected": -2.1750998497009277, |
|
"logps/chosen": -234.17120361328125, |
|
"logps/rejected": -300.029296875, |
|
"loss": 0.5077, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.0354472398757935, |
|
"rewards/margins": 0.45164865255355835, |
|
"rewards/rejected": -1.487095832824707, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.534781924073978e-05, |
|
"logits/chosen": -1.8313791751861572, |
|
"logits/rejected": -2.19712233543396, |
|
"logps/chosen": -263.08843994140625, |
|
"logps/rejected": -374.9747314453125, |
|
"loss": 0.6435, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.3530242443084717, |
|
"rewards/margins": 0.28404536843299866, |
|
"rewards/rejected": -1.6370694637298584, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5265096730508974e-05, |
|
"logits/chosen": -1.992910385131836, |
|
"logits/rejected": -2.050726890563965, |
|
"logps/chosen": -332.4587707519531, |
|
"logps/rejected": -448.9661560058594, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1743048429489136, |
|
"rewards/margins": 0.4015694260597229, |
|
"rewards/rejected": -1.5758743286132812, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.518223881671305e-05, |
|
"logits/chosen": -2.2300572395324707, |
|
"logits/rejected": -2.292898416519165, |
|
"logps/chosen": -361.424560546875, |
|
"logps/rejected": -408.99267578125, |
|
"loss": 0.7222, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.523597240447998, |
|
"rewards/margins": 0.003444090485572815, |
|
"rewards/rejected": -1.5270413160324097, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.509924659230392e-05, |
|
"logits/chosen": -1.9317662715911865, |
|
"logits/rejected": -2.1398890018463135, |
|
"logps/chosen": -201.24151611328125, |
|
"logps/rejected": -316.9627685546875, |
|
"loss": 0.7506, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.2115095853805542, |
|
"rewards/margins": 0.002029839903116226, |
|
"rewards/rejected": -1.2135393619537354, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.501612115200512e-05, |
|
"logits/chosen": -1.8960869312286377, |
|
"logits/rejected": -1.8769932985305786, |
|
"logps/chosen": -230.08914184570312, |
|
"logps/rejected": -274.8809814453125, |
|
"loss": 0.7128, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2795968055725098, |
|
"rewards/margins": 0.10936379432678223, |
|
"rewards/rejected": -1.3889607191085815, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4932863592297395e-05, |
|
"logits/chosen": -2.1262502670288086, |
|
"logits/rejected": -2.0916502475738525, |
|
"logps/chosen": -288.3411865234375, |
|
"logps/rejected": -374.0597839355469, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2651339769363403, |
|
"rewards/margins": 0.49100592732429504, |
|
"rewards/rejected": -1.7561399936676025, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4849475011404246e-05, |
|
"logits/chosen": -1.932436227798462, |
|
"logits/rejected": -2.0958409309387207, |
|
"logps/chosen": -385.390380859375, |
|
"logps/rejected": -420.717529296875, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2712079286575317, |
|
"rewards/margins": 0.29501456022262573, |
|
"rewards/rejected": -1.5662224292755127, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.476595650927741e-05, |
|
"logits/chosen": -2.2775261402130127, |
|
"logits/rejected": -2.2956340312957764, |
|
"logps/chosen": -357.61767578125, |
|
"logps/rejected": -349.096923828125, |
|
"loss": 0.736, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1353936195373535, |
|
"rewards/margins": 0.031133286654949188, |
|
"rewards/rejected": -1.1665267944335938, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.468230918758242e-05, |
|
"logits/chosen": -2.1961703300476074, |
|
"logits/rejected": -2.291398048400879, |
|
"logps/chosen": -308.3431701660156, |
|
"logps/rejected": -314.66851806640625, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1682064533233643, |
|
"rewards/margins": 0.12875376641750336, |
|
"rewards/rejected": -1.2969601154327393, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.459853414968397e-05, |
|
"logits/chosen": -2.1401329040527344, |
|
"logits/rejected": -2.0424118041992188, |
|
"logps/chosen": -323.16693115234375, |
|
"logps/rejected": -300.6637268066406, |
|
"loss": 0.7578, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.309455156326294, |
|
"rewards/margins": 0.23452845215797424, |
|
"rewards/rejected": -1.5439834594726562, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.451463250063146e-05, |
|
"logits/chosen": -2.1163456439971924, |
|
"logits/rejected": -2.154703140258789, |
|
"logps/chosen": -307.1343688964844, |
|
"logps/rejected": -349.54791259765625, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.2518941164016724, |
|
"rewards/margins": 0.306789755821228, |
|
"rewards/rejected": -1.55868399143219, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.443060534714434e-05, |
|
"logits/chosen": -1.820733666419983, |
|
"logits/rejected": -1.8521438837051392, |
|
"logps/chosen": -297.1275939941406, |
|
"logps/rejected": -264.0213623046875, |
|
"loss": 0.761, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.155602216720581, |
|
"rewards/margins": 0.04654591530561447, |
|
"rewards/rejected": -1.2021480798721313, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4346453797597576e-05, |
|
"logits/chosen": -2.069772243499756, |
|
"logits/rejected": -1.9325459003448486, |
|
"logps/chosen": -291.765869140625, |
|
"logps/rejected": -291.6902160644531, |
|
"loss": 0.6282, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.4001034498214722, |
|
"rewards/margins": 0.3071330487728119, |
|
"rewards/rejected": -1.7072365283966064, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.426217896200699e-05, |
|
"logits/chosen": -1.9241890907287598, |
|
"logits/rejected": -1.9453171491622925, |
|
"logps/chosen": -356.77081298828125, |
|
"logps/rejected": -332.4874572753906, |
|
"loss": 0.7444, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.3764888048171997, |
|
"rewards/margins": 0.23870763182640076, |
|
"rewards/rejected": -1.6151964664459229, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.417778195201464e-05, |
|
"logits/chosen": -2.045527935028076, |
|
"logits/rejected": -2.2840356826782227, |
|
"logps/chosen": -358.7695617675781, |
|
"logps/rejected": -411.1256408691406, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.457747459411621, |
|
"rewards/margins": 0.2899523377418518, |
|
"rewards/rejected": -1.7476999759674072, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.4093263880874136e-05, |
|
"logits/chosen": -2.2168030738830566, |
|
"logits/rejected": -2.026329755783081, |
|
"logps/chosen": -391.6089172363281, |
|
"logps/rejected": -444.5746154785156, |
|
"loss": 0.7252, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.3314502239227295, |
|
"rewards/margins": 0.08630897104740143, |
|
"rewards/rejected": -1.4177591800689697, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.400862586343597e-05, |
|
"logits/chosen": -2.105616569519043, |
|
"logits/rejected": -2.0884623527526855, |
|
"logps/chosen": -383.8026123046875, |
|
"logps/rejected": -367.6466064453125, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.5232832431793213, |
|
"rewards/margins": 0.3972979784011841, |
|
"rewards/rejected": -1.920581340789795, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.392386901613282e-05, |
|
"logits/chosen": -2.2307028770446777, |
|
"logits/rejected": -1.9826226234436035, |
|
"logps/chosen": -296.00274658203125, |
|
"logps/rejected": -274.8576354980469, |
|
"loss": 0.8504, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0603615045547485, |
|
"rewards/margins": -0.11922366172075272, |
|
"rewards/rejected": -0.9411377310752869, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.383899445696477e-05, |
|
"logits/chosen": -1.8530570268630981, |
|
"logits/rejected": -1.8290507793426514, |
|
"logps/chosen": -352.0951843261719, |
|
"logps/rejected": -424.3826599121094, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2079800367355347, |
|
"rewards/margins": 0.37576451897621155, |
|
"rewards/rejected": -1.5837446451187134, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.375400330548466e-05, |
|
"logits/chosen": -2.0532162189483643, |
|
"logits/rejected": -1.988155722618103, |
|
"logps/chosen": -418.684814453125, |
|
"logps/rejected": -460.6349792480469, |
|
"loss": 0.8049, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.395944595336914, |
|
"rewards/margins": -0.09554408490657806, |
|
"rewards/rejected": -1.3004004955291748, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.366889668278321e-05, |
|
"logits/chosen": -1.9795726537704468, |
|
"logits/rejected": -2.1528823375701904, |
|
"logps/chosen": -267.9879455566406, |
|
"logps/rejected": -301.9024963378906, |
|
"loss": 0.7124, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9974009990692139, |
|
"rewards/margins": 0.12711931765079498, |
|
"rewards/rejected": -1.1245203018188477, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.358367571147433e-05, |
|
"logits/chosen": -1.886863112449646, |
|
"logits/rejected": -2.1218717098236084, |
|
"logps/chosen": -370.65704345703125, |
|
"logps/rejected": -416.7871398925781, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.184512734413147, |
|
"rewards/margins": 0.2182311713695526, |
|
"rewards/rejected": -1.4027438163757324, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3498341515680214e-05, |
|
"logits/chosen": -2.1803653240203857, |
|
"logits/rejected": -2.1077873706817627, |
|
"logps/chosen": -321.48834228515625, |
|
"logps/rejected": -281.7978820800781, |
|
"loss": 0.7596, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9977477192878723, |
|
"rewards/margins": -0.019190065562725067, |
|
"rewards/rejected": -0.9785577058792114, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3412895221016605e-05, |
|
"logits/chosen": -1.888815999031067, |
|
"logits/rejected": -2.005762815475464, |
|
"logps/chosen": -184.04368591308594, |
|
"logps/rejected": -236.44418334960938, |
|
"loss": 0.9197, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.0804985761642456, |
|
"rewards/margins": -0.14094766974449158, |
|
"rewards/rejected": -0.9395509958267212, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.332733795457789e-05, |
|
"logits/chosen": -1.9857516288757324, |
|
"logits/rejected": -1.7923184633255005, |
|
"logps/chosen": -267.1208801269531, |
|
"logps/rejected": -263.0241394042969, |
|
"loss": 0.5819, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9991573095321655, |
|
"rewards/margins": 0.37854859232902527, |
|
"rewards/rejected": -1.3777059316635132, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.324167084492226e-05, |
|
"logits/chosen": -1.880125880241394, |
|
"logits/rejected": -2.0244104862213135, |
|
"logps/chosen": -300.0595703125, |
|
"logps/rejected": -503.9610290527344, |
|
"loss": 0.469, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.1374258995056152, |
|
"rewards/margins": 0.9749306440353394, |
|
"rewards/rejected": -2.112356662750244, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3155895022056784e-05, |
|
"logits/chosen": -2.1099507808685303, |
|
"logits/rejected": -2.1651086807250977, |
|
"logps/chosen": -297.15704345703125, |
|
"logps/rejected": -323.30389404296875, |
|
"loss": 0.7954, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2452776432037354, |
|
"rewards/margins": -0.07082469016313553, |
|
"rewards/rejected": -1.1744530200958252, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3070011617422566e-05, |
|
"logits/chosen": -1.9197720289230347, |
|
"logits/rejected": -1.991129755973816, |
|
"logps/chosen": -326.0094909667969, |
|
"logps/rejected": -374.349609375, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8590810298919678, |
|
"rewards/margins": 0.45791155099868774, |
|
"rewards/rejected": -1.3169926404953003, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2984021763879755e-05, |
|
"logits/chosen": -2.2275571823120117, |
|
"logits/rejected": -2.2043910026550293, |
|
"logps/chosen": -371.75927734375, |
|
"logps/rejected": -395.1778564453125, |
|
"loss": 0.6456, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.126926302909851, |
|
"rewards/margins": 0.2308472990989685, |
|
"rewards/rejected": -1.3577736616134644, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2897926595692664e-05, |
|
"logits/chosen": -2.181673526763916, |
|
"logits/rejected": -2.3108913898468018, |
|
"logps/chosen": -357.79888916015625, |
|
"logps/rejected": -431.6429443359375, |
|
"loss": 0.62, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9887603521347046, |
|
"rewards/margins": 0.32495391368865967, |
|
"rewards/rejected": -1.3137142658233643, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2811727248514754e-05, |
|
"logits/chosen": -2.1919546127319336, |
|
"logits/rejected": -2.1633992195129395, |
|
"logps/chosen": -421.3163757324219, |
|
"logps/rejected": -446.4229431152344, |
|
"loss": 0.8466, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.3915138244628906, |
|
"rewards/margins": -0.18495866656303406, |
|
"rewards/rejected": -1.2065550088882446, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.272542485937369e-05, |
|
"logits/chosen": -2.2480454444885254, |
|
"logits/rejected": -2.2334165573120117, |
|
"logps/chosen": -532.5480346679688, |
|
"logps/rejected": -461.50408935546875, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.161598563194275, |
|
"rewards/margins": 0.1340804100036621, |
|
"rewards/rejected": -1.2956790924072266, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.263902056665631e-05, |
|
"logits/chosen": -2.1135687828063965, |
|
"logits/rejected": -1.9993455410003662, |
|
"logps/chosen": -338.2395324707031, |
|
"logps/rejected": -354.9482727050781, |
|
"loss": 0.7578, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -1.4973227977752686, |
|
"rewards/margins": -0.08258108794689178, |
|
"rewards/rejected": -1.4147417545318604, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2552515510093674e-05, |
|
"logits/chosen": -1.923673391342163, |
|
"logits/rejected": -2.005218982696533, |
|
"logps/chosen": -276.5377502441406, |
|
"logps/rejected": -285.30633544921875, |
|
"loss": 0.8311, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.4315433502197266, |
|
"rewards/margins": 0.01938357949256897, |
|
"rewards/rejected": -1.4509271383285522, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2465910830745924e-05, |
|
"logits/chosen": -2.0653865337371826, |
|
"logits/rejected": -2.0072455406188965, |
|
"logps/chosen": -217.55328369140625, |
|
"logps/rejected": -182.0865936279297, |
|
"loss": 0.8905, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2046016454696655, |
|
"rewards/margins": -0.10025043040513992, |
|
"rewards/rejected": -1.104351282119751, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.237920767098735e-05, |
|
"logits/chosen": -2.1028592586517334, |
|
"logits/rejected": -2.201251745223999, |
|
"logps/chosen": -325.9986572265625, |
|
"logps/rejected": -432.1500244140625, |
|
"loss": 0.7399, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.1958823204040527, |
|
"rewards/margins": 0.0005789399147033691, |
|
"rewards/rejected": -1.1964612007141113, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.229240717449122e-05, |
|
"logits/chosen": -2.1992039680480957, |
|
"logits/rejected": -2.378601312637329, |
|
"logps/chosen": -370.0502014160156, |
|
"logps/rejected": -408.4455871582031, |
|
"loss": 0.7355, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.168852686882019, |
|
"rewards/margins": -0.0028562992811203003, |
|
"rewards/rejected": -1.1659963130950928, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2205510486214777e-05, |
|
"logits/chosen": -2.1426610946655273, |
|
"logits/rejected": -2.160429000854492, |
|
"logps/chosen": -307.41278076171875, |
|
"logps/rejected": -312.3187561035156, |
|
"loss": 0.6221, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0617390871047974, |
|
"rewards/margins": 0.29938191175460815, |
|
"rewards/rejected": -1.3611209392547607, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.211851875238408e-05, |
|
"logits/chosen": -2.1584270000457764, |
|
"logits/rejected": -1.9756699800491333, |
|
"logps/chosen": -247.97222900390625, |
|
"logps/rejected": -275.8572692871094, |
|
"loss": 0.7132, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2110902070999146, |
|
"rewards/margins": 0.21431638300418854, |
|
"rewards/rejected": -1.425406575202942, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.203143312047889e-05, |
|
"logits/chosen": -2.284712314605713, |
|
"logits/rejected": -2.316433906555176, |
|
"logps/chosen": -397.7944030761719, |
|
"logps/rejected": -435.2258605957031, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0755599737167358, |
|
"rewards/margins": 0.4171152710914612, |
|
"rewards/rejected": -1.4926753044128418, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1944254739217585e-05, |
|
"logits/chosen": -2.3242409229278564, |
|
"logits/rejected": -2.2877395153045654, |
|
"logps/chosen": -317.1751403808594, |
|
"logps/rejected": -319.5546569824219, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0918810367584229, |
|
"rewards/margins": 0.174340158700943, |
|
"rewards/rejected": -1.266221284866333, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1856984758541924e-05, |
|
"logits/chosen": -2.1573104858398438, |
|
"logits/rejected": -2.1013479232788086, |
|
"logps/chosen": -402.46051025390625, |
|
"logps/rejected": -317.1357421875, |
|
"loss": 0.5846, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2507472038269043, |
|
"rewards/margins": 0.32065922021865845, |
|
"rewards/rejected": -1.5714064836502075, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.176962432960197e-05, |
|
"logits/chosen": -2.3160417079925537, |
|
"logits/rejected": -2.1284031867980957, |
|
"logps/chosen": -401.79803466796875, |
|
"logps/rejected": -336.5020446777344, |
|
"loss": 0.8215, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1392488479614258, |
|
"rewards/margins": -0.045140765607357025, |
|
"rewards/rejected": -1.094107985496521, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.168217460474081e-05, |
|
"logits/chosen": -2.1930458545684814, |
|
"logits/rejected": -2.081754684448242, |
|
"logps/chosen": -408.9210510253906, |
|
"logps/rejected": -364.5238037109375, |
|
"loss": 0.7234, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2171686887741089, |
|
"rewards/margins": 0.20472437143325806, |
|
"rewards/rejected": -1.4218928813934326, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.159463673747945e-05, |
|
"logits/chosen": -1.8765006065368652, |
|
"logits/rejected": -1.8720567226409912, |
|
"logps/chosen": -301.31402587890625, |
|
"logps/rejected": -357.6469421386719, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7754146456718445, |
|
"rewards/margins": 0.04356713593006134, |
|
"rewards/rejected": -0.8189818263053894, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.150701188250152e-05, |
|
"logits/chosen": -2.129390001296997, |
|
"logits/rejected": -1.9843943119049072, |
|
"logps/chosen": -334.6359558105469, |
|
"logps/rejected": -345.7218322753906, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9497905373573303, |
|
"rewards/margins": 0.12164813280105591, |
|
"rewards/rejected": -1.0714386701583862, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.141930119563812e-05, |
|
"logits/chosen": -1.9846031665802002, |
|
"logits/rejected": -2.1700026988983154, |
|
"logps/chosen": -316.71832275390625, |
|
"logps/rejected": -362.517822265625, |
|
"loss": 0.6957, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.0000622272491455, |
|
"rewards/margins": 0.041672270745038986, |
|
"rewards/rejected": -1.0417344570159912, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.133150583385247e-05, |
|
"logits/chosen": -2.246786594390869, |
|
"logits/rejected": -2.29536771774292, |
|
"logps/chosen": -401.23394775390625, |
|
"logps/rejected": -394.4549865722656, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0588868856430054, |
|
"rewards/margins": 0.1614934504032135, |
|
"rewards/rejected": -1.220380425453186, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.124362695522476e-05, |
|
"logits/chosen": -1.9954516887664795, |
|
"logits/rejected": -2.391841411590576, |
|
"logps/chosen": -270.8871154785156, |
|
"logps/rejected": -376.65106201171875, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9002559781074524, |
|
"rewards/margins": 0.10894503444433212, |
|
"rewards/rejected": -1.0092010498046875, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.115566571893681e-05, |
|
"logits/chosen": -2.2636733055114746, |
|
"logits/rejected": -2.159451484680176, |
|
"logps/chosen": -318.85888671875, |
|
"logps/rejected": -295.5828857421875, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0352829694747925, |
|
"rewards/margins": 0.12933281064033508, |
|
"rewards/rejected": -1.1646157503128052, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1067623285256766e-05, |
|
"logits/chosen": -2.083453893661499, |
|
"logits/rejected": -2.077559471130371, |
|
"logps/chosen": -277.09197998046875, |
|
"logps/rejected": -313.90728759765625, |
|
"loss": 0.7098, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.8074424266815186, |
|
"rewards/margins": 0.011367838829755783, |
|
"rewards/rejected": -0.8188102841377258, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.097950081552387e-05, |
|
"logits/chosen": -2.062037467956543, |
|
"logits/rejected": -1.969789981842041, |
|
"logps/chosen": -272.09674072265625, |
|
"logps/rejected": -282.55560302734375, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8802644610404968, |
|
"rewards/margins": 0.13163542747497559, |
|
"rewards/rejected": -1.0118999481201172, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.089129947213305e-05, |
|
"logits/chosen": -2.0714964866638184, |
|
"logits/rejected": -2.0289595127105713, |
|
"logps/chosen": -338.4779968261719, |
|
"logps/rejected": -305.9598083496094, |
|
"loss": 0.6018, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8691202998161316, |
|
"rewards/margins": 0.25768163800239563, |
|
"rewards/rejected": -1.1268019676208496, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.080302041851966e-05, |
|
"logits/chosen": -2.1845309734344482, |
|
"logits/rejected": -2.330949306488037, |
|
"logps/chosen": -318.61444091796875, |
|
"logps/rejected": -337.0727844238281, |
|
"loss": 0.7127, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0258458852767944, |
|
"rewards/margins": 0.06928322464227676, |
|
"rewards/rejected": -1.0951290130615234, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.071466481914409e-05, |
|
"logits/chosen": -1.974360704421997, |
|
"logits/rejected": -1.8974449634552002, |
|
"logps/chosen": -328.3250732421875, |
|
"logps/rejected": -353.5456237792969, |
|
"loss": 0.811, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.0966472625732422, |
|
"rewards/margins": -0.16713061928749084, |
|
"rewards/rejected": -0.9295165538787842, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.062623383947643e-05, |
|
"logits/chosen": -1.96671462059021, |
|
"logits/rejected": -2.175767421722412, |
|
"logps/chosen": -325.94085693359375, |
|
"logps/rejected": -443.0379943847656, |
|
"loss": 0.7812, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.099274754524231, |
|
"rewards/margins": -0.10562913119792938, |
|
"rewards/rejected": -0.9936455488204956, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.053772864598108e-05, |
|
"logits/chosen": -1.8880025148391724, |
|
"logits/rejected": -2.134125232696533, |
|
"logps/chosen": -347.37060546875, |
|
"logps/rejected": -415.47735595703125, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.082190990447998, |
|
"rewards/margins": 0.09107710421085358, |
|
"rewards/rejected": -1.1732680797576904, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0449150406101367e-05, |
|
"logits/chosen": -1.7812613248825073, |
|
"logits/rejected": -1.7713969945907593, |
|
"logps/chosen": -293.71978759765625, |
|
"logps/rejected": -305.0588073730469, |
|
"loss": 0.712, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0079898834228516, |
|
"rewards/margins": 0.062352173030376434, |
|
"rewards/rejected": -1.0703420639038086, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0360500288244155e-05, |
|
"logits/chosen": -2.2740883827209473, |
|
"logits/rejected": -2.167978286743164, |
|
"logps/chosen": -434.8294372558594, |
|
"logps/rejected": -422.0832214355469, |
|
"loss": 0.784, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.0591871738433838, |
|
"rewards/margins": -0.10172367095947266, |
|
"rewards/rejected": -0.9574634432792664, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0271779461764426e-05, |
|
"logits/chosen": -2.059706926345825, |
|
"logits/rejected": -2.0634119510650635, |
|
"logps/chosen": -401.7886047363281, |
|
"logps/rejected": -436.7006530761719, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9169749021530151, |
|
"rewards/margins": 0.19741462171077728, |
|
"rewards/rejected": -1.114389419555664, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.018298909694986e-05, |
|
"logits/chosen": -2.1678502559661865, |
|
"logits/rejected": -2.2958884239196777, |
|
"logps/chosen": -328.1385498046875, |
|
"logps/rejected": -411.2935485839844, |
|
"loss": 0.6487, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.87379390001297, |
|
"rewards/margins": 0.17571806907653809, |
|
"rewards/rejected": -1.0495120286941528, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0094130365005395e-05, |
|
"logits/chosen": -2.092456817626953, |
|
"logits/rejected": -2.266845226287842, |
|
"logps/chosen": -225.6428680419922, |
|
"logps/rejected": -335.9315185546875, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7999565601348877, |
|
"rewards/margins": 0.19611772894859314, |
|
"rewards/rejected": -0.996074378490448, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0005204438037765e-05, |
|
"logits/chosen": -2.205846071243286, |
|
"logits/rejected": -2.052035093307495, |
|
"logps/chosen": -361.5894775390625, |
|
"logps/rejected": -309.0181884765625, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9857699871063232, |
|
"rewards/margins": 0.09481573104858398, |
|
"rewards/rejected": -1.0805857181549072, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.991621248904007e-05, |
|
"logits/chosen": -2.26173996925354, |
|
"logits/rejected": -2.031365394592285, |
|
"logps/chosen": -369.19915771484375, |
|
"logps/rejected": -294.8736877441406, |
|
"loss": 0.7289, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -1.128753423690796, |
|
"rewards/margins": -0.01859595626592636, |
|
"rewards/rejected": -1.1101574897766113, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9827155691876262e-05, |
|
"logits/chosen": -1.8554052114486694, |
|
"logits/rejected": -2.2059624195098877, |
|
"logps/chosen": -317.9852600097656, |
|
"logps/rejected": -354.1583251953125, |
|
"loss": 0.5558, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9314447045326233, |
|
"rewards/margins": 0.35100990533828735, |
|
"rewards/rejected": -1.2824546098709106, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.973803522126571e-05, |
|
"logits/chosen": -1.9700522422790527, |
|
"logits/rejected": -1.8781499862670898, |
|
"logps/chosen": -307.0520935058594, |
|
"logps/rejected": -313.407470703125, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0591685771942139, |
|
"rewards/margins": 0.05153223127126694, |
|
"rewards/rejected": -1.1107008457183838, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9648852252767668e-05, |
|
"logits/chosen": -2.1033730506896973, |
|
"logits/rejected": -2.0786190032958984, |
|
"logps/chosen": -452.3450012207031, |
|
"logps/rejected": -511.24700927734375, |
|
"loss": 0.6695, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.161694884300232, |
|
"rewards/margins": 0.10767564922571182, |
|
"rewards/rejected": -1.2693705558776855, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9559607962765773e-05, |
|
"logits/chosen": -2.103732109069824, |
|
"logits/rejected": -2.215973377227783, |
|
"logps/chosen": -326.31787109375, |
|
"logps/rejected": -398.490478515625, |
|
"loss": 0.6273, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.019250750541687, |
|
"rewards/margins": 0.20697328448295593, |
|
"rewards/rejected": -1.2262240648269653, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.947030352845255e-05, |
|
"logits/chosen": -2.1636712551116943, |
|
"logits/rejected": -2.1790904998779297, |
|
"logps/chosen": -335.7892761230469, |
|
"logps/rejected": -397.0627746582031, |
|
"loss": 0.5934, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9784788489341736, |
|
"rewards/margins": 0.27349093556404114, |
|
"rewards/rejected": -1.251969814300537, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9380940127813834e-05, |
|
"logits/chosen": -2.165933609008789, |
|
"logits/rejected": -2.2048957347869873, |
|
"logps/chosen": -421.8327331542969, |
|
"logps/rejected": -405.9539794921875, |
|
"loss": 0.7287, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.179675579071045, |
|
"rewards/margins": 0.05797319859266281, |
|
"rewards/rejected": -1.2376487255096436, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9291518939613315e-05, |
|
"logits/chosen": -2.1944916248321533, |
|
"logits/rejected": -2.3757896423339844, |
|
"logps/chosen": -486.90582275390625, |
|
"logps/rejected": -422.9759521484375, |
|
"loss": 0.703, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.3329401016235352, |
|
"rewards/margins": 0.04630117490887642, |
|
"rewards/rejected": -1.3792412281036377, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9202041143376896e-05, |
|
"logits/chosen": -1.9283084869384766, |
|
"logits/rejected": -2.046499490737915, |
|
"logps/chosen": -340.8263244628906, |
|
"logps/rejected": -354.3287658691406, |
|
"loss": 0.6069, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.017566204071045, |
|
"rewards/margins": 0.2470768392086029, |
|
"rewards/rejected": -1.2646431922912598, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9112507919377213e-05, |
|
"logits/chosen": -1.9073665142059326, |
|
"logits/rejected": -1.9913743734359741, |
|
"logps/chosen": -233.45367431640625, |
|
"logps/rejected": -262.6000061035156, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9823710918426514, |
|
"rewards/margins": 0.10720720142126083, |
|
"rewards/rejected": -1.0895782709121704, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.9022920448618e-05, |
|
"logits/chosen": -2.0144119262695312, |
|
"logits/rejected": -2.010000228881836, |
|
"logps/chosen": -270.91790771484375, |
|
"logps/rejected": -324.4449462890625, |
|
"loss": 0.7047, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.950652539730072, |
|
"rewards/margins": 0.02743140608072281, |
|
"rewards/rejected": -0.9780839681625366, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8933279912818566e-05, |
|
"logits/chosen": -2.200765609741211, |
|
"logits/rejected": -1.9420582056045532, |
|
"logps/chosen": -326.8210144042969, |
|
"logps/rejected": -328.5808410644531, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0532310009002686, |
|
"rewards/margins": 0.0682058334350586, |
|
"rewards/rejected": -1.1214368343353271, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8843587494398177e-05, |
|
"logits/chosen": -2.297065496444702, |
|
"logits/rejected": -2.367543935775757, |
|
"logps/chosen": -335.68994140625, |
|
"logps/rejected": -337.0887756347656, |
|
"loss": 0.6224, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0686800479888916, |
|
"rewards/margins": 0.21843653917312622, |
|
"rewards/rejected": -1.287116527557373, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.875384437646046e-05, |
|
"logits/chosen": -2.2880589962005615, |
|
"logits/rejected": -2.273855209350586, |
|
"logps/chosen": -306.8229675292969, |
|
"logps/rejected": -322.33050537109375, |
|
"loss": 0.5281, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.9717899560928345, |
|
"rewards/margins": 0.5251017808914185, |
|
"rewards/rejected": -1.4968918561935425, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8664051742777803e-05, |
|
"logits/chosen": -2.119661569595337, |
|
"logits/rejected": -1.9243615865707397, |
|
"logps/chosen": -326.88348388671875, |
|
"logps/rejected": -394.1810302734375, |
|
"loss": 0.6364, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9266807436943054, |
|
"rewards/margins": 0.20886686444282532, |
|
"rewards/rejected": -1.1355476379394531, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8574210777775755e-05, |
|
"logits/chosen": -2.2085232734680176, |
|
"logits/rejected": -2.188079357147217, |
|
"logps/chosen": -315.6412353515625, |
|
"logps/rejected": -290.0874938964844, |
|
"loss": 0.7217, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.09775710105896, |
|
"rewards/margins": 0.024385623633861542, |
|
"rewards/rejected": -1.1221426725387573, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8484322666517373e-05, |
|
"logits/chosen": -2.236124277114868, |
|
"logits/rejected": -2.3427038192749023, |
|
"logps/chosen": -274.1993103027344, |
|
"logps/rejected": -298.519775390625, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9323225617408752, |
|
"rewards/margins": 0.053042903542518616, |
|
"rewards/rejected": -0.9853654503822327, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.83943885946876e-05, |
|
"logits/chosen": -2.3090035915374756, |
|
"logits/rejected": -2.3383147716522217, |
|
"logps/chosen": -351.7496643066406, |
|
"logps/rejected": -356.39678955078125, |
|
"loss": 0.776, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.235669493675232, |
|
"rewards/margins": -0.09604780375957489, |
|
"rewards/rejected": -1.139621615409851, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8304409748577653e-05, |
|
"logits/chosen": -2.266005516052246, |
|
"logits/rejected": -2.2317018508911133, |
|
"logps/chosen": -342.5093994140625, |
|
"logps/rejected": -360.4115295410156, |
|
"loss": 0.6021, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.800186276435852, |
|
"rewards/margins": 0.3189813792705536, |
|
"rewards/rejected": -1.119167685508728, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.821438731506933e-05, |
|
"logits/chosen": -2.1894872188568115, |
|
"logits/rejected": -2.2929162979125977, |
|
"logps/chosen": -366.48651123046875, |
|
"logps/rejected": -422.5482177734375, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1337993144989014, |
|
"rewards/margins": 0.18402203917503357, |
|
"rewards/rejected": -1.3178215026855469, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.8124322481619388e-05, |
|
"logits/chosen": -2.1163060665130615, |
|
"logits/rejected": -2.0039188861846924, |
|
"logps/chosen": -361.08380126953125, |
|
"logps/rejected": -269.610595703125, |
|
"loss": 0.7452, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.3107686042785645, |
|
"rewards/margins": -0.045146312564611435, |
|
"rewards/rejected": -1.2656222581863403, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.803421643624386e-05, |
|
"logits/chosen": -2.105262041091919, |
|
"logits/rejected": -1.973724126815796, |
|
"logps/chosen": -311.098388671875, |
|
"logps/rejected": -314.92181396484375, |
|
"loss": 0.6544, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0980561971664429, |
|
"rewards/margins": 0.178094744682312, |
|
"rewards/rejected": -1.2761509418487549, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7944070367502402e-05, |
|
"logits/chosen": -2.1020731925964355, |
|
"logits/rejected": -2.2140867710113525, |
|
"logps/chosen": -277.33599853515625, |
|
"logps/rejected": -283.2631530761719, |
|
"loss": 0.6616, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.192931056022644, |
|
"rewards/margins": 0.10631629824638367, |
|
"rewards/rejected": -1.2992472648620605, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 965, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|