|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.208333333333333e-09, |
|
"logits/chosen": -2.676934003829956, |
|
"logits/rejected": -2.509021043777466, |
|
"logps/chosen": -304.709228515625, |
|
"logps/rejected": -229.49505615234375, |
|
"loss": 0.6931, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1, |
|
"use_label": 10.0 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.5309348106384277, |
|
"logits/rejected": -2.5985612869262695, |
|
"logps/chosen": -313.4403381347656, |
|
"logps/rejected": -277.2276611328125, |
|
"loss": 0.6812, |
|
"pred_label": 18.55555534362793, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.0022232607007026672, |
|
"rewards/margins": -7.403641939163208e-05, |
|
"rewards/rejected": 0.0022972968872636557, |
|
"step": 10, |
|
"use_label": 71.44444274902344 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.390097141265869, |
|
"logits/rejected": -2.4532761573791504, |
|
"logps/chosen": -229.01559448242188, |
|
"logps/rejected": -232.5874786376953, |
|
"loss": 0.6791, |
|
"pred_label": 61.54999923706055, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.001973286736756563, |
|
"rewards/margins": -0.0003373834188096225, |
|
"rewards/rejected": -0.0016359034925699234, |
|
"step": 20, |
|
"use_label": 180.4499969482422 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -2.529320001602173, |
|
"logits/rejected": -2.4789929389953613, |
|
"logps/chosen": -270.6199951171875, |
|
"logps/rejected": -244.80044555664062, |
|
"loss": 0.6782, |
|
"pred_label": 106.80000305175781, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.006186917424201965, |
|
"rewards/margins": -0.005202265456318855, |
|
"rewards/rejected": -0.0009846522007137537, |
|
"step": 30, |
|
"use_label": 295.20001220703125 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.4986624717712402, |
|
"logits/rejected": -2.5215377807617188, |
|
"logps/chosen": -270.62274169921875, |
|
"logps/rejected": -263.65087890625, |
|
"loss": 0.6789, |
|
"pred_label": 156.22500610351562, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0010104719549417496, |
|
"rewards/margins": 0.0030316715128719807, |
|
"rewards/rejected": -0.004042143002152443, |
|
"step": 40, |
|
"use_label": 405.7749938964844 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -2.531646490097046, |
|
"logits/rejected": -2.5199639797210693, |
|
"logps/chosen": -255.382080078125, |
|
"logps/rejected": -249.44775390625, |
|
"loss": 0.6778, |
|
"pred_label": 213.4250030517578, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0030100971926003695, |
|
"rewards/margins": -0.00096442288486287, |
|
"rewards/rejected": -0.0020456742495298386, |
|
"step": 50, |
|
"use_label": 508.57501220703125 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.4867427349090576, |
|
"logits/rejected": -2.473141670227051, |
|
"logps/chosen": -278.27655029296875, |
|
"logps/rejected": -257.5716247558594, |
|
"loss": 0.6784, |
|
"pred_label": 266.17498779296875, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0048277489840984344, |
|
"rewards/margins": -0.004098966252058744, |
|
"rewards/rejected": -0.0007287820335477591, |
|
"step": 60, |
|
"use_label": 615.8250122070312 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -2.4988815784454346, |
|
"logits/rejected": -2.492649793624878, |
|
"logps/chosen": -285.4491271972656, |
|
"logps/rejected": -263.70635986328125, |
|
"loss": 0.6793, |
|
"pred_label": 313.125, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.004624291323125362, |
|
"rewards/margins": -0.003086260985583067, |
|
"rewards/rejected": -0.0015380297554656863, |
|
"step": 70, |
|
"use_label": 728.875 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.447936534881592, |
|
"logits/rejected": -2.440683126449585, |
|
"logps/chosen": -292.83795166015625, |
|
"logps/rejected": -262.8486328125, |
|
"loss": 0.6787, |
|
"pred_label": 365.5, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.00346105033531785, |
|
"rewards/margins": -0.00289200060069561, |
|
"rewards/rejected": -0.0005690503749065101, |
|
"step": 80, |
|
"use_label": 836.5 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -2.410609006881714, |
|
"logits/rejected": -2.4672443866729736, |
|
"logps/chosen": -281.00421142578125, |
|
"logps/rejected": -256.7078552246094, |
|
"loss": 0.6794, |
|
"pred_label": 423.875, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0006400573183782399, |
|
"rewards/margins": -0.0020913761109113693, |
|
"rewards/rejected": 0.002731433603912592, |
|
"step": 90, |
|
"use_label": 938.125 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.976717112922002e-07, |
|
"logits/chosen": -2.4385340213775635, |
|
"logits/rejected": -2.401470899581909, |
|
"logps/chosen": -263.50323486328125, |
|
"logps/rejected": -282.2510070800781, |
|
"loss": 0.6781, |
|
"pred_label": 479.6000061035156, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.007567421533167362, |
|
"rewards/margins": 0.010446270927786827, |
|
"rewards/rejected": -0.002878849394619465, |
|
"step": 100, |
|
"use_label": 1042.4000244140625 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.918509895227007e-07, |
|
"logits/chosen": -2.4747567176818848, |
|
"logits/rejected": -2.4313316345214844, |
|
"logps/chosen": -255.5030975341797, |
|
"logps/rejected": -241.1230926513672, |
|
"loss": 0.6776, |
|
"pred_label": 530.625, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0008927445742301643, |
|
"rewards/margins": 0.001805878826417029, |
|
"rewards/rejected": -0.0009131338447332382, |
|
"step": 110, |
|
"use_label": 1151.375 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.860302677532014e-07, |
|
"logits/chosen": -2.432814836502075, |
|
"logits/rejected": -2.4882383346557617, |
|
"logps/chosen": -266.65386962890625, |
|
"logps/rejected": -237.3206787109375, |
|
"loss": 0.6794, |
|
"pred_label": 576.875, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.008614806458353996, |
|
"rewards/margins": 0.01159285381436348, |
|
"rewards/rejected": -0.0029780478216707706, |
|
"step": 120, |
|
"use_label": 1265.125 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.802095459837019e-07, |
|
"logits/chosen": -2.498955249786377, |
|
"logits/rejected": -2.49072003364563, |
|
"logps/chosen": -295.6458435058594, |
|
"logps/rejected": -263.4668273925781, |
|
"loss": 0.6774, |
|
"pred_label": 627.4749755859375, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.00027961478917859495, |
|
"rewards/margins": 0.0010693027870729566, |
|
"rewards/rejected": -0.001348917605355382, |
|
"step": 130, |
|
"use_label": 1374.5250244140625 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.743888242142026e-07, |
|
"logits/chosen": -2.50268292427063, |
|
"logits/rejected": -2.506229877471924, |
|
"logps/chosen": -249.8160400390625, |
|
"logps/rejected": -228.35537719726562, |
|
"loss": 0.6792, |
|
"pred_label": 681.7999877929688, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0006030676886439323, |
|
"rewards/margins": -0.00023779459297657013, |
|
"rewards/rejected": 0.0008408633293583989, |
|
"step": 140, |
|
"use_label": 1480.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.685681024447031e-07, |
|
"logits/chosen": -2.430985689163208, |
|
"logits/rejected": -2.438312292098999, |
|
"logps/chosen": -282.99285888671875, |
|
"logps/rejected": -260.1983947753906, |
|
"loss": 0.6764, |
|
"pred_label": 730.6749877929688, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.007840326987206936, |
|
"rewards/margins": 0.008648511953651905, |
|
"rewards/rejected": -0.0008081849664449692, |
|
"step": 150, |
|
"use_label": 1591.324951171875 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.627473806752037e-07, |
|
"logits/chosen": -2.513338565826416, |
|
"logits/rejected": -2.4252638816833496, |
|
"logps/chosen": -251.98593139648438, |
|
"logps/rejected": -258.3878479003906, |
|
"loss": 0.6779, |
|
"pred_label": 785.4500122070312, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.009579241275787354, |
|
"rewards/margins": 0.013932722620666027, |
|
"rewards/rejected": -0.004353481810539961, |
|
"step": 160, |
|
"use_label": 1696.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5692665890570433e-07, |
|
"logits/chosen": -2.51324725151062, |
|
"logits/rejected": -2.5183651447296143, |
|
"logps/chosen": -286.78240966796875, |
|
"logps/rejected": -242.28451538085938, |
|
"loss": 0.6781, |
|
"pred_label": 837.375, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.010844933800399303, |
|
"rewards/margins": 0.014444952830672264, |
|
"rewards/rejected": -0.003600016701966524, |
|
"step": 170, |
|
"use_label": 1804.625 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5110593713620486e-07, |
|
"logits/chosen": -2.4405276775360107, |
|
"logits/rejected": -2.418701410293579, |
|
"logps/chosen": -245.29843139648438, |
|
"logps/rejected": -261.94500732421875, |
|
"loss": 0.6779, |
|
"pred_label": 900.5250244140625, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -9.365100413560867e-05, |
|
"rewards/margins": 0.0037569026462733746, |
|
"rewards/rejected": -0.003850553184747696, |
|
"step": 180, |
|
"use_label": 1901.4749755859375 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4528521536670544e-07, |
|
"logits/chosen": -2.549407482147217, |
|
"logits/rejected": -2.5117835998535156, |
|
"logps/chosen": -291.4512939453125, |
|
"logps/rejected": -250.8589324951172, |
|
"loss": 0.6749, |
|
"pred_label": 960.4500122070312, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0050302534364163876, |
|
"rewards/margins": 0.01830260455608368, |
|
"rewards/rejected": -0.013272350654006004, |
|
"step": 190, |
|
"use_label": 2001.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3946449359720607e-07, |
|
"logits/chosen": -2.521691083908081, |
|
"logits/rejected": -2.522428035736084, |
|
"logps/chosen": -251.04867553710938, |
|
"logps/rejected": -259.01055908203125, |
|
"loss": 0.6754, |
|
"pred_label": 1024.925048828125, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.004073199350386858, |
|
"rewards/margins": 0.008589169010519981, |
|
"rewards/rejected": -0.004515970591455698, |
|
"step": 200, |
|
"use_label": 2097.074951171875 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.336437718277066e-07, |
|
"logits/chosen": -2.5071074962615967, |
|
"logits/rejected": -2.5461485385894775, |
|
"logps/chosen": -288.81842041015625, |
|
"logps/rejected": -246.823486328125, |
|
"loss": 0.6752, |
|
"pred_label": 1087.875, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.011537188664078712, |
|
"rewards/margins": 0.013686036691069603, |
|
"rewards/rejected": -0.0021488501224666834, |
|
"step": 210, |
|
"use_label": 2194.125 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.278230500582072e-07, |
|
"logits/chosen": -2.4844157695770264, |
|
"logits/rejected": -2.4883625507354736, |
|
"logps/chosen": -263.35015869140625, |
|
"logps/rejected": -261.3338623046875, |
|
"loss": 0.6738, |
|
"pred_label": 1153.574951171875, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0034189000725746155, |
|
"rewards/margins": 0.01800468936562538, |
|
"rewards/rejected": -0.014585788361728191, |
|
"step": 220, |
|
"use_label": 2288.425048828125 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.220023282887078e-07, |
|
"logits/chosen": -2.4569809436798096, |
|
"logits/rejected": -2.4612298011779785, |
|
"logps/chosen": -294.7366943359375, |
|
"logps/rejected": -254.261962890625, |
|
"loss": 0.672, |
|
"pred_label": 1221.175048828125, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.01110165473073721, |
|
"rewards/margins": 0.014376315288245678, |
|
"rewards/rejected": -0.0032746598590165377, |
|
"step": 230, |
|
"use_label": 2380.824951171875 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1618160651920834e-07, |
|
"logits/chosen": -2.440027952194214, |
|
"logits/rejected": -2.415015935897827, |
|
"logps/chosen": -257.82342529296875, |
|
"logps/rejected": -215.18408203125, |
|
"loss": 0.6712, |
|
"pred_label": 1294.2750244140625, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.011741789057850838, |
|
"rewards/margins": 0.014540466479957104, |
|
"rewards/rejected": -0.0027986769564449787, |
|
"step": 240, |
|
"use_label": 2467.72509765625 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.103608847497089e-07, |
|
"logits/chosen": -2.4385263919830322, |
|
"logits/rejected": -2.4420063495635986, |
|
"logps/chosen": -277.59747314453125, |
|
"logps/rejected": -258.9275817871094, |
|
"loss": 0.672, |
|
"pred_label": 1364.699951171875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.01612050086259842, |
|
"rewards/margins": 0.02269991859793663, |
|
"rewards/rejected": -0.0065794168040156364, |
|
"step": 250, |
|
"use_label": 2557.300048828125 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0454016298020956e-07, |
|
"logits/chosen": -2.42997407913208, |
|
"logits/rejected": -2.424861431121826, |
|
"logps/chosen": -239.51095581054688, |
|
"logps/rejected": -240.63623046875, |
|
"loss": 0.6707, |
|
"pred_label": 1427.824951171875, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.000991971348412335, |
|
"rewards/margins": 0.004964248277246952, |
|
"rewards/rejected": -0.003972277976572514, |
|
"step": 260, |
|
"use_label": 2654.175048828125 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.987194412107101e-07, |
|
"logits/chosen": -2.4381518363952637, |
|
"logits/rejected": -2.46921706199646, |
|
"logps/chosen": -280.505859375, |
|
"logps/rejected": -266.14080810546875, |
|
"loss": 0.6735, |
|
"pred_label": 1494.574951171875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.01380440779030323, |
|
"rewards/margins": 0.023902520537376404, |
|
"rewards/rejected": -0.010098112747073174, |
|
"step": 270, |
|
"use_label": 2747.425048828125 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9289871944121066e-07, |
|
"logits/chosen": -2.527064800262451, |
|
"logits/rejected": -2.552640438079834, |
|
"logps/chosen": -284.7080078125, |
|
"logps/rejected": -255.3748016357422, |
|
"loss": 0.6706, |
|
"pred_label": 1554.8499755859375, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.0216833408921957, |
|
"rewards/margins": 0.026800300925970078, |
|
"rewards/rejected": -0.005116959102451801, |
|
"step": 280, |
|
"use_label": 2847.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.870779976717113e-07, |
|
"logits/chosen": -2.5222768783569336, |
|
"logits/rejected": -2.508826732635498, |
|
"logps/chosen": -278.7129821777344, |
|
"logps/rejected": -220.00210571289062, |
|
"loss": 0.669, |
|
"pred_label": 1625.199951171875, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.004253073129802942, |
|
"rewards/margins": 0.014592866413295269, |
|
"rewards/rejected": -0.010339794680476189, |
|
"step": 290, |
|
"use_label": 2936.800048828125 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.812572759022118e-07, |
|
"logits/chosen": -2.4862565994262695, |
|
"logits/rejected": -2.4585397243499756, |
|
"logps/chosen": -254.63589477539062, |
|
"logps/rejected": -230.22238159179688, |
|
"loss": 0.669, |
|
"pred_label": 1695.0250244140625, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.013738395646214485, |
|
"rewards/margins": 0.030270254239439964, |
|
"rewards/rejected": -0.01653185673058033, |
|
"step": 300, |
|
"use_label": 3026.97509765625 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.754365541327124e-07, |
|
"logits/chosen": -2.490318775177002, |
|
"logits/rejected": -2.411966323852539, |
|
"logps/chosen": -255.95016479492188, |
|
"logps/rejected": -278.394775390625, |
|
"loss": 0.6677, |
|
"pred_label": 1769.550048828125, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.011918948963284492, |
|
"rewards/margins": 0.024249419569969177, |
|
"rewards/rejected": -0.012330473400652409, |
|
"step": 310, |
|
"use_label": 3112.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.6961583236321304e-07, |
|
"logits/chosen": -2.4655749797821045, |
|
"logits/rejected": -2.429624080657959, |
|
"logps/chosen": -279.8106384277344, |
|
"logps/rejected": -272.11993408203125, |
|
"loss": 0.6648, |
|
"pred_label": 1836.0, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.012938129715621471, |
|
"rewards/margins": 0.027768433094024658, |
|
"rewards/rejected": -0.014830301515758038, |
|
"step": 320, |
|
"use_label": 3206.0 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.637951105937136e-07, |
|
"logits/chosen": -2.494778633117676, |
|
"logits/rejected": -2.445328712463379, |
|
"logps/chosen": -258.9544982910156, |
|
"logps/rejected": -226.5720977783203, |
|
"loss": 0.6674, |
|
"pred_label": 1910.1500244140625, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.00947931781411171, |
|
"rewards/margins": 0.01889108493924141, |
|
"rewards/rejected": -0.009411768987774849, |
|
"step": 330, |
|
"use_label": 3291.85009765625 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.579743888242142e-07, |
|
"logits/chosen": -2.44804048538208, |
|
"logits/rejected": -2.4573702812194824, |
|
"logps/chosen": -267.03192138671875, |
|
"logps/rejected": -228.086181640625, |
|
"loss": 0.6657, |
|
"pred_label": 1974.324951171875, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.018211424350738525, |
|
"rewards/margins": 0.021834325045347214, |
|
"rewards/rejected": -0.0036229020915925503, |
|
"step": 340, |
|
"use_label": 3387.675048828125 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.521536670547148e-07, |
|
"logits/chosen": -2.4245553016662598, |
|
"logits/rejected": -2.4514193534851074, |
|
"logps/chosen": -284.3450927734375, |
|
"logps/rejected": -268.92486572265625, |
|
"loss": 0.6631, |
|
"pred_label": 2047.8499755859375, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.02513638138771057, |
|
"rewards/margins": 0.04776451736688614, |
|
"rewards/rejected": -0.02262813411653042, |
|
"step": 350, |
|
"use_label": 3474.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4633294528521536e-07, |
|
"logits/chosen": -2.5096325874328613, |
|
"logits/rejected": -2.5362212657928467, |
|
"logps/chosen": -262.33953857421875, |
|
"logps/rejected": -256.80279541015625, |
|
"loss": 0.6611, |
|
"pred_label": 2130.89990234375, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.008055051788687706, |
|
"rewards/margins": 0.03220166265964508, |
|
"rewards/rejected": -0.024146610870957375, |
|
"step": 360, |
|
"use_label": 3551.10009765625 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.4051222351571594e-07, |
|
"logits/chosen": -2.4874494075775146, |
|
"logits/rejected": -2.4494900703430176, |
|
"logps/chosen": -263.681884765625, |
|
"logps/rejected": -261.24871826171875, |
|
"loss": 0.6645, |
|
"pred_label": 2212.10009765625, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.01168682612478733, |
|
"rewards/margins": 0.035164039582014084, |
|
"rewards/rejected": -0.023477211594581604, |
|
"step": 370, |
|
"use_label": 3629.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.346915017462165e-07, |
|
"logits/chosen": -2.5527141094207764, |
|
"logits/rejected": -2.500330686569214, |
|
"logps/chosen": -270.70123291015625, |
|
"logps/rejected": -251.94638061523438, |
|
"loss": 0.6568, |
|
"pred_label": 2298.27490234375, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.015008668415248394, |
|
"rewards/margins": 0.03496234118938446, |
|
"rewards/rejected": -0.019953671842813492, |
|
"step": 380, |
|
"use_label": 3703.72509765625 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.288707799767171e-07, |
|
"logits/chosen": -2.4154019355773926, |
|
"logits/rejected": -2.410980224609375, |
|
"logps/chosen": -267.69012451171875, |
|
"logps/rejected": -234.7194061279297, |
|
"loss": 0.6612, |
|
"pred_label": 2388.699951171875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.016574550420045853, |
|
"rewards/margins": 0.03597176447510719, |
|
"rewards/rejected": -0.01939721405506134, |
|
"step": 390, |
|
"use_label": 3773.300048828125 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.230500582072177e-07, |
|
"logits/chosen": -2.573418378829956, |
|
"logits/rejected": -2.5670266151428223, |
|
"logps/chosen": -296.6780090332031, |
|
"logps/rejected": -251.03109741210938, |
|
"loss": 0.6603, |
|
"pred_label": 2472.550048828125, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.019582100212574005, |
|
"rewards/margins": 0.03700990229845047, |
|
"rewards/rejected": -0.017427802085876465, |
|
"step": 400, |
|
"use_label": 3849.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1722933643771827e-07, |
|
"logits/chosen": -2.5456345081329346, |
|
"logits/rejected": -2.5357089042663574, |
|
"logps/chosen": -287.56024169921875, |
|
"logps/rejected": -250.7117462158203, |
|
"loss": 0.6565, |
|
"pred_label": 2563.89990234375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.02417120710015297, |
|
"rewards/margins": 0.03269309923052788, |
|
"rewards/rejected": -0.008521895855665207, |
|
"step": 410, |
|
"use_label": 3918.10009765625 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1140861466821885e-07, |
|
"logits/chosen": -2.425220251083374, |
|
"logits/rejected": -2.403921127319336, |
|
"logps/chosen": -231.62960815429688, |
|
"logps/rejected": -243.0712432861328, |
|
"loss": 0.6565, |
|
"pred_label": 2649.25, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.014951197430491447, |
|
"rewards/margins": 0.026637399569153786, |
|
"rewards/rejected": -0.011686199344694614, |
|
"step": 420, |
|
"use_label": 3992.75 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0558789289871943e-07, |
|
"logits/chosen": -2.4625051021575928, |
|
"logits/rejected": -2.4735419750213623, |
|
"logps/chosen": -266.99554443359375, |
|
"logps/rejected": -251.20022583007812, |
|
"loss": 0.6572, |
|
"pred_label": 2728.300048828125, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.02481374330818653, |
|
"rewards/margins": 0.031031513586640358, |
|
"rewards/rejected": -0.006217771675437689, |
|
"step": 430, |
|
"use_label": 4073.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9976717112922e-07, |
|
"logits/chosen": -2.51550555229187, |
|
"logits/rejected": -2.5493228435516357, |
|
"logps/chosen": -274.2099914550781, |
|
"logps/rejected": -245.31491088867188, |
|
"loss": 0.6567, |
|
"pred_label": 2806.375, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.01486388873308897, |
|
"rewards/margins": 0.032243579626083374, |
|
"rewards/rejected": -0.017379695549607277, |
|
"step": 440, |
|
"use_label": 4155.625 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.939464493597206e-07, |
|
"logits/chosen": -2.3210535049438477, |
|
"logits/rejected": -2.3065693378448486, |
|
"logps/chosen": -217.0265655517578, |
|
"logps/rejected": -216.3203582763672, |
|
"loss": 0.6561, |
|
"pred_label": 2881.75, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.009125987999141216, |
|
"rewards/margins": 0.017552336677908897, |
|
"rewards/rejected": -0.008426347747445107, |
|
"step": 450, |
|
"use_label": 4240.25 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8812572759022117e-07, |
|
"logits/chosen": -2.528326988220215, |
|
"logits/rejected": -2.513597011566162, |
|
"logps/chosen": -290.8050231933594, |
|
"logps/rejected": -260.4567565917969, |
|
"loss": 0.6526, |
|
"pred_label": 2965.02490234375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.024029741063714027, |
|
"rewards/margins": 0.03901149705052376, |
|
"rewards/rejected": -0.014981756918132305, |
|
"step": 460, |
|
"use_label": 4316.97509765625 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8230500582072175e-07, |
|
"logits/chosen": -2.424431085586548, |
|
"logits/rejected": -2.41347336769104, |
|
"logps/chosen": -254.66506958007812, |
|
"logps/rejected": -234.6154022216797, |
|
"loss": 0.6498, |
|
"pred_label": 3055.699951171875, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.001125166891142726, |
|
"rewards/margins": 0.027449512854218483, |
|
"rewards/rejected": -0.02632434293627739, |
|
"step": 470, |
|
"use_label": 4386.2998046875 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7648428405122233e-07, |
|
"logits/chosen": -2.400242567062378, |
|
"logits/rejected": -2.3543686866760254, |
|
"logps/chosen": -235.65719604492188, |
|
"logps/rejected": -239.40701293945312, |
|
"loss": 0.6533, |
|
"pred_label": 3150.449951171875, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.011093830689787865, |
|
"rewards/margins": 0.0209193117916584, |
|
"rewards/rejected": -0.009825478307902813, |
|
"step": 480, |
|
"use_label": 4451.5498046875 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.706635622817229e-07, |
|
"logits/chosen": -2.4891812801361084, |
|
"logits/rejected": -2.4687983989715576, |
|
"logps/chosen": -257.9729919433594, |
|
"logps/rejected": -231.1624298095703, |
|
"loss": 0.6504, |
|
"pred_label": 3233.625, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.018697496503591537, |
|
"rewards/margins": 0.042968858033418655, |
|
"rewards/rejected": -0.02427135966718197, |
|
"step": 490, |
|
"use_label": 4528.375 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.648428405122235e-07, |
|
"logits/chosen": -2.4956674575805664, |
|
"logits/rejected": -2.444516658782959, |
|
"logps/chosen": -274.7785949707031, |
|
"logps/rejected": -247.5068359375, |
|
"loss": 0.6497, |
|
"pred_label": 3323.5, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.020702462643384933, |
|
"rewards/margins": 0.048901624977588654, |
|
"rewards/rejected": -0.02819916605949402, |
|
"step": 500, |
|
"use_label": 4598.5 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.590221187427241e-07, |
|
"logits/chosen": -2.490265369415283, |
|
"logits/rejected": -2.4258570671081543, |
|
"logps/chosen": -262.3305969238281, |
|
"logps/rejected": -225.7815704345703, |
|
"loss": 0.6488, |
|
"pred_label": 3414.699951171875, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.022988121956586838, |
|
"rewards/margins": 0.04825048893690109, |
|
"rewards/rejected": -0.025262365117669106, |
|
"step": 510, |
|
"use_label": 4667.2998046875 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5320139697322466e-07, |
|
"logits/chosen": -2.533203125, |
|
"logits/rejected": -2.481980800628662, |
|
"logps/chosen": -277.6612243652344, |
|
"logps/rejected": -259.1783752441406, |
|
"loss": 0.6496, |
|
"pred_label": 3500.675048828125, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.03759980946779251, |
|
"rewards/margins": 0.07107619941234589, |
|
"rewards/rejected": -0.033476393669843674, |
|
"step": 520, |
|
"use_label": 4741.3251953125 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4738067520372524e-07, |
|
"logits/chosen": -2.478482484817505, |
|
"logits/rejected": -2.4806153774261475, |
|
"logps/chosen": -288.8346252441406, |
|
"logps/rejected": -279.86871337890625, |
|
"loss": 0.6453, |
|
"pred_label": 3598.425048828125, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.029218871146440506, |
|
"rewards/margins": 0.05338066816329956, |
|
"rewards/rejected": -0.024161797016859055, |
|
"step": 530, |
|
"use_label": 4803.5751953125 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.415599534342258e-07, |
|
"logits/chosen": -2.528951644897461, |
|
"logits/rejected": -2.4700913429260254, |
|
"logps/chosen": -278.3326416015625, |
|
"logps/rejected": -256.6160888671875, |
|
"loss": 0.6441, |
|
"pred_label": 3706.47509765625, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.03666474670171738, |
|
"rewards/margins": 0.06262056529521942, |
|
"rewards/rejected": -0.025955811142921448, |
|
"step": 540, |
|
"use_label": 4855.52490234375 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3573923166472642e-07, |
|
"logits/chosen": -2.514728546142578, |
|
"logits/rejected": -2.4884872436523438, |
|
"logps/chosen": -278.8384094238281, |
|
"logps/rejected": -251.11495971679688, |
|
"loss": 0.6419, |
|
"pred_label": 3806.125, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.028542999178171158, |
|
"rewards/margins": 0.05743313580751419, |
|
"rewards/rejected": -0.028890132904052734, |
|
"step": 550, |
|
"use_label": 4915.875 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2991850989522698e-07, |
|
"logits/chosen": -2.4250597953796387, |
|
"logits/rejected": -2.4664273262023926, |
|
"logps/chosen": -263.1305847167969, |
|
"logps/rejected": -209.12753295898438, |
|
"loss": 0.6473, |
|
"pred_label": 3901.925048828125, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.01411795150488615, |
|
"rewards/margins": 0.043625928461551666, |
|
"rewards/rejected": -0.02950797602534294, |
|
"step": 560, |
|
"use_label": 4980.0751953125 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2409778812572759e-07, |
|
"logits/chosen": -2.481841802597046, |
|
"logits/rejected": -2.4556803703308105, |
|
"logps/chosen": -256.8991394042969, |
|
"logps/rejected": -253.22006225585938, |
|
"loss": 0.6455, |
|
"pred_label": 3991.375, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.021176273003220558, |
|
"rewards/margins": 0.04561372101306915, |
|
"rewards/rejected": -0.024437451735138893, |
|
"step": 570, |
|
"use_label": 5050.625 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1827706635622817e-07, |
|
"logits/chosen": -2.513284683227539, |
|
"logits/rejected": -2.4957690238952637, |
|
"logps/chosen": -265.62261962890625, |
|
"logps/rejected": -255.05160522460938, |
|
"loss": 0.6459, |
|
"pred_label": 4092.97509765625, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.023949166759848595, |
|
"rewards/margins": 0.05676042288541794, |
|
"rewards/rejected": -0.0328112468123436, |
|
"step": 580, |
|
"use_label": 5109.02490234375 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1245634458672875e-07, |
|
"logits/chosen": -2.4279496669769287, |
|
"logits/rejected": -2.3903157711029053, |
|
"logps/chosen": -251.9611358642578, |
|
"logps/rejected": -243.91909790039062, |
|
"loss": 0.6421, |
|
"pred_label": 4190.25, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.02141127921640873, |
|
"rewards/margins": 0.04126313328742981, |
|
"rewards/rejected": -0.01985185407102108, |
|
"step": 590, |
|
"use_label": 5171.75 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0663562281722933e-07, |
|
"logits/chosen": -2.4996018409729004, |
|
"logits/rejected": -2.505694627761841, |
|
"logps/chosen": -294.9464111328125, |
|
"logps/rejected": -294.47930908203125, |
|
"loss": 0.6445, |
|
"pred_label": 4285.97509765625, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.03435974568128586, |
|
"rewards/margins": 0.053214918822050095, |
|
"rewards/rejected": -0.018855175003409386, |
|
"step": 600, |
|
"use_label": 5236.02490234375 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.008149010477299e-07, |
|
"logits/chosen": -2.500229597091675, |
|
"logits/rejected": -2.44234037399292, |
|
"logps/chosen": -288.0011291503906, |
|
"logps/rejected": -231.5128936767578, |
|
"loss": 0.6418, |
|
"pred_label": 4384.60009765625, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.03441072255373001, |
|
"rewards/margins": 0.06906420737504959, |
|
"rewards/rejected": -0.03465348482131958, |
|
"step": 610, |
|
"use_label": 5297.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.949941792782305e-07, |
|
"logits/chosen": -2.4949872493743896, |
|
"logits/rejected": -2.4376912117004395, |
|
"logps/chosen": -271.5177917480469, |
|
"logps/rejected": -241.4095458984375, |
|
"loss": 0.6436, |
|
"pred_label": 4473.35009765625, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.02876521274447441, |
|
"rewards/margins": 0.04532798379659653, |
|
"rewards/rejected": -0.016562769189476967, |
|
"step": 620, |
|
"use_label": 5368.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8917345750873107e-07, |
|
"logits/chosen": -2.4714274406433105, |
|
"logits/rejected": -2.4965994358062744, |
|
"logps/chosen": -272.4942321777344, |
|
"logps/rejected": -245.6349334716797, |
|
"loss": 0.6423, |
|
"pred_label": 4572.5, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.033678844571113586, |
|
"rewards/margins": 0.053819943219423294, |
|
"rewards/rejected": -0.02014109678566456, |
|
"step": 630, |
|
"use_label": 5429.5 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8335273573923165e-07, |
|
"logits/chosen": -2.4581923484802246, |
|
"logits/rejected": -2.474356174468994, |
|
"logps/chosen": -241.07748413085938, |
|
"logps/rejected": -239.5830535888672, |
|
"loss": 0.6398, |
|
"pred_label": 4672.5751953125, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.036277130246162415, |
|
"rewards/margins": 0.06419126689434052, |
|
"rewards/rejected": -0.027914145961403847, |
|
"step": 640, |
|
"use_label": 5489.4248046875 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7753201396973226e-07, |
|
"logits/chosen": -2.4635560512542725, |
|
"logits/rejected": -2.4887423515319824, |
|
"logps/chosen": -286.7969665527344, |
|
"logps/rejected": -265.29522705078125, |
|
"loss": 0.6357, |
|
"pred_label": 4776.0751953125, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.028953587636351585, |
|
"rewards/margins": 0.07534638047218323, |
|
"rewards/rejected": -0.04639279097318649, |
|
"step": 650, |
|
"use_label": 5545.9248046875 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7171129220023281e-07, |
|
"logits/chosen": -2.5433027744293213, |
|
"logits/rejected": -2.4936447143554688, |
|
"logps/chosen": -285.5106506347656, |
|
"logps/rejected": -242.0889434814453, |
|
"loss": 0.6451, |
|
"pred_label": 4884.875, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.024147575721144676, |
|
"rewards/margins": 0.03708204999566078, |
|
"rewards/rejected": -0.012934470549225807, |
|
"step": 660, |
|
"use_label": 5597.125 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.658905704307334e-07, |
|
"logits/chosen": -2.461125612258911, |
|
"logits/rejected": -2.419694185256958, |
|
"logps/chosen": -269.296142578125, |
|
"logps/rejected": -234.6914520263672, |
|
"loss": 0.6392, |
|
"pred_label": 4982.97509765625, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.032385144382715225, |
|
"rewards/margins": 0.07203620672225952, |
|
"rewards/rejected": -0.039651062339544296, |
|
"step": 670, |
|
"use_label": 5659.02490234375 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.60069848661234e-07, |
|
"logits/chosen": -2.4804444313049316, |
|
"logits/rejected": -2.5111165046691895, |
|
"logps/chosen": -274.47027587890625, |
|
"logps/rejected": -250.04782104492188, |
|
"loss": 0.6424, |
|
"pred_label": 5080.27490234375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.020998146384954453, |
|
"rewards/margins": 0.029987860471010208, |
|
"rewards/rejected": -0.008989715948700905, |
|
"step": 680, |
|
"use_label": 5721.72509765625 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5424912689173456e-07, |
|
"logits/chosen": -2.475935220718384, |
|
"logits/rejected": -2.4738192558288574, |
|
"logps/chosen": -239.99905395507812, |
|
"logps/rejected": -201.9076690673828, |
|
"loss": 0.6373, |
|
"pred_label": 5173.0751953125, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.02569044753909111, |
|
"rewards/margins": 0.05879662558436394, |
|
"rewards/rejected": -0.03310617804527283, |
|
"step": 690, |
|
"use_label": 5788.9248046875 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4842840512223514e-07, |
|
"logits/chosen": -2.5004830360412598, |
|
"logits/rejected": -2.4815783500671387, |
|
"logps/chosen": -262.7047424316406, |
|
"logps/rejected": -232.9453887939453, |
|
"loss": 0.6416, |
|
"pred_label": 5271.14990234375, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.021065320819616318, |
|
"rewards/margins": 0.05442025512456894, |
|
"rewards/rejected": -0.03335493057966232, |
|
"step": 700, |
|
"use_label": 5850.85009765625 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4260768335273574e-07, |
|
"logits/chosen": -2.5241992473602295, |
|
"logits/rejected": -2.4814987182617188, |
|
"logps/chosen": -279.4823303222656, |
|
"logps/rejected": -233.07373046875, |
|
"loss": 0.6346, |
|
"pred_label": 5374.75, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.02996956743299961, |
|
"rewards/margins": 0.0639830082654953, |
|
"rewards/rejected": -0.03401344642043114, |
|
"step": 710, |
|
"use_label": 5907.25 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3678696158323632e-07, |
|
"logits/chosen": -2.4806971549987793, |
|
"logits/rejected": -2.4919962882995605, |
|
"logps/chosen": -261.57806396484375, |
|
"logps/rejected": -246.28042602539062, |
|
"loss": 0.6397, |
|
"pred_label": 5479.1748046875, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.034241896122694016, |
|
"rewards/margins": 0.06610169261693954, |
|
"rewards/rejected": -0.03185979649424553, |
|
"step": 720, |
|
"use_label": 5962.8251953125 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3096623981373688e-07, |
|
"logits/chosen": -2.4852230548858643, |
|
"logits/rejected": -2.473806142807007, |
|
"logps/chosen": -287.5245056152344, |
|
"logps/rejected": -234.40615844726562, |
|
"loss": 0.6392, |
|
"pred_label": 5584.5, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.04039750620722771, |
|
"rewards/margins": 0.07070399820804596, |
|
"rewards/rejected": -0.0303064975887537, |
|
"step": 730, |
|
"use_label": 6017.5 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2514551804423749e-07, |
|
"logits/chosen": -2.466087818145752, |
|
"logits/rejected": -2.5063462257385254, |
|
"logps/chosen": -295.0579528808594, |
|
"logps/rejected": -279.02740478515625, |
|
"loss": 0.6358, |
|
"pred_label": 5692.77490234375, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.040842343121767044, |
|
"rewards/margins": 0.07114710658788681, |
|
"rewards/rejected": -0.030304765328764915, |
|
"step": 740, |
|
"use_label": 6069.22509765625 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1932479627473807e-07, |
|
"logits/chosen": -2.4788904190063477, |
|
"logits/rejected": -2.460747718811035, |
|
"logps/chosen": -250.11758422851562, |
|
"logps/rejected": -250.706787109375, |
|
"loss": 0.6351, |
|
"pred_label": 5804.22509765625, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.02599816396832466, |
|
"rewards/margins": 0.05924210697412491, |
|
"rewards/rejected": -0.03324393928050995, |
|
"step": 750, |
|
"use_label": 6117.77490234375 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1350407450523865e-07, |
|
"logits/chosen": -2.4908533096313477, |
|
"logits/rejected": -2.5631959438323975, |
|
"logps/chosen": -280.65643310546875, |
|
"logps/rejected": -252.4939422607422, |
|
"loss": 0.6382, |
|
"pred_label": 5910.4248046875, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.035374678671360016, |
|
"rewards/margins": 0.06185116618871689, |
|
"rewards/rejected": -0.026476481929421425, |
|
"step": 760, |
|
"use_label": 6171.5751953125 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0768335273573923e-07, |
|
"logits/chosen": -2.5305213928222656, |
|
"logits/rejected": -2.548992156982422, |
|
"logps/chosen": -291.19085693359375, |
|
"logps/rejected": -276.0303649902344, |
|
"loss": 0.6357, |
|
"pred_label": 6023.6748046875, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.038612816482782364, |
|
"rewards/margins": 0.0703565925359726, |
|
"rewards/rejected": -0.03174378350377083, |
|
"step": 770, |
|
"use_label": 6218.3251953125 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0186263096623981e-07, |
|
"logits/chosen": -2.500197172164917, |
|
"logits/rejected": -2.5004265308380127, |
|
"logps/chosen": -269.6267395019531, |
|
"logps/rejected": -237.23226928710938, |
|
"loss": 0.6382, |
|
"pred_label": 6124.77490234375, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.03419669717550278, |
|
"rewards/margins": 0.05804433301091194, |
|
"rewards/rejected": -0.023847635835409164, |
|
"step": 780, |
|
"use_label": 6277.22509765625 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.604190919674039e-08, |
|
"logits/chosen": -2.5122509002685547, |
|
"logits/rejected": -2.527552843093872, |
|
"logps/chosen": -258.3880920410156, |
|
"logps/rejected": -250.8657684326172, |
|
"loss": 0.6408, |
|
"pred_label": 6225.125, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.024235274642705917, |
|
"rewards/margins": 0.03934397175908089, |
|
"rewards/rejected": -0.015108692459762096, |
|
"step": 790, |
|
"use_label": 6336.875 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.022118742724097e-08, |
|
"logits/chosen": -2.409346103668213, |
|
"logits/rejected": -2.458906650543213, |
|
"logps/chosen": -291.98406982421875, |
|
"logps/rejected": -263.28204345703125, |
|
"loss": 0.632, |
|
"pred_label": 6330.6748046875, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.03406571224331856, |
|
"rewards/margins": 0.06302747130393982, |
|
"rewards/rejected": -0.028961753472685814, |
|
"step": 800, |
|
"use_label": 6391.3251953125 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.440046565774157e-08, |
|
"logits/chosen": -2.4541163444519043, |
|
"logits/rejected": -2.426970958709717, |
|
"logps/chosen": -272.098388671875, |
|
"logps/rejected": -219.48318481445312, |
|
"loss": 0.6397, |
|
"pred_label": 6430.9248046875, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.0267607681453228, |
|
"rewards/margins": 0.049745358526706696, |
|
"rewards/rejected": -0.022984590381383896, |
|
"step": 810, |
|
"use_label": 6451.0751953125 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.857974388824213e-08, |
|
"logits/chosen": -2.443247079849243, |
|
"logits/rejected": -2.4559197425842285, |
|
"logps/chosen": -278.1483459472656, |
|
"logps/rejected": -233.55221557617188, |
|
"loss": 0.6401, |
|
"pred_label": 6533.77490234375, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.03652986139059067, |
|
"rewards/margins": 0.0706639438867569, |
|
"rewards/rejected": -0.03413407504558563, |
|
"step": 820, |
|
"use_label": 6508.22509765625 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.275902211874273e-08, |
|
"logits/chosen": -2.492734432220459, |
|
"logits/rejected": -2.5021793842315674, |
|
"logps/chosen": -252.682373046875, |
|
"logps/rejected": -271.76885986328125, |
|
"loss": 0.6402, |
|
"pred_label": 6633.2001953125, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.02864421345293522, |
|
"rewards/margins": 0.06612807512283325, |
|
"rewards/rejected": -0.03748386353254318, |
|
"step": 830, |
|
"use_label": 6568.7998046875 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.693830034924331e-08, |
|
"logits/chosen": -2.431647300720215, |
|
"logits/rejected": -2.4801371097564697, |
|
"logps/chosen": -254.59475708007812, |
|
"logps/rejected": -252.9589080810547, |
|
"loss": 0.6335, |
|
"pred_label": 6738.25, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.02285713143646717, |
|
"rewards/margins": 0.05092828720808029, |
|
"rewards/rejected": -0.02807115949690342, |
|
"step": 840, |
|
"use_label": 6623.75 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.111757857974389e-08, |
|
"logits/chosen": -2.4767704010009766, |
|
"logits/rejected": -2.5524322986602783, |
|
"logps/chosen": -284.7981872558594, |
|
"logps/rejected": -236.7716522216797, |
|
"loss": 0.6388, |
|
"pred_label": 6841.77490234375, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.05144144967198372, |
|
"rewards/margins": 0.09114910662174225, |
|
"rewards/rejected": -0.03970765322446823, |
|
"step": 850, |
|
"use_label": 6680.22509765625 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.529685681024446e-08, |
|
"logits/chosen": -2.440410614013672, |
|
"logits/rejected": -2.4388670921325684, |
|
"logps/chosen": -274.1076354980469, |
|
"logps/rejected": -265.62774658203125, |
|
"loss": 0.639, |
|
"pred_label": 6939.10009765625, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.021466780453920364, |
|
"rewards/margins": 0.053748417645692825, |
|
"rewards/rejected": -0.032281629741191864, |
|
"step": 860, |
|
"use_label": 6742.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.947613504074505e-08, |
|
"logits/chosen": -2.4695324897766113, |
|
"logits/rejected": -2.5007336139678955, |
|
"logps/chosen": -234.1849822998047, |
|
"logps/rejected": -233.623046875, |
|
"loss": 0.6369, |
|
"pred_label": 7033.625, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.03171985596418381, |
|
"rewards/margins": 0.0530589334666729, |
|
"rewards/rejected": -0.02133907750248909, |
|
"step": 870, |
|
"use_label": 6808.375 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.365541327124563e-08, |
|
"logits/chosen": -2.5348362922668457, |
|
"logits/rejected": -2.48889422416687, |
|
"logps/chosen": -275.351806640625, |
|
"logps/rejected": -252.8624267578125, |
|
"loss": 0.6378, |
|
"pred_label": 7132.27490234375, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.04395347461104393, |
|
"rewards/margins": 0.07273541390895844, |
|
"rewards/rejected": -0.028781946748495102, |
|
"step": 880, |
|
"use_label": 6869.72509765625 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.783469150174622e-08, |
|
"logits/chosen": -2.5349538326263428, |
|
"logits/rejected": -2.5645627975463867, |
|
"logps/chosen": -269.9933166503906, |
|
"logps/rejected": -262.0923156738281, |
|
"loss": 0.6362, |
|
"pred_label": 7239.27490234375, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.024751801043748856, |
|
"rewards/margins": 0.05075649172067642, |
|
"rewards/rejected": -0.026004692539572716, |
|
"step": 890, |
|
"use_label": 6922.72509765625 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.20139697322468e-08, |
|
"logits/chosen": -2.4200382232666016, |
|
"logits/rejected": -2.4404149055480957, |
|
"logps/chosen": -264.64501953125, |
|
"logps/rejected": -250.6757354736328, |
|
"loss": 0.6397, |
|
"pred_label": 7349.375, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.02098809741437435, |
|
"rewards/margins": 0.051287733018398285, |
|
"rewards/rejected": -0.030299633741378784, |
|
"step": 900, |
|
"use_label": 6972.625 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.619324796274738e-08, |
|
"logits/chosen": -2.485106945037842, |
|
"logits/rejected": -2.509174346923828, |
|
"logps/chosen": -282.1513671875, |
|
"logps/rejected": -253.21719360351562, |
|
"loss": 0.6356, |
|
"pred_label": 7449.4501953125, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.0359305813908577, |
|
"rewards/margins": 0.05107826739549637, |
|
"rewards/rejected": -0.015147687867283821, |
|
"step": 910, |
|
"use_label": 7032.5498046875 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.037252619324796e-08, |
|
"logits/chosen": -2.4887633323669434, |
|
"logits/rejected": -2.4791512489318848, |
|
"logps/chosen": -279.5574951171875, |
|
"logps/rejected": -249.22793579101562, |
|
"loss": 0.6371, |
|
"pred_label": 7561.125, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.034208498895168304, |
|
"rewards/margins": 0.06242678314447403, |
|
"rewards/rejected": -0.028218284249305725, |
|
"step": 920, |
|
"use_label": 7080.875 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4551804423748545e-08, |
|
"logits/chosen": -2.499810218811035, |
|
"logits/rejected": -2.465851306915283, |
|
"logps/chosen": -258.83099365234375, |
|
"logps/rejected": -234.7783203125, |
|
"loss": 0.6375, |
|
"pred_label": 7669.27490234375, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.04888115078210831, |
|
"rewards/margins": 0.0783516988158226, |
|
"rewards/rejected": -0.02947053872048855, |
|
"step": 930, |
|
"use_label": 7132.72509765625 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.731082654249125e-09, |
|
"logits/chosen": -2.530332565307617, |
|
"logits/rejected": -2.512815237045288, |
|
"logps/chosen": -277.4713439941406, |
|
"logps/rejected": -260.1994934082031, |
|
"loss": 0.6359, |
|
"pred_label": 7771.25, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.009665247052907944, |
|
"rewards/margins": 0.03832637146115303, |
|
"rewards/rejected": -0.028661120682954788, |
|
"step": 940, |
|
"use_label": 7190.75 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.910360884749709e-09, |
|
"logits/chosen": -2.4885025024414062, |
|
"logits/rejected": -2.4381051063537598, |
|
"logps/chosen": -255.72409057617188, |
|
"logps/rejected": -244.0724639892578, |
|
"loss": 0.6403, |
|
"pred_label": 7870.75, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0019513871520757675, |
|
"rewards/margins": 0.03540351241827011, |
|
"rewards/rejected": -0.03345213085412979, |
|
"step": 950, |
|
"use_label": 7251.25 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.5062618255615234, |
|
"eval_logits/rejected": -2.5199859142303467, |
|
"eval_logps/chosen": -272.93438720703125, |
|
"eval_logps/rejected": -250.28643798828125, |
|
"eval_loss": 0.6350826025009155, |
|
"eval_pred_label": 8131.07177734375, |
|
"eval_rewards/accuracies": 0.6200000047683716, |
|
"eval_rewards/chosen": 0.029984984546899796, |
|
"eval_rewards/margins": 0.06345725804567337, |
|
"eval_rewards/rejected": -0.033472273498773575, |
|
"eval_runtime": 444.1068, |
|
"eval_samples_per_second": 4.503, |
|
"eval_steps_per_second": 0.281, |
|
"eval_use_label": 7400.92822265625, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6552608817035616, |
|
"train_runtime": 24261.6882, |
|
"train_samples_per_second": 2.52, |
|
"train_steps_per_second": 0.039 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|