|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": -2.980285167694092, |
|
"logits/rejected": -2.87275767326355, |
|
"logps/chosen": -313.4390563964844, |
|
"logps/rejected": -236.1754150390625, |
|
"loss": 0.6931, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1, |
|
"use_label": 10.0 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.208333333333334e-06, |
|
"logits/chosen": -2.861464262008667, |
|
"logits/rejected": -2.907951593399048, |
|
"logps/chosen": -323.6517333984375, |
|
"logps/rejected": -284.9451904296875, |
|
"loss": 0.6921, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0027037172112613916, |
|
"rewards/margins": 0.001292458618991077, |
|
"rewards/rejected": 0.0014112575445324183, |
|
"step": 10, |
|
"use_label": 90.0 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"logits/chosen": -2.7527613639831543, |
|
"logits/rejected": -2.796025037765503, |
|
"logps/chosen": -236.6191864013672, |
|
"logps/rejected": -242.22232055664062, |
|
"loss": 0.6831, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.007748906500637531, |
|
"rewards/margins": 0.018490687012672424, |
|
"rewards/rejected": -0.010741781443357468, |
|
"step": 20, |
|
"use_label": 242.0 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5625e-05, |
|
"logits/chosen": -2.8575313091278076, |
|
"logits/rejected": -2.829209804534912, |
|
"logps/chosen": -278.3554992675781, |
|
"logps/rejected": -252.61123657226562, |
|
"loss": 0.6574, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.026403894647955894, |
|
"rewards/margins": 0.09085250645875931, |
|
"rewards/rejected": -0.06444860994815826, |
|
"step": 30, |
|
"use_label": 402.0 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"logits/chosen": -2.840946674346924, |
|
"logits/rejected": -2.8493659496307373, |
|
"logps/chosen": -281.32928466796875, |
|
"logps/rejected": -277.8607482910156, |
|
"loss": 0.6339, |
|
"pred_label": 0.10000000149011612, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.02641097828745842, |
|
"rewards/margins": 0.2079576551914215, |
|
"rewards/rejected": -0.1815466731786728, |
|
"step": 40, |
|
"use_label": 561.9000244140625 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.604166666666667e-05, |
|
"logits/chosen": -2.854478120803833, |
|
"logits/rejected": -2.839433193206787, |
|
"logps/chosen": -266.80517578125, |
|
"logps/rejected": -261.9176025390625, |
|
"loss": 0.5914, |
|
"pred_label": 2.950000047683716, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.06968289613723755, |
|
"rewards/margins": 0.33043327927589417, |
|
"rewards/rejected": -0.4001162648200989, |
|
"step": 50, |
|
"use_label": 719.0499877929688 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.125e-05, |
|
"logits/chosen": -2.817157506942749, |
|
"logits/rejected": -2.8072521686553955, |
|
"logps/chosen": -300.76080322265625, |
|
"logps/rejected": -290.3218994140625, |
|
"loss": 0.5988, |
|
"pred_label": 11.574999809265137, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.03455673158168793, |
|
"rewards/margins": 0.3870925307273865, |
|
"rewards/rejected": -0.4216492772102356, |
|
"step": 60, |
|
"use_label": 870.4249877929688 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6458333333333336e-05, |
|
"logits/chosen": -2.820021152496338, |
|
"logits/rejected": -2.813854217529297, |
|
"logps/chosen": -294.513671875, |
|
"logps/rejected": -278.9858093261719, |
|
"loss": 0.5227, |
|
"pred_label": 25.399999618530273, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.020013216882944107, |
|
"rewards/margins": 0.5820196866989136, |
|
"rewards/rejected": -0.6020328998565674, |
|
"step": 70, |
|
"use_label": 1016.5999755859375 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.166666666666667e-05, |
|
"logits/chosen": -2.7645280361175537, |
|
"logits/rejected": -2.7516016960144043, |
|
"logps/chosen": -306.9416809082031, |
|
"logps/rejected": -288.9856262207031, |
|
"loss": 0.4536, |
|
"pred_label": 58.599998474121094, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.16363248229026794, |
|
"rewards/margins": 0.8336677551269531, |
|
"rewards/rejected": -0.9973002672195435, |
|
"step": 80, |
|
"use_label": 1143.4000244140625 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6875e-05, |
|
"logits/chosen": -2.6945321559906006, |
|
"logits/rejected": -2.747893810272217, |
|
"logps/chosen": -295.63934326171875, |
|
"logps/rejected": -275.24127197265625, |
|
"loss": 0.4735, |
|
"pred_label": 106.5, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.014383295550942421, |
|
"rewards/margins": 0.911063551902771, |
|
"rewards/rejected": -0.8966802358627319, |
|
"step": 90, |
|
"use_label": 1255.5 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.976717112922003e-05, |
|
"logits/chosen": -2.719611406326294, |
|
"logits/rejected": -2.718784809112549, |
|
"logps/chosen": -276.9577941894531, |
|
"logps/rejected": -306.4429626464844, |
|
"loss": 0.484, |
|
"pred_label": 153.3249969482422, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.28260549902915955, |
|
"rewards/margins": 1.008049726486206, |
|
"rewards/rejected": -1.290655255317688, |
|
"step": 100, |
|
"use_label": 1368.675048828125 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.918509895227008e-05, |
|
"logits/chosen": -2.7281861305236816, |
|
"logits/rejected": -2.687361478805542, |
|
"logps/chosen": -269.6018981933594, |
|
"logps/rejected": -263.89166259765625, |
|
"loss": 0.4361, |
|
"pred_label": 213.60000610351562, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.4927287697792053, |
|
"rewards/margins": 0.9790979623794556, |
|
"rewards/rejected": -1.4718266725540161, |
|
"step": 110, |
|
"use_label": 1468.4000244140625 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.860302677532014e-05, |
|
"logits/chosen": -2.7501158714294434, |
|
"logits/rejected": -2.742772340774536, |
|
"logps/chosen": -281.5338439941406, |
|
"logps/rejected": -259.219970703125, |
|
"loss": 0.4674, |
|
"pred_label": 270.0249938964844, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.20209825038909912, |
|
"rewards/margins": 0.9515292048454285, |
|
"rewards/rejected": -1.1536273956298828, |
|
"step": 120, |
|
"use_label": 1571.9749755859375 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.80209545983702e-05, |
|
"logits/chosen": -2.8077096939086914, |
|
"logits/rejected": -2.7647416591644287, |
|
"logps/chosen": -308.7791748046875, |
|
"logps/rejected": -282.69976806640625, |
|
"loss": 0.4803, |
|
"pred_label": 317.4750061035156, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.15117435157299042, |
|
"rewards/margins": 0.8076679110527039, |
|
"rewards/rejected": -0.958842396736145, |
|
"step": 130, |
|
"use_label": 1684.5250244140625 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.743888242142026e-05, |
|
"logits/chosen": -2.814915657043457, |
|
"logits/rejected": -2.8093409538269043, |
|
"logps/chosen": -261.60418701171875, |
|
"logps/rejected": -249.7925567626953, |
|
"loss": 0.3944, |
|
"pred_label": 370.67498779296875, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.34973591566085815, |
|
"rewards/margins": 1.1276283264160156, |
|
"rewards/rejected": -1.4773643016815186, |
|
"step": 140, |
|
"use_label": 1791.324951171875 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.685681024447032e-05, |
|
"logits/chosen": -2.71696138381958, |
|
"logits/rejected": -2.740691661834717, |
|
"logps/chosen": -302.32843017578125, |
|
"logps/rejected": -298.6430358886719, |
|
"loss": 0.3948, |
|
"pred_label": 430.0, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.7442375421524048, |
|
"rewards/margins": 0.9453207850456238, |
|
"rewards/rejected": -1.6895582675933838, |
|
"step": 150, |
|
"use_label": 1892.0 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.6274738067520374e-05, |
|
"logits/chosen": -2.7621922492980957, |
|
"logits/rejected": -2.707430601119995, |
|
"logps/chosen": -264.4729919433594, |
|
"logps/rejected": -283.64837646484375, |
|
"loss": 0.4037, |
|
"pred_label": 494.9750061035156, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.20909884572029114, |
|
"rewards/margins": 1.2514889240264893, |
|
"rewards/rejected": -1.4605878591537476, |
|
"step": 160, |
|
"use_label": 1987.0250244140625 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5692665890570435e-05, |
|
"logits/chosen": -2.800288438796997, |
|
"logits/rejected": -2.8005611896514893, |
|
"logps/chosen": -298.9527282714844, |
|
"logps/rejected": -264.09014892578125, |
|
"loss": 0.3923, |
|
"pred_label": 555.2249755859375, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.24765756726264954, |
|
"rewards/margins": 1.1968435049057007, |
|
"rewards/rejected": -1.4445011615753174, |
|
"step": 170, |
|
"use_label": 2086.77490234375 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.511059371362049e-05, |
|
"logits/chosen": -2.7035715579986572, |
|
"logits/rejected": -2.6763572692871094, |
|
"logps/chosen": -261.8144836425781, |
|
"logps/rejected": -294.1943054199219, |
|
"loss": 0.3547, |
|
"pred_label": 639.875, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.588528573513031, |
|
"rewards/margins": 1.7626692056655884, |
|
"rewards/rejected": -2.3511977195739746, |
|
"step": 180, |
|
"use_label": 2162.125 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.452852153667055e-05, |
|
"logits/chosen": -2.788900375366211, |
|
"logits/rejected": -2.745344638824463, |
|
"logps/chosen": -311.311767578125, |
|
"logps/rejected": -287.326416015625, |
|
"loss": 0.3357, |
|
"pred_label": 725.2999877929688, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.4055810868740082, |
|
"rewards/margins": 2.22284197807312, |
|
"rewards/rejected": -2.6284232139587402, |
|
"step": 190, |
|
"use_label": 2236.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.394644935972061e-05, |
|
"logits/chosen": -2.789668560028076, |
|
"logits/rejected": -2.7655420303344727, |
|
"logps/chosen": -274.904052734375, |
|
"logps/rejected": -299.71356201171875, |
|
"loss": 0.3242, |
|
"pred_label": 815.0, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3575278520584106, |
|
"rewards/margins": 1.7897872924804688, |
|
"rewards/rejected": -3.147315263748169, |
|
"step": 200, |
|
"use_label": 2307.0 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.336437718277067e-05, |
|
"logits/chosen": -2.7797892093658447, |
|
"logits/rejected": -2.796938419342041, |
|
"logps/chosen": -310.7392883300781, |
|
"logps/rejected": -278.1353454589844, |
|
"loss": 0.3789, |
|
"pred_label": 897.2750244140625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9491372108459473, |
|
"rewards/margins": 1.4454370737075806, |
|
"rewards/rejected": -2.394573926925659, |
|
"step": 210, |
|
"use_label": 2384.72509765625 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.278230500582072e-05, |
|
"logits/chosen": -2.729732036590576, |
|
"logits/rejected": -2.7413415908813477, |
|
"logps/chosen": -283.9934387207031, |
|
"logps/rejected": -297.1932678222656, |
|
"loss": 0.3046, |
|
"pred_label": 983.3499755859375, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9001734852790833, |
|
"rewards/margins": 1.7534692287445068, |
|
"rewards/rejected": -2.6536426544189453, |
|
"step": 220, |
|
"use_label": 2458.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.220023282887078e-05, |
|
"logits/chosen": -2.7093918323516846, |
|
"logits/rejected": -2.724216938018799, |
|
"logps/chosen": -320.80218505859375, |
|
"logps/rejected": -295.5621643066406, |
|
"loss": 0.2963, |
|
"pred_label": 1077.550048828125, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.2629055976867676, |
|
"rewards/margins": 1.8823902606964111, |
|
"rewards/rejected": -3.1452958583831787, |
|
"step": 230, |
|
"use_label": 2524.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.161816065192084e-05, |
|
"logits/chosen": -2.707616090774536, |
|
"logits/rejected": -2.6788878440856934, |
|
"logps/chosen": -300.3539123535156, |
|
"logps/rejected": -273.60333251953125, |
|
"loss": 0.2668, |
|
"pred_label": 1176.5250244140625, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.5776500701904297, |
|
"rewards/margins": 2.4807069301605225, |
|
"rewards/rejected": -5.058356761932373, |
|
"step": 240, |
|
"use_label": 2585.47509765625 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.10360884749709e-05, |
|
"logits/chosen": -2.7368381023406982, |
|
"logits/rejected": -2.722391366958618, |
|
"logps/chosen": -316.1106262207031, |
|
"logps/rejected": -321.246337890625, |
|
"loss": 0.3166, |
|
"pred_label": 1270.574951171875, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.5470499992370605, |
|
"rewards/margins": 2.635700225830078, |
|
"rewards/rejected": -5.182750225067139, |
|
"step": 250, |
|
"use_label": 2651.425048828125 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.045401629802096e-05, |
|
"logits/chosen": -2.7675890922546387, |
|
"logits/rejected": -2.7713561058044434, |
|
"logps/chosen": -266.34136962890625, |
|
"logps/rejected": -286.3191833496094, |
|
"loss": 0.3408, |
|
"pred_label": 1354.6500244140625, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7731956243515015, |
|
"rewards/margins": 1.8298133611679077, |
|
"rewards/rejected": -3.603008985519409, |
|
"step": 260, |
|
"use_label": 2727.35009765625 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.9871944121071014e-05, |
|
"logits/chosen": -2.764836311340332, |
|
"logits/rejected": -2.7679736614227295, |
|
"logps/chosen": -306.4355773925781, |
|
"logps/rejected": -316.5330505371094, |
|
"loss": 0.3, |
|
"pred_label": 1444.324951171875, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.4564238786697388, |
|
"rewards/margins": 2.7216858863830566, |
|
"rewards/rejected": -4.178110122680664, |
|
"step": 270, |
|
"use_label": 2797.675048828125 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.928987194412107e-05, |
|
"logits/chosen": -2.81805682182312, |
|
"logits/rejected": -2.8056905269622803, |
|
"logps/chosen": -309.92047119140625, |
|
"logps/rejected": -300.94268798828125, |
|
"loss": 0.3011, |
|
"pred_label": 1543.4749755859375, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.595649003982544, |
|
"rewards/margins": 2.275697708129883, |
|
"rewards/rejected": -3.8713467121124268, |
|
"step": 280, |
|
"use_label": 2858.52490234375 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.870779976717113e-05, |
|
"logits/chosen": -2.786515235900879, |
|
"logits/rejected": -2.7539620399475098, |
|
"logps/chosen": -312.62060546875, |
|
"logps/rejected": -268.4746398925781, |
|
"loss": 0.3045, |
|
"pred_label": 1641.2249755859375, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.9698221683502197, |
|
"rewards/margins": 1.8559925556182861, |
|
"rewards/rejected": -3.825814723968506, |
|
"step": 290, |
|
"use_label": 2920.77490234375 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.812572759022119e-05, |
|
"logits/chosen": -2.79560923576355, |
|
"logits/rejected": -2.744600296020508, |
|
"logps/chosen": -279.0300598144531, |
|
"logps/rejected": -275.5624084472656, |
|
"loss": 0.3083, |
|
"pred_label": 1731.300048828125, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.5351210832595825, |
|
"rewards/margins": 2.036700963973999, |
|
"rewards/rejected": -3.571821928024292, |
|
"step": 300, |
|
"use_label": 2990.699951171875 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7543655413271246e-05, |
|
"logits/chosen": -2.7574381828308105, |
|
"logits/rejected": -2.6999497413635254, |
|
"logps/chosen": -285.3556213378906, |
|
"logps/rejected": -344.51849365234375, |
|
"loss": 0.2794, |
|
"pred_label": 1830.0, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.8423988819122314, |
|
"rewards/margins": 3.7551066875457764, |
|
"rewards/rejected": -5.597506046295166, |
|
"step": 310, |
|
"use_label": 3052.0 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.696158323632131e-05, |
|
"logits/chosen": -2.7264797687530518, |
|
"logits/rejected": -2.687101125717163, |
|
"logps/chosen": -318.2635192871094, |
|
"logps/rejected": -331.156005859375, |
|
"loss": 0.2657, |
|
"pred_label": 1930.050048828125, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.228361129760742, |
|
"rewards/margins": 2.806436777114868, |
|
"rewards/rejected": -5.034797668457031, |
|
"step": 320, |
|
"use_label": 3111.949951171875 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.637951105937136e-05, |
|
"logits/chosen": -2.7709312438964844, |
|
"logits/rejected": -2.7176883220672607, |
|
"logps/chosen": -289.0472717285156, |
|
"logps/rejected": -277.4143371582031, |
|
"loss": 0.2868, |
|
"pred_label": 2034.1500244140625, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8098747730255127, |
|
"rewards/margins": 2.50335955619812, |
|
"rewards/rejected": -4.313233852386475, |
|
"step": 330, |
|
"use_label": 3167.85009765625 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.579743888242142e-05, |
|
"logits/chosen": -2.69682240486145, |
|
"logits/rejected": -2.7000985145568848, |
|
"logps/chosen": -308.01141357421875, |
|
"logps/rejected": -279.4844665527344, |
|
"loss": 0.3048, |
|
"pred_label": 2128.949951171875, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.883141279220581, |
|
"rewards/margins": 1.98026442527771, |
|
"rewards/rejected": -3.863405704498291, |
|
"step": 340, |
|
"use_label": 3233.050048828125 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.5215366705471484e-05, |
|
"logits/chosen": -2.663508176803589, |
|
"logits/rejected": -2.6349689960479736, |
|
"logps/chosen": -314.67962646484375, |
|
"logps/rejected": -331.1473693847656, |
|
"loss": 0.2869, |
|
"pred_label": 2234.77490234375, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.7484877109527588, |
|
"rewards/margins": 3.6000218391418457, |
|
"rewards/rejected": -5.348509788513184, |
|
"step": 350, |
|
"use_label": 3287.22509765625 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.463329452852154e-05, |
|
"logits/chosen": -2.7377424240112305, |
|
"logits/rejected": -2.729678153991699, |
|
"logps/chosen": -297.26873779296875, |
|
"logps/rejected": -313.16888427734375, |
|
"loss": 0.2666, |
|
"pred_label": 2342.22509765625, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.4474658966064453, |
|
"rewards/margins": 2.349332094192505, |
|
"rewards/rejected": -4.796797752380371, |
|
"step": 360, |
|
"use_label": 3339.77490234375 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.40512223515716e-05, |
|
"logits/chosen": -2.6696877479553223, |
|
"logits/rejected": -2.5973799228668213, |
|
"logps/chosen": -310.88897705078125, |
|
"logps/rejected": -354.97222900390625, |
|
"loss": 0.2556, |
|
"pred_label": 2450.97509765625, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.468627452850342, |
|
"rewards/margins": 4.456360816955566, |
|
"rewards/rejected": -7.924988746643066, |
|
"step": 370, |
|
"use_label": 3391.02490234375 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3469150174621654e-05, |
|
"logits/chosen": -2.7021727561950684, |
|
"logits/rejected": -2.679241418838501, |
|
"logps/chosen": -320.2778015136719, |
|
"logps/rejected": -327.36431884765625, |
|
"loss": 0.2394, |
|
"pred_label": 2560.375, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.5047569274902344, |
|
"rewards/margins": 3.173809289932251, |
|
"rewards/rejected": -6.678565979003906, |
|
"step": 380, |
|
"use_label": 3441.625 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.288707799767171e-05, |
|
"logits/chosen": -2.6417362689971924, |
|
"logits/rejected": -2.6089835166931152, |
|
"logps/chosen": -295.4016418457031, |
|
"logps/rejected": -288.3475341796875, |
|
"loss": 0.2799, |
|
"pred_label": 2666.675048828125, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.2609713077545166, |
|
"rewards/margins": 3.1455700397491455, |
|
"rewards/rejected": -4.406540870666504, |
|
"step": 390, |
|
"use_label": 3495.324951171875 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2305005820721776e-05, |
|
"logits/chosen": -2.774970054626465, |
|
"logits/rejected": -2.757179021835327, |
|
"logps/chosen": -317.8477478027344, |
|
"logps/rejected": -290.2752685546875, |
|
"loss": 0.2669, |
|
"pred_label": 2758.64990234375, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0796750783920288, |
|
"rewards/margins": 2.0139389038085938, |
|
"rewards/rejected": -3.093614101409912, |
|
"step": 400, |
|
"use_label": 3563.35009765625 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.172293364377183e-05, |
|
"logits/chosen": -2.777430295944214, |
|
"logits/rejected": -2.7574374675750732, |
|
"logps/chosen": -312.574951171875, |
|
"logps/rejected": -291.488037109375, |
|
"loss": 0.296, |
|
"pred_label": 2843.10009765625, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.4477269649505615, |
|
"rewards/margins": 1.8462398052215576, |
|
"rewards/rejected": -3.2939670085906982, |
|
"step": 410, |
|
"use_label": 3638.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1140861466821885e-05, |
|
"logits/chosen": -2.718291759490967, |
|
"logits/rejected": -2.688828706741333, |
|
"logps/chosen": -254.98593139648438, |
|
"logps/rejected": -283.18560791015625, |
|
"loss": 0.3046, |
|
"pred_label": 2935.14990234375, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.440553069114685, |
|
"rewards/margins": 1.9665199518203735, |
|
"rewards/rejected": -3.4070727825164795, |
|
"step": 420, |
|
"use_label": 3706.85009765625 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.055878928987195e-05, |
|
"logits/chosen": -2.7530155181884766, |
|
"logits/rejected": -2.7446746826171875, |
|
"logps/chosen": -288.42120361328125, |
|
"logps/rejected": -287.4293518066406, |
|
"loss": 0.3446, |
|
"pred_label": 3031.39990234375, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0520861148834229, |
|
"rewards/margins": 1.7391481399536133, |
|
"rewards/rejected": -2.791234254837036, |
|
"step": 430, |
|
"use_label": 3770.60009765625 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9976717112922005e-05, |
|
"logits/chosen": -2.8176846504211426, |
|
"logits/rejected": -2.8231449127197266, |
|
"logps/chosen": -296.26837158203125, |
|
"logps/rejected": -284.00335693359375, |
|
"loss": 0.2945, |
|
"pred_label": 3121.39990234375, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2998032569885254, |
|
"rewards/margins": 1.8008592128753662, |
|
"rewards/rejected": -3.1006627082824707, |
|
"step": 440, |
|
"use_label": 3840.60009765625 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.939464493597206e-05, |
|
"logits/chosen": -2.6171724796295166, |
|
"logits/rejected": -2.6079916954040527, |
|
"logps/chosen": -248.51025390625, |
|
"logps/rejected": -273.4137268066406, |
|
"loss": 0.287, |
|
"pred_label": 3211.925048828125, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8791544437408447, |
|
"rewards/margins": 2.497044086456299, |
|
"rewards/rejected": -4.3761982917785645, |
|
"step": 450, |
|
"use_label": 3910.074951171875 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.881257275902212e-05, |
|
"logits/chosen": -2.815080165863037, |
|
"logits/rejected": -2.8055264949798584, |
|
"logps/chosen": -318.76666259765625, |
|
"logps/rejected": -305.4552917480469, |
|
"loss": 0.3106, |
|
"pred_label": 3311.449951171875, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.6964454650878906, |
|
"rewards/margins": 1.949159026145935, |
|
"rewards/rejected": -3.645604372024536, |
|
"step": 460, |
|
"use_label": 3970.550048828125 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8230500582072178e-05, |
|
"logits/chosen": -2.736795425415039, |
|
"logits/rejected": -2.70324444770813, |
|
"logps/chosen": -289.38824462890625, |
|
"logps/rejected": -302.501708984375, |
|
"loss": 0.2652, |
|
"pred_label": 3412.550048828125, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.2287776470184326, |
|
"rewards/margins": 2.337141513824463, |
|
"rewards/rejected": -4.565918922424316, |
|
"step": 470, |
|
"use_label": 4029.449951171875 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7648428405122233e-05, |
|
"logits/chosen": -2.6519317626953125, |
|
"logits/rejected": -2.616342067718506, |
|
"logps/chosen": -274.76422119140625, |
|
"logps/rejected": -303.0647277832031, |
|
"loss": 0.3052, |
|
"pred_label": 3516.85009765625, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.918997287750244, |
|
"rewards/margins": 2.5355329513549805, |
|
"rewards/rejected": -5.454530239105225, |
|
"step": 480, |
|
"use_label": 4085.14990234375 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7066356228172297e-05, |
|
"logits/chosen": -2.6925861835479736, |
|
"logits/rejected": -2.67038631439209, |
|
"logps/chosen": -289.875, |
|
"logps/rejected": -282.19903564453125, |
|
"loss": 0.2758, |
|
"pred_label": 3613.125, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.284597873687744, |
|
"rewards/margins": 2.195889711380005, |
|
"rewards/rejected": -4.48048734664917, |
|
"step": 490, |
|
"use_label": 4148.875 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6484284051222352e-05, |
|
"logits/chosen": -2.7040975093841553, |
|
"logits/rejected": -2.6474618911743164, |
|
"logps/chosen": -308.2983703613281, |
|
"logps/rejected": -305.19525146484375, |
|
"loss": 0.2732, |
|
"pred_label": 3721.64990234375, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8841865062713623, |
|
"rewards/margins": 2.809114456176758, |
|
"rewards/rejected": -4.693300724029541, |
|
"step": 500, |
|
"use_label": 4200.35009765625 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.590221187427241e-05, |
|
"logits/chosen": -2.7423815727233887, |
|
"logits/rejected": -2.699392795562744, |
|
"logps/chosen": -296.5220642089844, |
|
"logps/rejected": -285.8718566894531, |
|
"loss": 0.2433, |
|
"pred_label": 3824.625, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1184868812561035, |
|
"rewards/margins": 3.1191751956939697, |
|
"rewards/rejected": -5.237661361694336, |
|
"step": 510, |
|
"use_label": 4257.375 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.532013969732247e-05, |
|
"logits/chosen": -2.7525784969329834, |
|
"logits/rejected": -2.727067708969116, |
|
"logps/chosen": -304.08343505859375, |
|
"logps/rejected": -308.287841796875, |
|
"loss": 0.2989, |
|
"pred_label": 3925.175048828125, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.2129416465759277, |
|
"rewards/margins": 2.644366502761841, |
|
"rewards/rejected": -3.8573079109191895, |
|
"step": 520, |
|
"use_label": 4316.8251953125 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4738067520372525e-05, |
|
"logits/chosen": -2.759995937347412, |
|
"logits/rejected": -2.6951136589050293, |
|
"logps/chosen": -314.9224548339844, |
|
"logps/rejected": -335.85101318359375, |
|
"loss": 0.2862, |
|
"pred_label": 4020.449951171875, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.4606602191925049, |
|
"rewards/margins": 2.726986885070801, |
|
"rewards/rejected": -4.187647342681885, |
|
"step": 530, |
|
"use_label": 4381.5498046875 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.4155995343422587e-05, |
|
"logits/chosen": -2.7623682022094727, |
|
"logits/rejected": -2.743201732635498, |
|
"logps/chosen": -307.76995849609375, |
|
"logps/rejected": -319.3795471191406, |
|
"loss": 0.2676, |
|
"pred_label": 4129.97509765625, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.9233381748199463, |
|
"rewards/margins": 3.4177041053771973, |
|
"rewards/rejected": -5.3410420417785645, |
|
"step": 540, |
|
"use_label": 4432.02490234375 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3573923166472644e-05, |
|
"logits/chosen": -2.766664981842041, |
|
"logits/rejected": -2.727250576019287, |
|
"logps/chosen": -307.920166015625, |
|
"logps/rejected": -313.8031311035156, |
|
"loss": 0.2619, |
|
"pred_label": 4236.1748046875, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.871700644493103, |
|
"rewards/margins": 3.314380645751953, |
|
"rewards/rejected": -5.186081886291504, |
|
"step": 550, |
|
"use_label": 4485.8251953125 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2991850989522702e-05, |
|
"logits/chosen": -2.747933864593506, |
|
"logits/rejected": -2.743129253387451, |
|
"logps/chosen": -296.66705322265625, |
|
"logps/rejected": -261.8858947753906, |
|
"loss": 0.281, |
|
"pred_label": 4334.72509765625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2262706756591797, |
|
"rewards/margins": 2.213707685470581, |
|
"rewards/rejected": -4.43997859954834, |
|
"step": 560, |
|
"use_label": 4547.27490234375 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.240977881257276e-05, |
|
"logits/chosen": -2.7517714500427246, |
|
"logits/rejected": -2.7313544750213623, |
|
"logps/chosen": -288.4844970703125, |
|
"logps/rejected": -302.5806579589844, |
|
"loss": 0.2905, |
|
"pred_label": 4442.875, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.8995840549468994, |
|
"rewards/margins": 2.1551976203918457, |
|
"rewards/rejected": -4.054781913757324, |
|
"step": 570, |
|
"use_label": 4599.125 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1827706635622818e-05, |
|
"logits/chosen": -2.7617430686950684, |
|
"logits/rejected": -2.73646879196167, |
|
"logps/chosen": -295.9949645996094, |
|
"logps/rejected": -310.08331298828125, |
|
"loss": 0.2821, |
|
"pred_label": 4553.9501953125, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.7886556386947632, |
|
"rewards/margins": 2.772958993911743, |
|
"rewards/rejected": -4.561614513397217, |
|
"step": 580, |
|
"use_label": 4648.0498046875 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.124563445867288e-05, |
|
"logits/chosen": -2.6858975887298584, |
|
"logits/rejected": -2.6842644214630127, |
|
"logps/chosen": -292.4759216308594, |
|
"logps/rejected": -323.04754638671875, |
|
"loss": 0.2683, |
|
"pred_label": 4648.1748046875, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.0878047943115234, |
|
"rewards/margins": 2.138942241668701, |
|
"rewards/rejected": -4.226747035980225, |
|
"step": 590, |
|
"use_label": 4713.8251953125 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0663562281722934e-05, |
|
"logits/chosen": -2.755965232849121, |
|
"logits/rejected": -2.7383854389190674, |
|
"logps/chosen": -327.86962890625, |
|
"logps/rejected": -351.33074951171875, |
|
"loss": 0.2585, |
|
"pred_label": 4749.9501953125, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.4506783485412598, |
|
"rewards/margins": 3.4063594341278076, |
|
"rewards/rejected": -4.8570380210876465, |
|
"step": 600, |
|
"use_label": 4772.0498046875 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0081490104772992e-05, |
|
"logits/chosen": -2.7139110565185547, |
|
"logits/rejected": -2.7096176147460938, |
|
"logps/chosen": -322.1976013183594, |
|
"logps/rejected": -281.54266357421875, |
|
"loss": 0.267, |
|
"pred_label": 4857.875, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.6288321018218994, |
|
"rewards/margins": 2.5627994537353516, |
|
"rewards/rejected": -4.19163179397583, |
|
"step": 610, |
|
"use_label": 4824.125 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9499417927823053e-05, |
|
"logits/chosen": -2.733909845352173, |
|
"logits/rejected": -2.6813321113586426, |
|
"logps/chosen": -296.90032958984375, |
|
"logps/rejected": -298.9272766113281, |
|
"loss": 0.2764, |
|
"pred_label": 4962.77490234375, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.691881775856018, |
|
"rewards/margins": 2.9819297790527344, |
|
"rewards/rejected": -4.673811912536621, |
|
"step": 620, |
|
"use_label": 4879.22509765625 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8917345750873107e-05, |
|
"logits/chosen": -2.7139275074005127, |
|
"logits/rejected": -2.7132036685943604, |
|
"logps/chosen": -297.9289855957031, |
|
"logps/rejected": -306.44940185546875, |
|
"loss": 0.2715, |
|
"pred_label": 5066.4501953125, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2809231281280518, |
|
"rewards/margins": 3.2467029094696045, |
|
"rewards/rejected": -4.527626991271973, |
|
"step": 630, |
|
"use_label": 4935.5498046875 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.833527357392317e-05, |
|
"logits/chosen": -2.7025070190429688, |
|
"logits/rejected": -2.669739246368408, |
|
"logps/chosen": -266.36798095703125, |
|
"logps/rejected": -293.29669189453125, |
|
"loss": 0.2907, |
|
"pred_label": 5163.5, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.3176727294921875, |
|
"rewards/margins": 3.1546790599823, |
|
"rewards/rejected": -4.472352027893066, |
|
"step": 640, |
|
"use_label": 4998.5 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7753201396973227e-05, |
|
"logits/chosen": -2.721750497817993, |
|
"logits/rejected": -2.6910438537597656, |
|
"logps/chosen": -316.4061279296875, |
|
"logps/rejected": -322.0241394042969, |
|
"loss": 0.2773, |
|
"pred_label": 5267.2001953125, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.4457476139068604, |
|
"rewards/margins": 2.958343982696533, |
|
"rewards/rejected": -4.404091835021973, |
|
"step": 650, |
|
"use_label": 5054.7998046875 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.717112922002328e-05, |
|
"logits/chosen": -2.7757656574249268, |
|
"logits/rejected": -2.7493858337402344, |
|
"logps/chosen": -311.73321533203125, |
|
"logps/rejected": -285.9647216796875, |
|
"loss": 0.2968, |
|
"pred_label": 5371.77490234375, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.636950135231018, |
|
"rewards/margins": 1.971967339515686, |
|
"rewards/rejected": -3.6089179515838623, |
|
"step": 660, |
|
"use_label": 5110.22509765625 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6589057043073342e-05, |
|
"logits/chosen": -2.7224373817443848, |
|
"logits/rejected": -2.686645030975342, |
|
"logps/chosen": -299.3620300292969, |
|
"logps/rejected": -292.6378173828125, |
|
"loss": 0.2702, |
|
"pred_label": 5477.2001953125, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.828905701637268, |
|
"rewards/margins": 3.295133113861084, |
|
"rewards/rejected": -5.124039173126221, |
|
"step": 670, |
|
"use_label": 5164.7998046875 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.60069848661234e-05, |
|
"logits/chosen": -2.7378199100494385, |
|
"logits/rejected": -2.7310595512390137, |
|
"logps/chosen": -304.2184753417969, |
|
"logps/rejected": -292.1669921875, |
|
"loss": 0.3204, |
|
"pred_label": 5576.47509765625, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.7283070087432861, |
|
"rewards/margins": 1.6653932332992554, |
|
"rewards/rejected": -3.393700361251831, |
|
"step": 680, |
|
"use_label": 5225.52490234375 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5424912689173458e-05, |
|
"logits/chosen": -2.7175381183624268, |
|
"logits/rejected": -2.7000772953033447, |
|
"logps/chosen": -266.01263427734375, |
|
"logps/rejected": -258.3471374511719, |
|
"loss": 0.258, |
|
"pred_label": 5677.625, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.56284499168396, |
|
"rewards/margins": 3.3434224128723145, |
|
"rewards/rejected": -4.9062676429748535, |
|
"step": 690, |
|
"use_label": 5284.375 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4842840512223516e-05, |
|
"logits/chosen": -2.735502243041992, |
|
"logits/rejected": -2.690195322036743, |
|
"logps/chosen": -293.5663146972656, |
|
"logps/rejected": -299.68121337890625, |
|
"loss": 0.2598, |
|
"pred_label": 5784.4248046875, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7876224517822266, |
|
"rewards/margins": 3.3526370525360107, |
|
"rewards/rejected": -5.140259742736816, |
|
"step": 700, |
|
"use_label": 5337.5751953125 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4260768335273575e-05, |
|
"logits/chosen": -2.7820751667022705, |
|
"logits/rejected": -2.74092173576355, |
|
"logps/chosen": -305.2626953125, |
|
"logps/rejected": -284.5972900390625, |
|
"loss": 0.2792, |
|
"pred_label": 5887.0498046875, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5688129663467407, |
|
"rewards/margins": 2.764045000076294, |
|
"rewards/rejected": -4.332858085632324, |
|
"step": 710, |
|
"use_label": 5394.9501953125 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3678696158323633e-05, |
|
"logits/chosen": -2.741055727005005, |
|
"logits/rejected": -2.739257335662842, |
|
"logps/chosen": -288.95611572265625, |
|
"logps/rejected": -306.3753967285156, |
|
"loss": 0.2461, |
|
"pred_label": 5995.5498046875, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.7503833770751953, |
|
"rewards/margins": 3.5468757152557373, |
|
"rewards/rejected": -5.297258377075195, |
|
"step": 720, |
|
"use_label": 5446.4501953125 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.309662398137369e-05, |
|
"logits/chosen": -2.7259597778320312, |
|
"logits/rejected": -2.719268321990967, |
|
"logps/chosen": -315.80181884765625, |
|
"logps/rejected": -282.5977783203125, |
|
"loss": 0.2265, |
|
"pred_label": 6100.625, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.8351987600326538, |
|
"rewards/margins": 2.257732629776001, |
|
"rewards/rejected": -4.092931270599365, |
|
"step": 730, |
|
"use_label": 5501.375 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2514551804423749e-05, |
|
"logits/chosen": -2.704035758972168, |
|
"logits/rejected": -2.694396495819092, |
|
"logps/chosen": -332.0672912597656, |
|
"logps/rejected": -348.5420837402344, |
|
"loss": 0.254, |
|
"pred_label": 6200.5751953125, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.469543933868408, |
|
"rewards/margins": 3.7357654571533203, |
|
"rewards/rejected": -6.2053093910217285, |
|
"step": 740, |
|
"use_label": 5561.4248046875 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1932479627473807e-05, |
|
"logits/chosen": -2.717568874359131, |
|
"logits/rejected": -2.6717875003814697, |
|
"logps/chosen": -285.0014343261719, |
|
"logps/rejected": -322.31671142578125, |
|
"loss": 0.2139, |
|
"pred_label": 6312.14990234375, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.451322078704834, |
|
"rewards/margins": 3.727611541748047, |
|
"rewards/rejected": -6.178933620452881, |
|
"step": 750, |
|
"use_label": 5609.85009765625 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1350407450523866e-05, |
|
"logits/chosen": -2.719574451446533, |
|
"logits/rejected": -2.7501304149627686, |
|
"logps/chosen": -310.88677978515625, |
|
"logps/rejected": -308.3591003417969, |
|
"loss": 0.2668, |
|
"pred_label": 6422.97509765625, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.5423939228057861, |
|
"rewards/margins": 3.0822057723999023, |
|
"rewards/rejected": -4.624599456787109, |
|
"step": 760, |
|
"use_label": 5659.02490234375 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0768335273573923e-05, |
|
"logits/chosen": -2.746647357940674, |
|
"logits/rejected": -2.733283519744873, |
|
"logps/chosen": -320.37213134765625, |
|
"logps/rejected": -333.1225280761719, |
|
"loss": 0.248, |
|
"pred_label": 6535.22509765625, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.807631492614746, |
|
"rewards/margins": 3.0322647094726562, |
|
"rewards/rejected": -4.839896202087402, |
|
"step": 770, |
|
"use_label": 5706.77490234375 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0186263096623982e-05, |
|
"logits/chosen": -2.7445006370544434, |
|
"logits/rejected": -2.7238264083862305, |
|
"logps/chosen": -303.70404052734375, |
|
"logps/rejected": -301.3880615234375, |
|
"loss": 0.2467, |
|
"pred_label": 6641.10009765625, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.2510251998901367, |
|
"rewards/margins": 3.1742377281188965, |
|
"rewards/rejected": -5.425262928009033, |
|
"step": 780, |
|
"use_label": 5760.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.60419091967404e-06, |
|
"logits/chosen": -2.7392661571502686, |
|
"logits/rejected": -2.7158615589141846, |
|
"logps/chosen": -294.07037353515625, |
|
"logps/rejected": -322.58758544921875, |
|
"loss": 0.2523, |
|
"pred_label": 6745.9501953125, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.626100778579712, |
|
"rewards/margins": 3.5596566200256348, |
|
"rewards/rejected": -6.185757637023926, |
|
"step": 790, |
|
"use_label": 5816.0498046875 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.022118742724098e-06, |
|
"logits/chosen": -2.6276087760925293, |
|
"logits/rejected": -2.5911715030670166, |
|
"logps/chosen": -343.8062438964844, |
|
"logps/rejected": -343.41070556640625, |
|
"loss": 0.2253, |
|
"pred_label": 6867.2998046875, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.9334328174591064, |
|
"rewards/margins": 3.8014297485351562, |
|
"rewards/rejected": -6.73486328125, |
|
"step": 800, |
|
"use_label": 5854.7001953125 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.440046565774158e-06, |
|
"logits/chosen": -2.6645331382751465, |
|
"logits/rejected": -2.640127658843994, |
|
"logps/chosen": -313.86480712890625, |
|
"logps/rejected": -287.8795471191406, |
|
"loss": 0.2183, |
|
"pred_label": 6983.14990234375, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -3.2840819358825684, |
|
"rewards/margins": 2.622573137283325, |
|
"rewards/rejected": -5.9066548347473145, |
|
"step": 810, |
|
"use_label": 5898.85009765625 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.857974388824214e-06, |
|
"logits/chosen": -2.684762477874756, |
|
"logits/rejected": -2.6731903553009033, |
|
"logps/chosen": -316.68157958984375, |
|
"logps/rejected": -307.9013671875, |
|
"loss": 0.2752, |
|
"pred_label": 7098.10009765625, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.3363418579101562, |
|
"rewards/margins": 4.297635078430176, |
|
"rewards/rejected": -6.63397741317749, |
|
"step": 820, |
|
"use_label": 5943.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.275902211874273e-06, |
|
"logits/chosen": -2.7039098739624023, |
|
"logits/rejected": -2.6936678886413574, |
|
"logps/chosen": -289.11761474609375, |
|
"logps/rejected": -341.92388916015625, |
|
"loss": 0.2433, |
|
"pred_label": 7206.47509765625, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.6304259300231934, |
|
"rewards/margins": 3.5417304039001465, |
|
"rewards/rejected": -6.172156810760498, |
|
"step": 830, |
|
"use_label": 5995.52490234375 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.693830034924331e-06, |
|
"logits/chosen": -2.6950812339782715, |
|
"logits/rejected": -2.695385456085205, |
|
"logps/chosen": -295.2992858886719, |
|
"logps/rejected": -315.1665954589844, |
|
"loss": 0.2626, |
|
"pred_label": 7316.375, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.7282135486602783, |
|
"rewards/margins": 2.3400490283966064, |
|
"rewards/rejected": -5.068262577056885, |
|
"step": 840, |
|
"use_label": 6045.625 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.111757857974389e-06, |
|
"logits/chosen": -2.691633701324463, |
|
"logits/rejected": -2.7306087017059326, |
|
"logps/chosen": -319.0354919433594, |
|
"logps/rejected": -299.2270812988281, |
|
"loss": 0.2547, |
|
"pred_label": 7422.2001953125, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.400865316390991, |
|
"rewards/margins": 3.1857247352600098, |
|
"rewards/rejected": -5.586589813232422, |
|
"step": 850, |
|
"use_label": 6099.7998046875 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.529685681024447e-06, |
|
"logits/chosen": -2.6894938945770264, |
|
"logits/rejected": -2.6826186180114746, |
|
"logps/chosen": -314.03753662109375, |
|
"logps/rejected": -334.26690673828125, |
|
"loss": 0.2358, |
|
"pred_label": 7535.10009765625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.9154160022735596, |
|
"rewards/margins": 3.2621803283691406, |
|
"rewards/rejected": -6.177596092224121, |
|
"step": 860, |
|
"use_label": 6146.89990234375 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.947613504074506e-06, |
|
"logits/chosen": -2.6914477348327637, |
|
"logits/rejected": -2.6861438751220703, |
|
"logps/chosen": -267.033203125, |
|
"logps/rejected": -298.70428466796875, |
|
"loss": 0.2361, |
|
"pred_label": 7641.60009765625, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.404568910598755, |
|
"rewards/margins": 3.4409728050231934, |
|
"rewards/rejected": -5.845541477203369, |
|
"step": 870, |
|
"use_label": 6200.39990234375 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.3655413271245635e-06, |
|
"logits/chosen": -2.7052195072174072, |
|
"logits/rejected": -2.6577229499816895, |
|
"logps/chosen": -311.93719482421875, |
|
"logps/rejected": -314.8600769042969, |
|
"loss": 0.2224, |
|
"pred_label": 7745.8251953125, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.5347952842712402, |
|
"rewards/margins": 2.786163330078125, |
|
"rewards/rejected": -5.320958614349365, |
|
"step": 880, |
|
"use_label": 6256.1748046875 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.7834691501746217e-06, |
|
"logits/chosen": -2.718468189239502, |
|
"logits/rejected": -2.710252523422241, |
|
"logps/chosen": -304.1473693847656, |
|
"logps/rejected": -332.4493103027344, |
|
"loss": 0.2246, |
|
"pred_label": 7864.375, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.2862813472747803, |
|
"rewards/margins": 3.7558963298797607, |
|
"rewards/rejected": -6.042177677154541, |
|
"step": 890, |
|
"use_label": 6297.625 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.2013969732246805e-06, |
|
"logits/chosen": -2.6227777004241943, |
|
"logits/rejected": -2.571324110031128, |
|
"logps/chosen": -307.7371826171875, |
|
"logps/rejected": -324.693359375, |
|
"loss": 0.2493, |
|
"pred_label": 7978.625, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.859910488128662, |
|
"rewards/margins": 3.757611036300659, |
|
"rewards/rejected": -6.617520809173584, |
|
"step": 900, |
|
"use_label": 6343.375 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.6193247962747383e-06, |
|
"logits/chosen": -2.701064109802246, |
|
"logits/rejected": -2.665743350982666, |
|
"logps/chosen": -323.77337646484375, |
|
"logps/rejected": -322.24432373046875, |
|
"loss": 0.238, |
|
"pred_label": 8087.5, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.7357375621795654, |
|
"rewards/margins": 3.1752612590789795, |
|
"rewards/rejected": -5.910999298095703, |
|
"step": 910, |
|
"use_label": 6394.5 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.037252619324796e-06, |
|
"logits/chosen": -2.639965772628784, |
|
"logits/rejected": -2.6299610137939453, |
|
"logps/chosen": -327.05975341796875, |
|
"logps/rejected": -325.63909912109375, |
|
"loss": 0.2258, |
|
"pred_label": 8202.6748046875, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.017204761505127, |
|
"rewards/margins": 3.5882301330566406, |
|
"rewards/rejected": -6.605435371398926, |
|
"step": 920, |
|
"use_label": 6439.3251953125 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4551804423748545e-06, |
|
"logits/chosen": -2.69065260887146, |
|
"logits/rejected": -2.6345713138580322, |
|
"logps/chosen": -286.16278076171875, |
|
"logps/rejected": -308.4196472167969, |
|
"loss": 0.2207, |
|
"pred_label": 8314.349609375, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7949655055999756, |
|
"rewards/margins": 4.867483139038086, |
|
"rewards/rejected": -6.662448883056641, |
|
"step": 930, |
|
"use_label": 6487.64990234375 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.731082654249127e-07, |
|
"logits/chosen": -2.71934175491333, |
|
"logits/rejected": -2.67936372756958, |
|
"logps/chosen": -318.2463073730469, |
|
"logps/rejected": -326.56634521484375, |
|
"loss": 0.234, |
|
"pred_label": 8430.5751953125, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.0853824615478516, |
|
"rewards/margins": 2.8827648162841797, |
|
"rewards/rejected": -5.968147277832031, |
|
"step": 940, |
|
"use_label": 6531.4248046875 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.910360884749709e-07, |
|
"logits/chosen": -2.6776463985443115, |
|
"logits/rejected": -2.6152024269104004, |
|
"logps/chosen": -300.53662109375, |
|
"logps/rejected": -322.4374694824219, |
|
"loss": 0.2135, |
|
"pred_label": 8549.625, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.2081990242004395, |
|
"rewards/margins": 3.740739107131958, |
|
"rewards/rejected": -6.94893741607666, |
|
"step": 950, |
|
"use_label": 6572.375 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.6974706649780273, |
|
"eval_logits/rejected": -2.665019989013672, |
|
"eval_logps/chosen": -311.6889953613281, |
|
"eval_logps/rejected": -322.95947265625, |
|
"eval_loss": 0.2408759742975235, |
|
"eval_pred_label": 8833.576171875, |
|
"eval_rewards/accuracies": 0.734000027179718, |
|
"eval_rewards/chosen": -2.7431609630584717, |
|
"eval_rewards/margins": 3.6228184700012207, |
|
"eval_rewards/rejected": -6.36598014831543, |
|
"eval_runtime": 452.5439, |
|
"eval_samples_per_second": 4.419, |
|
"eval_steps_per_second": 0.276, |
|
"eval_use_label": 6698.423828125, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.31699458866219243, |
|
"train_runtime": 25218.7851, |
|
"train_samples_per_second": 2.424, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|