sfulay's picture
Model save
d5d8196 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9988571428571429,
"eval_steps": 100,
"global_step": 437,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.022857142857142857,
"grad_norm": 9.858836405653433,
"learning_rate": 1.1363636363636363e-07,
"logits/chosen": -2.7008285522460938,
"logits/rejected": -2.6250243186950684,
"logps/chosen": -301.27081298828125,
"logps/rejected": -281.75146484375,
"loss": 0.693,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": 0.00015341350808739662,
"rewards/margins": 0.0001716136175673455,
"rewards/rejected": -1.8200071281171404e-05,
"step": 10
},
{
"epoch": 0.045714285714285714,
"grad_norm": 7.79798162706573,
"learning_rate": 2.2727272727272726e-07,
"logits/chosen": -2.6407124996185303,
"logits/rejected": -2.6055800914764404,
"logps/chosen": -278.97711181640625,
"logps/rejected": -254.7215576171875,
"loss": 0.6923,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.0019410619279369712,
"rewards/margins": 0.0014655741397291422,
"rewards/rejected": 0.0004754880501423031,
"step": 20
},
{
"epoch": 0.06857142857142857,
"grad_norm": 8.391842432337471,
"learning_rate": 3.4090909090909085e-07,
"logits/chosen": -2.63759183883667,
"logits/rejected": -2.6166491508483887,
"logps/chosen": -263.44866943359375,
"logps/rejected": -263.5602111816406,
"loss": 0.6871,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.012344349175691605,
"rewards/margins": 0.008015613071620464,
"rewards/rejected": 0.004328734241425991,
"step": 30
},
{
"epoch": 0.09142857142857143,
"grad_norm": 9.39930524490582,
"learning_rate": 4.545454545454545e-07,
"logits/chosen": -2.6476945877075195,
"logits/rejected": -2.5853049755096436,
"logps/chosen": -290.5145568847656,
"logps/rejected": -268.3503723144531,
"loss": 0.6755,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": 0.034826718270778656,
"rewards/margins": 0.042017363011837006,
"rewards/rejected": -0.007190642412751913,
"step": 40
},
{
"epoch": 0.11428571428571428,
"grad_norm": 13.99279327188838,
"learning_rate": 4.997124959943201e-07,
"logits/chosen": -2.6765246391296387,
"logits/rejected": -2.5974183082580566,
"logps/chosen": -294.23516845703125,
"logps/rejected": -254.03042602539062,
"loss": 0.6629,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.02075173892080784,
"rewards/margins": 0.0924127846956253,
"rewards/rejected": -0.07166104018688202,
"step": 50
},
{
"epoch": 0.13714285714285715,
"grad_norm": 10.917441498337828,
"learning_rate": 4.979579212164186e-07,
"logits/chosen": -2.5758731365203857,
"logits/rejected": -2.472479820251465,
"logps/chosen": -290.3058166503906,
"logps/rejected": -270.32891845703125,
"loss": 0.6375,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.09799840301275253,
"rewards/margins": 0.11976947635412216,
"rewards/rejected": -0.21776790916919708,
"step": 60
},
{
"epoch": 0.16,
"grad_norm": 14.438870715842985,
"learning_rate": 4.946196886175515e-07,
"logits/chosen": -2.568722724914551,
"logits/rejected": -2.5102906227111816,
"logps/chosen": -284.9170227050781,
"logps/rejected": -291.45648193359375,
"loss": 0.6099,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.09138783067464828,
"rewards/margins": 0.22690913081169128,
"rewards/rejected": -0.31829696893692017,
"step": 70
},
{
"epoch": 0.18285714285714286,
"grad_norm": 12.571362458092375,
"learning_rate": 4.897191188239667e-07,
"logits/chosen": -2.5521557331085205,
"logits/rejected": -2.4944987297058105,
"logps/chosen": -291.53851318359375,
"logps/rejected": -307.15631103515625,
"loss": 0.6075,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.24509508907794952,
"rewards/margins": 0.27337878942489624,
"rewards/rejected": -0.5184738636016846,
"step": 80
},
{
"epoch": 0.2057142857142857,
"grad_norm": 13.355734952743322,
"learning_rate": 4.832875107981763e-07,
"logits/chosen": -2.6172804832458496,
"logits/rejected": -2.551274538040161,
"logps/chosen": -292.2721252441406,
"logps/rejected": -307.9034423828125,
"loss": 0.6079,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.15986846387386322,
"rewards/margins": 0.3636865019798279,
"rewards/rejected": -0.5235549807548523,
"step": 90
},
{
"epoch": 0.22857142857142856,
"grad_norm": 14.60913947420442,
"learning_rate": 4.753659419387223e-07,
"logits/chosen": -2.629087448120117,
"logits/rejected": -2.5364227294921875,
"logps/chosen": -312.4242858886719,
"logps/rejected": -296.9600524902344,
"loss": 0.5995,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.2603791356086731,
"rewards/margins": 0.3747071921825409,
"rewards/rejected": -0.6350862979888916,
"step": 100
},
{
"epoch": 0.22857142857142856,
"eval_logits/chosen": -2.465451717376709,
"eval_logits/rejected": -2.34794545173645,
"eval_logps/chosen": -306.55853271484375,
"eval_logps/rejected": -294.0323486328125,
"eval_loss": 0.5960295796394348,
"eval_rewards/accuracies": 0.7155172228813171,
"eval_rewards/chosen": -0.30954551696777344,
"eval_rewards/margins": 0.4400167167186737,
"eval_rewards/rejected": -0.7495622038841248,
"eval_runtime": 90.9114,
"eval_samples_per_second": 20.14,
"eval_steps_per_second": 0.319,
"step": 100
},
{
"epoch": 0.25142857142857145,
"grad_norm": 16.802955839707842,
"learning_rate": 4.660050057270191e-07,
"logits/chosen": -2.4196040630340576,
"logits/rejected": -2.3442416191101074,
"logps/chosen": -358.9793395996094,
"logps/rejected": -372.57965087890625,
"loss": 0.5847,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.42675742506980896,
"rewards/margins": 0.3199427127838135,
"rewards/rejected": -0.7467001676559448,
"step": 110
},
{
"epoch": 0.2742857142857143,
"grad_norm": 18.751366323915487,
"learning_rate": 4.5526448859687144e-07,
"logits/chosen": -2.141848087310791,
"logits/rejected": -1.955249547958374,
"logps/chosen": -351.53961181640625,
"logps/rejected": -315.23406982421875,
"loss": 0.571,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.40635138750076294,
"rewards/margins": 0.42884722352027893,
"rewards/rejected": -0.8351985812187195,
"step": 120
},
{
"epoch": 0.29714285714285715,
"grad_norm": 24.80835479779821,
"learning_rate": 4.432129880904388e-07,
"logits/chosen": -1.6259305477142334,
"logits/rejected": -1.344472885131836,
"logps/chosen": -370.61798095703125,
"logps/rejected": -360.02374267578125,
"loss": 0.5487,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.6546204686164856,
"rewards/margins": 0.4409145414829254,
"rewards/rejected": -1.0955349206924438,
"step": 130
},
{
"epoch": 0.32,
"grad_norm": 22.937175363847114,
"learning_rate": 4.299274747394055e-07,
"logits/chosen": -1.5916811227798462,
"logits/rejected": -1.4237914085388184,
"logps/chosen": -361.98211669921875,
"logps/rejected": -371.1536560058594,
"loss": 0.5577,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.4653448462486267,
"rewards/margins": 0.5270770788192749,
"rewards/rejected": -0.9924219250679016,
"step": 140
},
{
"epoch": 0.34285714285714286,
"grad_norm": 22.019311232158735,
"learning_rate": 4.1549280046953653e-07,
"logits/chosen": -0.7861512303352356,
"logits/rejected": -0.3463224768638611,
"logps/chosen": -362.31756591796875,
"logps/rejected": -414.545166015625,
"loss": 0.5335,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.7533167004585266,
"rewards/margins": 0.6291457414627075,
"rewards/rejected": -1.3824622631072998,
"step": 150
},
{
"epoch": 0.3657142857142857,
"grad_norm": 28.0394510543571,
"learning_rate": 4.000011566683401e-07,
"logits/chosen": -0.27471694350242615,
"logits/rejected": 0.3287709653377533,
"logps/chosen": -392.02667236328125,
"logps/rejected": -424.70556640625,
"loss": 0.5445,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.9268640279769897,
"rewards/margins": 0.7637636065483093,
"rewards/rejected": -1.6906276941299438,
"step": 160
},
{
"epoch": 0.38857142857142857,
"grad_norm": 26.83796617150145,
"learning_rate": 3.8355148537705047e-07,
"logits/chosen": -0.9808514714241028,
"logits/rejected": -0.5751891732215881,
"logps/chosen": -374.8761291503906,
"logps/rejected": -386.53851318359375,
"loss": 0.5452,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.7077123522758484,
"rewards/margins": 0.5003622770309448,
"rewards/rejected": -1.2080745697021484,
"step": 170
},
{
"epoch": 0.4114285714285714,
"grad_norm": 24.04944603295859,
"learning_rate": 3.662488473675315e-07,
"logits/chosen": -1.283348798751831,
"logits/rejected": -0.556102991104126,
"logps/chosen": -405.58306884765625,
"logps/rejected": -420.27606201171875,
"loss": 0.5456,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.7540328502655029,
"rewards/margins": 0.8492224812507629,
"rewards/rejected": -1.603255271911621,
"step": 180
},
{
"epoch": 0.4342857142857143,
"grad_norm": 24.200865372247023,
"learning_rate": 3.48203751140067e-07,
"logits/chosen": -0.9612107276916504,
"logits/rejected": -0.4138285517692566,
"logps/chosen": -374.7398376464844,
"logps/rejected": -386.4855041503906,
"loss": 0.5501,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.9848228693008423,
"rewards/margins": 0.5350502133369446,
"rewards/rejected": -1.5198729038238525,
"step": 190
},
{
"epoch": 0.45714285714285713,
"grad_norm": 21.201969213616838,
"learning_rate": 3.2953144712759537e-07,
"logits/chosen": -1.3318579196929932,
"logits/rejected": -0.688397228717804,
"logps/chosen": -338.3636169433594,
"logps/rejected": -369.132080078125,
"loss": 0.5431,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.7002092599868774,
"rewards/margins": 0.7023059725761414,
"rewards/rejected": -1.402515172958374,
"step": 200
},
{
"epoch": 0.45714285714285713,
"eval_logits/chosen": -1.0623676776885986,
"eval_logits/rejected": -0.09592445194721222,
"eval_logps/chosen": -338.33111572265625,
"eval_logps/rejected": -371.40704345703125,
"eval_loss": 0.5325908660888672,
"eval_rewards/accuracies": 0.7629310488700867,
"eval_rewards/chosen": -0.627271831035614,
"eval_rewards/margins": 0.8960375785827637,
"eval_rewards/rejected": -1.523309350013733,
"eval_runtime": 90.9273,
"eval_samples_per_second": 20.137,
"eval_steps_per_second": 0.319,
"step": 200
},
{
"epoch": 0.48,
"grad_norm": 26.3766221546479,
"learning_rate": 3.103511916141658e-07,
"logits/chosen": -0.7819164991378784,
"logits/rejected": -0.08627365529537201,
"logps/chosen": -337.33160400390625,
"logps/rejected": -386.0670471191406,
"loss": 0.5355,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.7387806177139282,
"rewards/margins": 0.6690900921821594,
"rewards/rejected": -1.4078707695007324,
"step": 210
},
{
"epoch": 0.5028571428571429,
"grad_norm": 25.508902398231484,
"learning_rate": 2.9078548506882117e-07,
"logits/chosen": -0.4892755448818207,
"logits/rejected": 0.36961695551872253,
"logps/chosen": -375.30145263671875,
"logps/rejected": -400.32037353515625,
"loss": 0.5589,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.90235435962677,
"rewards/margins": 0.6390146613121033,
"rewards/rejected": -1.5413691997528076,
"step": 220
},
{
"epoch": 0.5257142857142857,
"grad_norm": 20.815219094385377,
"learning_rate": 2.709592897595191e-07,
"logits/chosen": -0.5455812215805054,
"logits/rejected": 0.40110301971435547,
"logps/chosen": -356.85198974609375,
"logps/rejected": -377.463623046875,
"loss": 0.5282,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.7579285502433777,
"rewards/margins": 0.6238406896591187,
"rewards/rejected": -1.3817692995071411,
"step": 230
},
{
"epoch": 0.5485714285714286,
"grad_norm": 29.165147607391557,
"learning_rate": 2.509992316440332e-07,
"logits/chosen": -0.44089436531066895,
"logits/rejected": 0.5840796828269958,
"logps/chosen": -384.84747314453125,
"logps/rejected": -446.173828125,
"loss": 0.5281,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.8613764047622681,
"rewards/margins": 0.8861438632011414,
"rewards/rejected": -1.7475202083587646,
"step": 240
},
{
"epoch": 0.5714285714285714,
"grad_norm": 21.600370107234998,
"learning_rate": 2.3103279163519918e-07,
"logits/chosen": -0.7112084031105042,
"logits/rejected": -0.16417662799358368,
"logps/chosen": -350.6177978515625,
"logps/rejected": -410.4039001464844,
"loss": 0.5421,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.69991534948349,
"rewards/margins": 0.7021188735961914,
"rewards/rejected": -1.4020342826843262,
"step": 250
},
{
"epoch": 0.5942857142857143,
"grad_norm": 25.092625042887974,
"learning_rate": 2.1118749140573358e-07,
"logits/chosen": -0.9435871243476868,
"logits/rejected": -0.3378845751285553,
"logps/chosen": -350.99188232421875,
"logps/rejected": -403.04901123046875,
"loss": 0.5464,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.7120517492294312,
"rewards/margins": 0.5850083231925964,
"rewards/rejected": -1.297060251235962,
"step": 260
},
{
"epoch": 0.6171428571428571,
"grad_norm": 27.282048688910123,
"learning_rate": 1.9159007893272703e-07,
"logits/chosen": -0.04894972965121269,
"logits/rejected": 1.124455213546753,
"logps/chosen": -359.29815673828125,
"logps/rejected": -393.60260009765625,
"loss": 0.5192,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.8677228689193726,
"rewards/margins": 0.7632287740707397,
"rewards/rejected": -1.6309516429901123,
"step": 270
},
{
"epoch": 0.64,
"grad_norm": 28.952729734552342,
"learning_rate": 1.7236571898357766e-07,
"logits/chosen": 0.5159433484077454,
"logits/rejected": 1.3371174335479736,
"logps/chosen": -371.30694580078125,
"logps/rejected": -441.8828125,
"loss": 0.5321,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.9826878309249878,
"rewards/margins": 0.8050721287727356,
"rewards/rejected": -1.7877601385116577,
"step": 280
},
{
"epoch": 0.6628571428571428,
"grad_norm": 26.553591107110048,
"learning_rate": 1.5363719371356882e-07,
"logits/chosen": 0.31891578435897827,
"logits/rejected": 1.1744709014892578,
"logps/chosen": -396.26849365234375,
"logps/rejected": -436.003173828125,
"loss": 0.527,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.9536263346672058,
"rewards/margins": 0.7509206533432007,
"rewards/rejected": -1.7045469284057617,
"step": 290
},
{
"epoch": 0.6857142857142857,
"grad_norm": 21.68374961944068,
"learning_rate": 1.3552411848071565e-07,
"logits/chosen": -0.4674099385738373,
"logits/rejected": 0.9225466847419739,
"logps/chosen": -378.4803771972656,
"logps/rejected": -411.07000732421875,
"loss": 0.5138,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.7657118439674377,
"rewards/margins": 0.8185675740242004,
"rewards/rejected": -1.5842792987823486,
"step": 300
},
{
"epoch": 0.6857142857142857,
"eval_logits/chosen": -0.7676966190338135,
"eval_logits/rejected": 0.6739733219146729,
"eval_logps/chosen": -332.8910827636719,
"eval_logps/rejected": -368.96173095703125,
"eval_loss": 0.524158775806427,
"eval_rewards/accuracies": 0.7629310488700867,
"eval_rewards/chosen": -0.5728713274002075,
"eval_rewards/margins": 0.9259848594665527,
"eval_rewards/rejected": -1.4988560676574707,
"eval_runtime": 90.1222,
"eval_samples_per_second": 20.317,
"eval_steps_per_second": 0.322,
"step": 300
},
{
"epoch": 0.7085714285714285,
"grad_norm": 21.32729150729434,
"learning_rate": 1.1814217788631473e-07,
"logits/chosen": -0.6603255271911621,
"logits/rejected": 0.27117711305618286,
"logps/chosen": -326.17193603515625,
"logps/rejected": -374.136474609375,
"loss": 0.5335,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.6986777782440186,
"rewards/margins": 0.6707334518432617,
"rewards/rejected": -1.3694112300872803,
"step": 310
},
{
"epoch": 0.7314285714285714,
"grad_norm": 26.8635604286969,
"learning_rate": 1.0160238692045331e-07,
"logits/chosen": -0.23095369338989258,
"logits/rejected": 0.5602467656135559,
"logps/chosen": -329.86492919921875,
"logps/rejected": -387.0187683105469,
"loss": 0.5276,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.8558257222175598,
"rewards/margins": 0.6044800281524658,
"rewards/rejected": -1.4603056907653809,
"step": 320
},
{
"epoch": 0.7542857142857143,
"grad_norm": 24.235988068898312,
"learning_rate": 8.601038193139438e-08,
"logits/chosen": -0.3879459798336029,
"logits/rejected": 0.737913966178894,
"logps/chosen": -381.5906677246094,
"logps/rejected": -407.6598815917969,
"loss": 0.5205,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.823724627494812,
"rewards/margins": 0.7749950289726257,
"rewards/rejected": -1.5987197160720825,
"step": 330
},
{
"epoch": 0.7771428571428571,
"grad_norm": 19.828650597594493,
"learning_rate": 7.146574594727572e-08,
"logits/chosen": 0.20474159717559814,
"logits/rejected": 1.0162971019744873,
"logps/chosen": -362.6044921875,
"logps/rejected": -421.4503479003906,
"loss": 0.521,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.9665349721908569,
"rewards/margins": 0.8277307748794556,
"rewards/rejected": -1.7942657470703125,
"step": 340
},
{
"epoch": 0.8,
"grad_norm": 22.77041280299393,
"learning_rate": 5.8061372659157306e-08,
"logits/chosen": -0.23095539212226868,
"logits/rejected": 0.8589683771133423,
"logps/chosen": -380.7283630371094,
"logps/rejected": -412.0213317871094,
"loss": 0.5294,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.9102112054824829,
"rewards/margins": 0.667026162147522,
"rewards/rejected": -1.5772373676300049,
"step": 350
},
{
"epoch": 0.8228571428571428,
"grad_norm": 24.963575760417292,
"learning_rate": 4.5882873127531614e-08,
"logits/chosen": -0.4319024682044983,
"logits/rejected": 0.861344039440155,
"logps/chosen": -366.2247314453125,
"logps/rejected": -415.48028564453125,
"loss": 0.5177,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.8081969022750854,
"rewards/margins": 0.8357402086257935,
"rewards/rejected": -1.643937110900879,
"step": 360
},
{
"epoch": 0.8457142857142858,
"grad_norm": 22.865584640185784,
"learning_rate": 3.500802900154412e-08,
"logits/chosen": -0.290349543094635,
"logits/rejected": 1.0458359718322754,
"logps/chosen": -349.37005615234375,
"logps/rejected": -402.3358459472656,
"loss": 0.5229,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.7830866575241089,
"rewards/margins": 0.856115996837616,
"rewards/rejected": -1.6392027139663696,
"step": 370
},
{
"epoch": 0.8685714285714285,
"grad_norm": 25.875193424699745,
"learning_rate": 2.550629574310309e-08,
"logits/chosen": -0.4227335453033447,
"logits/rejected": 0.8518049120903015,
"logps/chosen": -414.580322265625,
"logps/rejected": -417.60418701171875,
"loss": 0.5205,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.9652398824691772,
"rewards/margins": 0.6368075609207153,
"rewards/rejected": -1.602047324180603,
"step": 380
},
{
"epoch": 0.8914285714285715,
"grad_norm": 23.202834331091832,
"learning_rate": 1.7438359028687983e-08,
"logits/chosen": -0.143943652510643,
"logits/rejected": 0.5985423922538757,
"logps/chosen": -392.14068603515625,
"logps/rejected": -445.93560791015625,
"loss": 0.526,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.8076774477958679,
"rewards/margins": 0.7039517760276794,
"rewards/rejected": -1.5116291046142578,
"step": 390
},
{
"epoch": 0.9142857142857143,
"grad_norm": 31.25928182468869,
"learning_rate": 1.0855747162029361e-08,
"logits/chosen": 0.005290505476295948,
"logits/rejected": 0.5700523257255554,
"logps/chosen": -371.91815185546875,
"logps/rejected": -422.913818359375,
"loss": 0.5493,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.9318493604660034,
"rewards/margins": 0.6315523982048035,
"rewards/rejected": -1.563401699066162,
"step": 400
},
{
"epoch": 0.9142857142857143,
"eval_logits/chosen": -0.012980658560991287,
"eval_logits/rejected": 1.572424292564392,
"eval_logps/chosen": -357.8257751464844,
"eval_logps/rejected": -398.6412353515625,
"eval_loss": 0.5201366543769836,
"eval_rewards/accuracies": 0.7758620977401733,
"eval_rewards/chosen": -0.8222182989120483,
"eval_rewards/margins": 0.9734326004981995,
"eval_rewards/rejected": -1.795650839805603,
"eval_runtime": 90.9384,
"eval_samples_per_second": 20.135,
"eval_steps_per_second": 0.319,
"step": 400
},
{
"epoch": 0.9371428571428572,
"grad_norm": 24.160283510679385,
"learning_rate": 5.8005019731033615e-09,
"logits/chosen": -0.10076072067022324,
"logits/rejected": 0.8628407716751099,
"logps/chosen": -385.8863525390625,
"logps/rejected": -425.5079040527344,
"loss": 0.5157,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.977064311504364,
"rewards/margins": 0.6807142496109009,
"rewards/rejected": -1.6577785015106201,
"step": 410
},
{
"epoch": 0.96,
"grad_norm": 21.949347011437847,
"learning_rate": 2.3049103053431886e-09,
"logits/chosen": -0.21599116921424866,
"logits/rejected": 1.2091736793518066,
"logps/chosen": -353.96514892578125,
"logps/rejected": -403.6047058105469,
"loss": 0.5167,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.7121706604957581,
"rewards/margins": 0.9840081930160522,
"rewards/rejected": -1.696178674697876,
"step": 420
},
{
"epoch": 0.9828571428571429,
"grad_norm": 23.473520497590073,
"learning_rate": 3.9129780600541397e-10,
"logits/chosen": 0.03587682545185089,
"logits/rejected": 1.0841922760009766,
"logps/chosen": -372.0699768066406,
"logps/rejected": -431.086181640625,
"loss": 0.5158,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.8983810544013977,
"rewards/margins": 0.775784432888031,
"rewards/rejected": -1.6741654872894287,
"step": 430
},
{
"epoch": 0.9988571428571429,
"step": 437,
"total_flos": 0.0,
"train_loss": 0.5608050315822016,
"train_runtime": 10950.5403,
"train_samples_per_second": 5.114,
"train_steps_per_second": 0.04
}
],
"logging_steps": 10,
"max_steps": 437,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}