base-sft-safe-spin-v / trainer_state.json
AmberYifan's picture
Model save
8d7f4d1 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 1719,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.9069767441860465e-09,
"logits/generated": -3.012260675430298,
"logits/real": -2.981379270553589,
"logps/generated": -121.78553009033203,
"logps/real": -157.20819091796875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 2.9069767441860464e-08,
"logits/generated": -2.961106538772583,
"logits/real": -2.9408955574035645,
"logps/generated": -125.34223175048828,
"logps/real": -137.5188446044922,
"loss": 0.6908,
"rewards/accuracies": 0.5555555820465088,
"rewards/generated": -0.0030116664711385965,
"rewards/margins": 0.01261158287525177,
"rewards/real": 0.009599916636943817,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 5.813953488372093e-08,
"logits/generated": -2.963073253631592,
"logits/real": -2.9351158142089844,
"logps/generated": -122.87374114990234,
"logps/real": -133.8837127685547,
"loss": 0.6375,
"rewards/accuracies": 0.6000000238418579,
"rewards/generated": -0.058080412447452545,
"rewards/margins": 0.14583885669708252,
"rewards/real": 0.08775845915079117,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 8.720930232558139e-08,
"logits/generated": -2.9640278816223145,
"logits/real": -2.9266650676727295,
"logps/generated": -115.86125183105469,
"logps/real": -129.8009796142578,
"loss": 0.5498,
"rewards/accuracies": 0.8125,
"rewards/generated": -0.30484524369239807,
"rewards/margins": 0.5773354768753052,
"rewards/real": 0.2724902033805847,
"step": 30
},
{
"epoch": 0.02,
"learning_rate": 1.1627906976744186e-07,
"logits/generated": -2.9708516597747803,
"logits/real": -2.8813812732696533,
"logps/generated": -122.1348876953125,
"logps/real": -123.1031723022461,
"loss": 0.5168,
"rewards/accuracies": 0.75,
"rewards/generated": -0.330232709646225,
"rewards/margins": 0.7562737464904785,
"rewards/real": 0.42604103684425354,
"step": 40
},
{
"epoch": 0.03,
"learning_rate": 1.4534883720930232e-07,
"logits/generated": -2.961805820465088,
"logits/real": -2.8516037464141846,
"logps/generated": -130.59262084960938,
"logps/real": -131.277099609375,
"loss": 0.4904,
"rewards/accuracies": 0.75,
"rewards/generated": -0.683895468711853,
"rewards/margins": 1.2348084449768066,
"rewards/real": 0.5509130358695984,
"step": 50
},
{
"epoch": 0.03,
"learning_rate": 1.7441860465116279e-07,
"logits/generated": -2.8933191299438477,
"logits/real": -2.8156707286834717,
"logps/generated": -131.11270141601562,
"logps/real": -138.29629516601562,
"loss": 0.4853,
"rewards/accuracies": 0.7749999761581421,
"rewards/generated": -1.2479474544525146,
"rewards/margins": 1.7186332941055298,
"rewards/real": 0.4706856608390808,
"step": 60
},
{
"epoch": 0.04,
"learning_rate": 2.0348837209302325e-07,
"logits/generated": -2.84045672416687,
"logits/real": -2.816912889480591,
"logps/generated": -129.1736297607422,
"logps/real": -138.9403076171875,
"loss": 0.4151,
"rewards/accuracies": 0.8125,
"rewards/generated": -1.7937465906143188,
"rewards/margins": 2.168488025665283,
"rewards/real": 0.3747415244579315,
"step": 70
},
{
"epoch": 0.05,
"learning_rate": 2.3255813953488372e-07,
"logits/generated": -2.8057825565338135,
"logits/real": -2.7365589141845703,
"logps/generated": -134.1271209716797,
"logps/real": -128.5058135986328,
"loss": 0.4281,
"rewards/accuracies": 0.762499988079071,
"rewards/generated": -2.476748466491699,
"rewards/margins": 2.6212470531463623,
"rewards/real": 0.14449895918369293,
"step": 80
},
{
"epoch": 0.05,
"learning_rate": 2.616279069767442e-07,
"logits/generated": -2.8339638710021973,
"logits/real": -2.7323813438415527,
"logps/generated": -161.02420043945312,
"logps/real": -132.14015197753906,
"loss": 0.3775,
"rewards/accuracies": 0.8125,
"rewards/generated": -3.9137425422668457,
"rewards/margins": 3.994602918624878,
"rewards/real": 0.0808596983551979,
"step": 90
},
{
"epoch": 0.06,
"learning_rate": 2.9069767441860464e-07,
"logits/generated": -2.744694948196411,
"logits/real": -2.7067599296569824,
"logps/generated": -170.92324829101562,
"logps/real": -138.30003356933594,
"loss": 0.3742,
"rewards/accuracies": 0.75,
"rewards/generated": -4.676175594329834,
"rewards/margins": 4.555473804473877,
"rewards/real": -0.12070190906524658,
"step": 100
},
{
"epoch": 0.06,
"eval_logits/generated": -2.743514060974121,
"eval_logits/real": -2.7147889137268066,
"eval_logps/generated": -163.19664001464844,
"eval_logps/real": -138.36029052734375,
"eval_loss": 0.22440293431282043,
"eval_rewards/accuracies": 0.9657643437385559,
"eval_rewards/generated": -6.68798303604126,
"eval_rewards/margins": 6.318512439727783,
"eval_rewards/real": -0.3694704473018646,
"eval_runtime": 332.6054,
"eval_samples_per_second": 15.033,
"eval_steps_per_second": 0.472,
"step": 100
},
{
"epoch": 0.06,
"learning_rate": 3.1976744186046514e-07,
"logits/generated": -2.7230546474456787,
"logits/real": -2.6995229721069336,
"logps/generated": -147.0129852294922,
"logps/real": -138.2716064453125,
"loss": 0.3616,
"rewards/accuracies": 0.75,
"rewards/generated": -3.036238193511963,
"rewards/margins": 2.524616003036499,
"rewards/real": -0.5116221904754639,
"step": 110
},
{
"epoch": 0.07,
"learning_rate": 3.4883720930232557e-07,
"logits/generated": -2.7442073822021484,
"logits/real": -2.6913013458251953,
"logps/generated": -181.04037475585938,
"logps/real": -129.39993286132812,
"loss": 0.353,
"rewards/accuracies": 0.8374999761581421,
"rewards/generated": -6.460757255554199,
"rewards/margins": 6.2252020835876465,
"rewards/real": -0.2355557233095169,
"step": 120
},
{
"epoch": 0.08,
"learning_rate": 3.77906976744186e-07,
"logits/generated": -2.718548536300659,
"logits/real": -2.6518301963806152,
"logps/generated": -178.575439453125,
"logps/real": -130.4645233154297,
"loss": 0.3546,
"rewards/accuracies": 0.7749999761581421,
"rewards/generated": -6.095456123352051,
"rewards/margins": 5.877336502075195,
"rewards/real": -0.2181190699338913,
"step": 130
},
{
"epoch": 0.08,
"learning_rate": 4.069767441860465e-07,
"logits/generated": -2.6532301902770996,
"logits/real": -2.608750820159912,
"logps/generated": -195.4745330810547,
"logps/real": -143.8194580078125,
"loss": 0.3001,
"rewards/accuracies": 0.8125,
"rewards/generated": -7.106230735778809,
"rewards/margins": 6.365324974060059,
"rewards/real": -0.7409064173698425,
"step": 140
},
{
"epoch": 0.09,
"learning_rate": 4.3604651162790694e-07,
"logits/generated": -2.59714674949646,
"logits/real": -2.526071310043335,
"logps/generated": -216.6206817626953,
"logps/real": -126.90464782714844,
"loss": 0.3096,
"rewards/accuracies": 0.824999988079071,
"rewards/generated": -10.569665908813477,
"rewards/margins": 9.900853157043457,
"rewards/real": -0.6688116788864136,
"step": 150
},
{
"epoch": 0.09,
"learning_rate": 4.6511627906976743e-07,
"logits/generated": -2.5910415649414062,
"logits/real": -2.5469748973846436,
"logps/generated": -186.9461669921875,
"logps/real": -154.80783081054688,
"loss": 0.2858,
"rewards/accuracies": 0.8125,
"rewards/generated": -6.714383125305176,
"rewards/margins": 5.151293754577637,
"rewards/real": -1.5630899667739868,
"step": 160
},
{
"epoch": 0.1,
"learning_rate": 4.941860465116279e-07,
"logits/generated": -2.5762853622436523,
"logits/real": -2.506405830383301,
"logps/generated": -200.08053588867188,
"logps/real": -147.4757843017578,
"loss": 0.2498,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -8.602258682250977,
"rewards/margins": 7.1432204246521,
"rewards/real": -1.4590368270874023,
"step": 170
},
{
"epoch": 0.1,
"learning_rate": 4.974143503555268e-07,
"logits/generated": -2.57206392288208,
"logits/real": -2.546504497528076,
"logps/generated": -240.4123077392578,
"logps/real": -146.31198120117188,
"loss": 0.306,
"rewards/accuracies": 0.8125,
"rewards/generated": -11.691746711730957,
"rewards/margins": 10.91369915008545,
"rewards/real": -0.778047502040863,
"step": 180
},
{
"epoch": 0.11,
"learning_rate": 4.941822882999354e-07,
"logits/generated": -2.58891224861145,
"logits/real": -2.5609164237976074,
"logps/generated": -281.37774658203125,
"logps/real": -154.63778686523438,
"loss": 0.2973,
"rewards/accuracies": 0.800000011920929,
"rewards/generated": -15.484718322753906,
"rewards/margins": 14.176872253417969,
"rewards/real": -1.307844877243042,
"step": 190
},
{
"epoch": 0.12,
"learning_rate": 4.909502262443438e-07,
"logits/generated": -2.530224561691284,
"logits/real": -2.517199993133545,
"logps/generated": -273.39190673828125,
"logps/real": -140.16693115234375,
"loss": 0.2528,
"rewards/accuracies": 0.800000011920929,
"rewards/generated": -15.023447036743164,
"rewards/margins": 14.073616027832031,
"rewards/real": -0.9498294591903687,
"step": 200
},
{
"epoch": 0.12,
"eval_logits/generated": -2.457301378250122,
"eval_logits/real": -2.4670825004577637,
"eval_logps/generated": -274.8525085449219,
"eval_logps/real": -147.06544494628906,
"eval_loss": 0.13190196454524994,
"eval_rewards/accuracies": 0.9697452187538147,
"eval_rewards/generated": -17.85356903076172,
"eval_rewards/margins": 16.61358070373535,
"eval_rewards/real": -1.2399863004684448,
"eval_runtime": 327.4999,
"eval_samples_per_second": 15.267,
"eval_steps_per_second": 0.479,
"step": 200
},
{
"epoch": 0.12,
"learning_rate": 4.877181641887524e-07,
"logits/generated": -2.4890782833099365,
"logits/real": -2.429434299468994,
"logps/generated": -314.6974792480469,
"logps/real": -135.95115661621094,
"loss": 0.2686,
"rewards/accuracies": 0.862500011920929,
"rewards/generated": -19.41824722290039,
"rewards/margins": 18.349300384521484,
"rewards/real": -1.0689440965652466,
"step": 210
},
{
"epoch": 0.13,
"learning_rate": 4.84486102133161e-07,
"logits/generated": -2.4631810188293457,
"logits/real": -2.4491894245147705,
"logps/generated": -289.0598449707031,
"logps/real": -143.9029998779297,
"loss": 0.2482,
"rewards/accuracies": 0.8374999761581421,
"rewards/generated": -16.25137710571289,
"rewards/margins": 15.00184440612793,
"rewards/real": -1.2495319843292236,
"step": 220
},
{
"epoch": 0.13,
"learning_rate": 4.812540400775695e-07,
"logits/generated": -2.45487642288208,
"logits/real": -2.4820148944854736,
"logps/generated": -251.5857696533203,
"logps/real": -144.88449096679688,
"loss": 0.2956,
"rewards/accuracies": 0.762499988079071,
"rewards/generated": -13.020822525024414,
"rewards/margins": 11.935991287231445,
"rewards/real": -1.0848290920257568,
"step": 230
},
{
"epoch": 0.14,
"learning_rate": 4.78021978021978e-07,
"logits/generated": -2.500182867050171,
"logits/real": -2.5177927017211914,
"logps/generated": -212.17990112304688,
"logps/real": -164.4898223876953,
"loss": 0.2426,
"rewards/accuracies": 0.8374999761581421,
"rewards/generated": -8.700544357299805,
"rewards/margins": 7.066586971282959,
"rewards/real": -1.6339576244354248,
"step": 240
},
{
"epoch": 0.15,
"learning_rate": 4.747899159663865e-07,
"logits/generated": -2.469505548477173,
"logits/real": -2.513817548751831,
"logps/generated": -250.53787231445312,
"logps/real": -166.6981658935547,
"loss": 0.2077,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -13.388379096984863,
"rewards/margins": 11.607447624206543,
"rewards/real": -1.780932068824768,
"step": 250
},
{
"epoch": 0.15,
"learning_rate": 4.7155785391079506e-07,
"logits/generated": -2.5186924934387207,
"logits/real": -2.4782614707946777,
"logps/generated": -267.3475646972656,
"logps/real": -157.3760223388672,
"loss": 0.2576,
"rewards/accuracies": 0.800000011920929,
"rewards/generated": -14.050939559936523,
"rewards/margins": 11.882719039916992,
"rewards/real": -2.1682217121124268,
"step": 260
},
{
"epoch": 0.16,
"learning_rate": 4.683257918552036e-07,
"logits/generated": -2.5240137577056885,
"logits/real": -2.484891653060913,
"logps/generated": -282.6662902832031,
"logps/real": -164.34274291992188,
"loss": 0.2201,
"rewards/accuracies": 0.7875000238418579,
"rewards/generated": -16.37204360961914,
"rewards/margins": 14.235898971557617,
"rewards/real": -2.1361422538757324,
"step": 270
},
{
"epoch": 0.16,
"learning_rate": 4.6509372979961214e-07,
"logits/generated": -2.4790737628936768,
"logits/real": -2.4403738975524902,
"logps/generated": -236.24008178710938,
"logps/real": -155.16851806640625,
"loss": 0.2114,
"rewards/accuracies": 0.824999988079071,
"rewards/generated": -12.50011157989502,
"rewards/margins": 11.145486831665039,
"rewards/real": -1.354625940322876,
"step": 280
},
{
"epoch": 0.17,
"learning_rate": 4.618616677440207e-07,
"logits/generated": -2.430192232131958,
"logits/real": -2.3869004249572754,
"logps/generated": -249.726806640625,
"logps/real": -151.4705352783203,
"loss": 0.2224,
"rewards/accuracies": 0.875,
"rewards/generated": -13.20459270477295,
"rewards/margins": 11.15159797668457,
"rewards/real": -2.0529935359954834,
"step": 290
},
{
"epoch": 0.17,
"learning_rate": 4.5862960568842917e-07,
"logits/generated": -2.480708122253418,
"logits/real": -2.369931697845459,
"logps/generated": -282.884033203125,
"logps/real": -145.2495574951172,
"loss": 0.2066,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -15.511686325073242,
"rewards/margins": 13.962892532348633,
"rewards/real": -1.5487936735153198,
"step": 300
},
{
"epoch": 0.17,
"eval_logits/generated": -2.425682783126831,
"eval_logits/real": -2.3622233867645264,
"eval_logps/generated": -293.674560546875,
"eval_logps/real": -151.37994384765625,
"eval_loss": 0.11716413497924805,
"eval_rewards/accuracies": 0.9617834687232971,
"eval_rewards/generated": -19.735776901245117,
"eval_rewards/margins": 18.064340591430664,
"eval_rewards/real": -1.6714372634887695,
"eval_runtime": 328.1821,
"eval_samples_per_second": 15.235,
"eval_steps_per_second": 0.478,
"step": 300
},
{
"epoch": 0.18,
"learning_rate": 4.5539754363283774e-07,
"logits/generated": -2.4415905475616455,
"logits/real": -2.3295607566833496,
"logps/generated": -287.18682861328125,
"logps/real": -155.09823608398438,
"loss": 0.2118,
"rewards/accuracies": 0.7749999761581421,
"rewards/generated": -16.675642013549805,
"rewards/margins": 14.781652450561523,
"rewards/real": -1.8939898014068604,
"step": 310
},
{
"epoch": 0.19,
"learning_rate": 4.5216548157724625e-07,
"logits/generated": -2.463688850402832,
"logits/real": -2.4042744636535645,
"logps/generated": -241.97543334960938,
"logps/real": -152.55368041992188,
"loss": 0.2114,
"rewards/accuracies": 0.862500011920929,
"rewards/generated": -12.119455337524414,
"rewards/margins": 10.819900512695312,
"rewards/real": -1.2995555400848389,
"step": 320
},
{
"epoch": 0.19,
"learning_rate": 4.489334195216548e-07,
"logits/generated": -2.4680073261260986,
"logits/real": -2.427863359451294,
"logps/generated": -251.1258087158203,
"logps/real": -163.27366638183594,
"loss": 0.2059,
"rewards/accuracies": 0.9375,
"rewards/generated": -12.832430839538574,
"rewards/margins": 11.404412269592285,
"rewards/real": -1.4280211925506592,
"step": 330
},
{
"epoch": 0.2,
"learning_rate": 4.4570135746606334e-07,
"logits/generated": -2.4172284603118896,
"logits/real": -2.3573529720306396,
"logps/generated": -298.1008605957031,
"logps/real": -156.8496856689453,
"loss": 0.1686,
"rewards/accuracies": 0.875,
"rewards/generated": -18.446123123168945,
"rewards/margins": 16.2222900390625,
"rewards/real": -2.2238337993621826,
"step": 340
},
{
"epoch": 0.2,
"learning_rate": 4.4246929541047185e-07,
"logits/generated": -2.4144504070281982,
"logits/real": -2.3207590579986572,
"logps/generated": -212.0926055908203,
"logps/real": -166.0170135498047,
"loss": 0.2305,
"rewards/accuracies": 0.824999988079071,
"rewards/generated": -9.594592094421387,
"rewards/margins": 6.877285003662109,
"rewards/real": -2.7173075675964355,
"step": 350
},
{
"epoch": 0.21,
"learning_rate": 4.3923723335488036e-07,
"logits/generated": -2.4157674312591553,
"logits/real": -2.4187474250793457,
"logps/generated": -303.06683349609375,
"logps/real": -163.6787872314453,
"loss": 0.1917,
"rewards/accuracies": 0.875,
"rewards/generated": -18.166648864746094,
"rewards/margins": 15.954483032226562,
"rewards/real": -2.2121691703796387,
"step": 360
},
{
"epoch": 0.22,
"learning_rate": 4.3600517129928893e-07,
"logits/generated": -2.465456485748291,
"logits/real": -2.445255756378174,
"logps/generated": -245.350341796875,
"logps/real": -177.14071655273438,
"loss": 0.2148,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -10.88718032836914,
"rewards/margins": 8.129331588745117,
"rewards/real": -2.7578492164611816,
"step": 370
},
{
"epoch": 0.22,
"learning_rate": 4.327731092436975e-07,
"logits/generated": -2.4552018642425537,
"logits/real": -2.424823760986328,
"logps/generated": -252.9711456298828,
"logps/real": -154.53909301757812,
"loss": 0.254,
"rewards/accuracies": 0.8125,
"rewards/generated": -13.141611099243164,
"rewards/margins": 10.592477798461914,
"rewards/real": -2.5491321086883545,
"step": 380
},
{
"epoch": 0.23,
"learning_rate": 4.2954104718810596e-07,
"logits/generated": -2.464360475540161,
"logits/real": -2.444789409637451,
"logps/generated": -296.9568786621094,
"logps/real": -163.8868408203125,
"loss": 0.2066,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -17.908769607543945,
"rewards/margins": 15.718464851379395,
"rewards/real": -2.190305233001709,
"step": 390
},
{
"epoch": 0.23,
"learning_rate": 4.2630898513251453e-07,
"logits/generated": -2.506937265396118,
"logits/real": -2.470973253250122,
"logps/generated": -294.0633239746094,
"logps/real": -160.49642944335938,
"loss": 0.2207,
"rewards/accuracies": 0.7875000238418579,
"rewards/generated": -17.2155704498291,
"rewards/margins": 14.985618591308594,
"rewards/real": -2.229950189590454,
"step": 400
},
{
"epoch": 0.23,
"eval_logits/generated": -2.4888620376586914,
"eval_logits/real": -2.452549695968628,
"eval_logps/generated": -303.0499572753906,
"eval_logps/real": -154.09176635742188,
"eval_loss": 0.1093968003988266,
"eval_rewards/accuracies": 0.9729299545288086,
"eval_rewards/generated": -20.673315048217773,
"eval_rewards/margins": 18.73069953918457,
"eval_rewards/real": -1.942617416381836,
"eval_runtime": 327.2801,
"eval_samples_per_second": 15.277,
"eval_steps_per_second": 0.48,
"step": 400
},
{
"epoch": 0.24,
"learning_rate": 4.2307692307692304e-07,
"logits/generated": -2.543400287628174,
"logits/real": -2.417271614074707,
"logps/generated": -281.9232177734375,
"logps/real": -161.21182250976562,
"loss": 0.1673,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -14.634592056274414,
"rewards/margins": 12.80103588104248,
"rewards/real": -1.8335540294647217,
"step": 410
},
{
"epoch": 0.24,
"learning_rate": 4.198448610213316e-07,
"logits/generated": -2.477651357650757,
"logits/real": -2.4087789058685303,
"logps/generated": -285.4056701660156,
"logps/real": -157.8638916015625,
"loss": 0.201,
"rewards/accuracies": 0.862500011920929,
"rewards/generated": -16.256460189819336,
"rewards/margins": 13.421223640441895,
"rewards/real": -2.835240602493286,
"step": 420
},
{
"epoch": 0.25,
"learning_rate": 4.166127989657401e-07,
"logits/generated": -2.4897210597991943,
"logits/real": -2.4585866928100586,
"logps/generated": -301.2572937011719,
"logps/real": -160.141845703125,
"loss": 0.1924,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -17.867128372192383,
"rewards/margins": 15.476908683776855,
"rewards/real": -2.3902173042297363,
"step": 430
},
{
"epoch": 0.26,
"learning_rate": 4.1338073691014864e-07,
"logits/generated": -2.5035512447357178,
"logits/real": -2.3802120685577393,
"logps/generated": -332.97222900390625,
"logps/real": -154.29495239257812,
"loss": 0.175,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -21.23163414001465,
"rewards/margins": 18.370525360107422,
"rewards/real": -2.861109972000122,
"step": 440
},
{
"epoch": 0.26,
"learning_rate": 4.1014867485455715e-07,
"logits/generated": -2.418759822845459,
"logits/real": -2.3379578590393066,
"logps/generated": -336.86175537109375,
"logps/real": -156.2376251220703,
"loss": 0.1849,
"rewards/accuracies": 0.8500000238418579,
"rewards/generated": -22.108346939086914,
"rewards/margins": 19.20734977722168,
"rewards/real": -2.900996685028076,
"step": 450
},
{
"epoch": 0.27,
"learning_rate": 4.069166127989657e-07,
"logits/generated": -2.4547057151794434,
"logits/real": -2.286864995956421,
"logps/generated": -276.150390625,
"logps/real": -144.59564208984375,
"loss": 0.1647,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -16.615272521972656,
"rewards/margins": 14.403627395629883,
"rewards/real": -2.211641788482666,
"step": 460
},
{
"epoch": 0.27,
"learning_rate": 4.036845507433743e-07,
"logits/generated": -2.4923219680786133,
"logits/real": -2.362297296524048,
"logps/generated": -312.7582702636719,
"logps/real": -159.96646118164062,
"loss": 0.1664,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -18.795948028564453,
"rewards/margins": 16.829730987548828,
"rewards/real": -1.9662189483642578,
"step": 470
},
{
"epoch": 0.28,
"learning_rate": 4.004524886877828e-07,
"logits/generated": -2.490182399749756,
"logits/real": -2.3277366161346436,
"logps/generated": -391.92047119140625,
"logps/real": -153.3131866455078,
"loss": 0.1659,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -28.46506118774414,
"rewards/margins": 26.065074920654297,
"rewards/real": -2.39998459815979,
"step": 480
},
{
"epoch": 0.29,
"learning_rate": 3.972204266321913e-07,
"logits/generated": -2.507981777191162,
"logits/real": -2.405111789703369,
"logps/generated": -244.44027709960938,
"logps/real": -147.1367950439453,
"loss": 0.1879,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -13.030759811401367,
"rewards/margins": 11.476921081542969,
"rewards/real": -1.5538378953933716,
"step": 490
},
{
"epoch": 0.29,
"learning_rate": 3.9398836457659983e-07,
"logits/generated": -2.5344769954681396,
"logits/real": -2.3722431659698486,
"logps/generated": -196.79986572265625,
"logps/real": -149.31695556640625,
"loss": 0.4379,
"rewards/accuracies": 0.875,
"rewards/generated": -6.881800174713135,
"rewards/margins": 5.857499599456787,
"rewards/real": -1.0242998600006104,
"step": 500
},
{
"epoch": 0.29,
"eval_logits/generated": -2.386992931365967,
"eval_logits/real": -2.344109296798706,
"eval_logps/generated": -179.73768615722656,
"eval_logps/real": -144.66737365722656,
"eval_loss": 0.11515343934297562,
"eval_rewards/accuracies": 0.9665604829788208,
"eval_rewards/generated": -8.342087745666504,
"eval_rewards/margins": 7.341910362243652,
"eval_rewards/real": -1.000178575515747,
"eval_runtime": 326.2978,
"eval_samples_per_second": 15.323,
"eval_steps_per_second": 0.481,
"step": 500
},
{
"epoch": 0.3,
"learning_rate": 3.907563025210084e-07,
"logits/generated": -2.3947510719299316,
"logits/real": -2.409266471862793,
"logps/generated": -194.82321166992188,
"logps/real": -157.6947784423828,
"loss": 0.1571,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -7.0470075607299805,
"rewards/margins": 5.375405311584473,
"rewards/real": -1.6716020107269287,
"step": 510
},
{
"epoch": 0.3,
"learning_rate": 3.875242404654169e-07,
"logits/generated": -2.335791826248169,
"logits/real": -2.3169474601745605,
"logps/generated": -207.33126831054688,
"logps/real": -156.51400756835938,
"loss": 0.1419,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -9.35986042022705,
"rewards/margins": 6.684755802154541,
"rewards/real": -2.675104856491089,
"step": 520
},
{
"epoch": 0.31,
"learning_rate": 3.842921784098255e-07,
"logits/generated": -2.3748762607574463,
"logits/real": -2.400601625442505,
"logps/generated": -222.042724609375,
"logps/real": -170.61839294433594,
"loss": 0.1629,
"rewards/accuracies": 0.875,
"rewards/generated": -9.549965858459473,
"rewards/margins": 7.031239986419678,
"rewards/real": -2.518725633621216,
"step": 530
},
{
"epoch": 0.31,
"learning_rate": 3.8106011635423394e-07,
"logits/generated": -2.3414711952209473,
"logits/real": -2.2793216705322266,
"logps/generated": -219.4429168701172,
"logps/real": -160.81027221679688,
"loss": 0.1361,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -10.753046989440918,
"rewards/margins": 8.526832580566406,
"rewards/real": -2.22621488571167,
"step": 540
},
{
"epoch": 0.32,
"learning_rate": 3.778280542986425e-07,
"logits/generated": -2.356356143951416,
"logits/real": -2.2714390754699707,
"logps/generated": -216.5394287109375,
"logps/real": -145.5234832763672,
"loss": 0.1829,
"rewards/accuracies": 0.875,
"rewards/generated": -10.430225372314453,
"rewards/margins": 8.845321655273438,
"rewards/real": -1.5849040746688843,
"step": 550
},
{
"epoch": 0.33,
"learning_rate": 3.745959922430511e-07,
"logits/generated": -2.3817245960235596,
"logits/real": -2.2892487049102783,
"logps/generated": -200.70140075683594,
"logps/real": -155.33592224121094,
"loss": 0.1745,
"rewards/accuracies": 0.8374999761581421,
"rewards/generated": -8.53106689453125,
"rewards/margins": 6.320080280303955,
"rewards/real": -2.210986614227295,
"step": 560
},
{
"epoch": 0.33,
"learning_rate": 3.713639301874596e-07,
"logits/generated": -2.365756034851074,
"logits/real": -2.330933094024658,
"logps/generated": -228.14828491210938,
"logps/real": -170.01014709472656,
"loss": 0.1399,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -10.85645866394043,
"rewards/margins": 8.518800735473633,
"rewards/real": -2.3376574516296387,
"step": 570
},
{
"epoch": 0.34,
"learning_rate": 3.6813186813186816e-07,
"logits/generated": -2.2974698543548584,
"logits/real": -2.2853493690490723,
"logps/generated": -231.7186737060547,
"logps/real": -154.68409729003906,
"loss": 0.144,
"rewards/accuracies": 0.875,
"rewards/generated": -11.895675659179688,
"rewards/margins": 9.608539581298828,
"rewards/real": -2.2871367931365967,
"step": 580
},
{
"epoch": 0.34,
"learning_rate": 3.648998060762766e-07,
"logits/generated": -2.323545217514038,
"logits/real": -2.2863316535949707,
"logps/generated": -216.95333862304688,
"logps/real": -152.7818145751953,
"loss": 0.1667,
"rewards/accuracies": 0.8374999761581421,
"rewards/generated": -9.12224006652832,
"rewards/margins": 7.0738372802734375,
"rewards/real": -2.0484039783477783,
"step": 590
},
{
"epoch": 0.35,
"learning_rate": 3.616677440206852e-07,
"logits/generated": -2.39691162109375,
"logits/real": -2.270139217376709,
"logps/generated": -199.4903106689453,
"logps/real": -154.24615478515625,
"loss": 0.1517,
"rewards/accuracies": 0.875,
"rewards/generated": -6.92580509185791,
"rewards/margins": 5.0083699226379395,
"rewards/real": -1.9174346923828125,
"step": 600
},
{
"epoch": 0.35,
"eval_logits/generated": -2.2691421508789062,
"eval_logits/real": -2.274235725402832,
"eval_logps/generated": -225.55331420898438,
"eval_logps/real": -151.24253845214844,
"eval_loss": 0.09844768047332764,
"eval_rewards/accuracies": 0.9745222926139832,
"eval_rewards/generated": -12.923652648925781,
"eval_rewards/margins": 11.265958786010742,
"eval_rewards/real": -1.6576942205429077,
"eval_runtime": 324.4499,
"eval_samples_per_second": 15.411,
"eval_steps_per_second": 0.484,
"step": 600
},
{
"epoch": 0.35,
"learning_rate": 3.584356819650937e-07,
"logits/generated": -2.3561298847198486,
"logits/real": -2.2546021938323975,
"logps/generated": -272.76910400390625,
"logps/real": -166.93630981445312,
"loss": 0.1387,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -13.198400497436523,
"rewards/margins": 10.639951705932617,
"rewards/real": -2.5584499835968018,
"step": 610
},
{
"epoch": 0.36,
"learning_rate": 3.5520361990950227e-07,
"logits/generated": -2.3207361698150635,
"logits/real": -2.2687323093414307,
"logps/generated": -254.7984619140625,
"logps/real": -156.127197265625,
"loss": 0.1328,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -13.316617965698242,
"rewards/margins": 11.088491439819336,
"rewards/real": -2.228126049041748,
"step": 620
},
{
"epoch": 0.37,
"learning_rate": 3.519715578539108e-07,
"logits/generated": -2.2628586292266846,
"logits/real": -2.2417876720428467,
"logps/generated": -217.26290893554688,
"logps/real": -147.60806274414062,
"loss": 0.1532,
"rewards/accuracies": 0.862500011920929,
"rewards/generated": -10.866621017456055,
"rewards/margins": 8.915276527404785,
"rewards/real": -1.95134699344635,
"step": 630
},
{
"epoch": 0.37,
"learning_rate": 3.487394957983193e-07,
"logits/generated": -2.2968461513519287,
"logits/real": -2.362973928451538,
"logps/generated": -230.63998413085938,
"logps/real": -171.29905700683594,
"loss": 0.1631,
"rewards/accuracies": 0.862500011920929,
"rewards/generated": -10.994275093078613,
"rewards/margins": 8.367854118347168,
"rewards/real": -2.626420497894287,
"step": 640
},
{
"epoch": 0.38,
"learning_rate": 3.4550743374272786e-07,
"logits/generated": -2.4199037551879883,
"logits/real": -2.3053011894226074,
"logps/generated": -240.74765014648438,
"logps/real": -161.87173461914062,
"loss": 0.1482,
"rewards/accuracies": 0.875,
"rewards/generated": -11.601387023925781,
"rewards/margins": 8.92815113067627,
"rewards/real": -2.6732351779937744,
"step": 650
},
{
"epoch": 0.38,
"learning_rate": 3.422753716871364e-07,
"logits/generated": -2.2776732444763184,
"logits/real": -2.212689161300659,
"logps/generated": -256.89129638671875,
"logps/real": -144.78775024414062,
"loss": 0.1372,
"rewards/accuracies": 0.824999988079071,
"rewards/generated": -14.179117202758789,
"rewards/margins": 11.40053653717041,
"rewards/real": -2.778578996658325,
"step": 660
},
{
"epoch": 0.39,
"learning_rate": 3.3904330963154494e-07,
"logits/generated": -2.2712647914886475,
"logits/real": -2.31594181060791,
"logps/generated": -285.45355224609375,
"logps/real": -159.91629028320312,
"loss": 0.1318,
"rewards/accuracies": 0.9375,
"rewards/generated": -15.960909843444824,
"rewards/margins": 13.720555305480957,
"rewards/real": -2.2403564453125,
"step": 670
},
{
"epoch": 0.4,
"learning_rate": 3.358112475759534e-07,
"logits/generated": -2.2503881454467773,
"logits/real": -2.2118101119995117,
"logps/generated": -269.7525939941406,
"logps/real": -148.83535766601562,
"loss": 0.1308,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -14.820501327514648,
"rewards/margins": 12.614280700683594,
"rewards/real": -2.206221580505371,
"step": 680
},
{
"epoch": 0.4,
"learning_rate": 3.3257918552036197e-07,
"logits/generated": -2.292738676071167,
"logits/real": -2.2858123779296875,
"logps/generated": -216.58059692382812,
"logps/real": -155.36279296875,
"loss": 0.1334,
"rewards/accuracies": 0.875,
"rewards/generated": -10.16190242767334,
"rewards/margins": 8.57901668548584,
"rewards/real": -1.5828853845596313,
"step": 690
},
{
"epoch": 0.41,
"learning_rate": 3.293471234647705e-07,
"logits/generated": -2.191201686859131,
"logits/real": -2.1718242168426514,
"logps/generated": -248.41610717773438,
"logps/real": -161.17881774902344,
"loss": 0.1708,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -14.112585067749023,
"rewards/margins": 11.10939884185791,
"rewards/real": -3.0031871795654297,
"step": 700
},
{
"epoch": 0.41,
"eval_logits/generated": -2.234299659729004,
"eval_logits/real": -2.2124428749084473,
"eval_logps/generated": -238.25738525390625,
"eval_logps/real": -154.1605224609375,
"eval_loss": 0.0865996927022934,
"eval_rewards/accuracies": 0.9745222926139832,
"eval_rewards/generated": -14.19405746459961,
"eval_rewards/margins": 12.244565963745117,
"eval_rewards/real": -1.949493408203125,
"eval_runtime": 326.243,
"eval_samples_per_second": 15.326,
"eval_steps_per_second": 0.481,
"step": 700
},
{
"epoch": 0.41,
"learning_rate": 3.2611506140917905e-07,
"logits/generated": -2.3678855895996094,
"logits/real": -2.254812717437744,
"logps/generated": -200.08316040039062,
"logps/real": -157.531494140625,
"loss": 0.1238,
"rewards/accuracies": 0.862500011920929,
"rewards/generated": -8.445894241333008,
"rewards/margins": 6.2266740798950195,
"rewards/real": -2.2192206382751465,
"step": 710
},
{
"epoch": 0.42,
"learning_rate": 3.2288299935358757e-07,
"logits/generated": -2.294254779815674,
"logits/real": -2.110302448272705,
"logps/generated": -250.7073516845703,
"logps/real": -144.10202026367188,
"loss": 0.1251,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -13.250715255737305,
"rewards/margins": 10.665332794189453,
"rewards/real": -2.585383415222168,
"step": 720
},
{
"epoch": 0.42,
"learning_rate": 3.196509372979961e-07,
"logits/generated": -2.277039051055908,
"logits/real": -2.163405656814575,
"logps/generated": -249.3871612548828,
"logps/real": -158.72262573242188,
"loss": 0.1312,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -12.719002723693848,
"rewards/margins": 10.320619583129883,
"rewards/real": -2.398383617401123,
"step": 730
},
{
"epoch": 0.43,
"learning_rate": 3.1641887524240465e-07,
"logits/generated": -2.1525888442993164,
"logits/real": -2.1593430042266846,
"logps/generated": -276.4872741699219,
"logps/real": -169.02743530273438,
"loss": 0.0971,
"rewards/accuracies": 0.9375,
"rewards/generated": -15.876765251159668,
"rewards/margins": 13.597285270690918,
"rewards/real": -2.279479503631592,
"step": 740
},
{
"epoch": 0.44,
"learning_rate": 3.1318681318681316e-07,
"logits/generated": -2.177074432373047,
"logits/real": -2.260298013687134,
"logps/generated": -236.02206420898438,
"logps/real": -170.76596069335938,
"loss": 0.1008,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -11.86265754699707,
"rewards/margins": 9.2439546585083,
"rewards/real": -2.6187024116516113,
"step": 750
},
{
"epoch": 0.44,
"learning_rate": 3.0995475113122173e-07,
"logits/generated": -2.1248366832733154,
"logits/real": -2.187145471572876,
"logps/generated": -271.3382263183594,
"logps/real": -168.44320678710938,
"loss": 0.1217,
"rewards/accuracies": 0.875,
"rewards/generated": -15.90544319152832,
"rewards/margins": 12.456186294555664,
"rewards/real": -3.4492554664611816,
"step": 760
},
{
"epoch": 0.45,
"learning_rate": 3.0672268907563024e-07,
"logits/generated": -2.2197773456573486,
"logits/real": -2.114558219909668,
"logps/generated": -241.92642211914062,
"logps/real": -143.5634307861328,
"loss": 0.1154,
"rewards/accuracies": 0.8374999761581421,
"rewards/generated": -13.74907398223877,
"rewards/margins": 11.252575874328613,
"rewards/real": -2.4964985847473145,
"step": 770
},
{
"epoch": 0.45,
"learning_rate": 3.0349062702003876e-07,
"logits/generated": -2.202606678009033,
"logits/real": -2.1499722003936768,
"logps/generated": -271.87738037109375,
"logps/real": -161.31857299804688,
"loss": 0.1251,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -16.116554260253906,
"rewards/margins": 13.589006423950195,
"rewards/real": -2.5275490283966064,
"step": 780
},
{
"epoch": 0.46,
"learning_rate": 3.0025856496444727e-07,
"logits/generated": -2.345546007156372,
"logits/real": -2.2405107021331787,
"logps/generated": -220.90701293945312,
"logps/real": -160.68800354003906,
"loss": 0.1607,
"rewards/accuracies": 0.875,
"rewards/generated": -10.754340171813965,
"rewards/margins": 8.159425735473633,
"rewards/real": -2.5949156284332275,
"step": 790
},
{
"epoch": 0.47,
"learning_rate": 2.9702650290885584e-07,
"logits/generated": -2.2388525009155273,
"logits/real": -2.1353354454040527,
"logps/generated": -277.51861572265625,
"logps/real": -154.5572509765625,
"loss": 0.1135,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -16.760658264160156,
"rewards/margins": 13.79723072052002,
"rewards/real": -2.963425874710083,
"step": 800
},
{
"epoch": 0.47,
"eval_logits/generated": -2.1789329051971436,
"eval_logits/real": -2.198742628097534,
"eval_logps/generated": -260.81390380859375,
"eval_logps/real": -164.8361358642578,
"eval_loss": 0.08095261454582214,
"eval_rewards/accuracies": 0.9785031676292419,
"eval_rewards/generated": -16.449708938598633,
"eval_rewards/margins": 13.432653427124023,
"eval_rewards/real": -3.017056465148926,
"eval_runtime": 325.1652,
"eval_samples_per_second": 15.377,
"eval_steps_per_second": 0.483,
"step": 800
},
{
"epoch": 0.47,
"learning_rate": 2.9379444085326436e-07,
"logits/generated": -2.269972085952759,
"logits/real": -2.200730085372925,
"logps/generated": -241.37060546875,
"logps/real": -160.4932403564453,
"loss": 0.1393,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -12.075285911560059,
"rewards/margins": 8.877761840820312,
"rewards/real": -3.1975245475769043,
"step": 810
},
{
"epoch": 0.48,
"learning_rate": 2.905623787976729e-07,
"logits/generated": -2.1529345512390137,
"logits/real": -2.130723237991333,
"logps/generated": -278.5174560546875,
"logps/real": -172.23374938964844,
"loss": 0.1035,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -16.6492977142334,
"rewards/margins": 13.676687240600586,
"rewards/real": -2.9726104736328125,
"step": 820
},
{
"epoch": 0.48,
"learning_rate": 2.8733031674208144e-07,
"logits/generated": -2.264868974685669,
"logits/real": -2.2218079566955566,
"logps/generated": -233.82815551757812,
"logps/real": -155.95506286621094,
"loss": 0.1595,
"rewards/accuracies": 0.875,
"rewards/generated": -11.269330024719238,
"rewards/margins": 8.389703750610352,
"rewards/real": -2.879626750946045,
"step": 830
},
{
"epoch": 0.49,
"learning_rate": 2.8409825468648995e-07,
"logits/generated": -2.287550926208496,
"logits/real": -2.292382001876831,
"logps/generated": -261.179443359375,
"logps/real": -170.32998657226562,
"loss": 0.1342,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -14.10278034210205,
"rewards/margins": 11.701745986938477,
"rewards/real": -2.4010345935821533,
"step": 840
},
{
"epoch": 0.49,
"learning_rate": 2.808661926308985e-07,
"logits/generated": -2.33073091506958,
"logits/real": -2.274296283721924,
"logps/generated": -241.5367431640625,
"logps/real": -149.47222900390625,
"loss": 0.115,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -12.70106315612793,
"rewards/margins": 10.1942777633667,
"rewards/real": -2.5067856311798096,
"step": 850
},
{
"epoch": 0.5,
"learning_rate": 2.7763413057530703e-07,
"logits/generated": -2.38000226020813,
"logits/real": -2.3325276374816895,
"logps/generated": -247.8423614501953,
"logps/real": -173.57339477539062,
"loss": 0.131,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -11.641576766967773,
"rewards/margins": 9.302278518676758,
"rewards/real": -2.339297294616699,
"step": 860
},
{
"epoch": 0.51,
"learning_rate": 2.744020685197156e-07,
"logits/generated": -2.4010801315307617,
"logits/real": -2.257711887359619,
"logps/generated": -246.85153198242188,
"logps/real": -166.25039672851562,
"loss": 0.1502,
"rewards/accuracies": 0.9375,
"rewards/generated": -11.700878143310547,
"rewards/margins": 9.326895713806152,
"rewards/real": -2.3739829063415527,
"step": 870
},
{
"epoch": 0.51,
"learning_rate": 2.7117000646412406e-07,
"logits/generated": -2.350351572036743,
"logits/real": -2.226243734359741,
"logps/generated": -263.51776123046875,
"logps/real": -150.32102966308594,
"loss": 0.1257,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -14.319729804992676,
"rewards/margins": 12.047926902770996,
"rewards/real": -2.2718007564544678,
"step": 880
},
{
"epoch": 0.52,
"learning_rate": 2.6793794440853263e-07,
"logits/generated": -2.4051427841186523,
"logits/real": -2.3996026515960693,
"logps/generated": -235.8258819580078,
"logps/real": -158.03170776367188,
"loss": 0.1842,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -12.911969184875488,
"rewards/margins": 11.036763191223145,
"rewards/real": -1.8752062320709229,
"step": 890
},
{
"epoch": 0.52,
"learning_rate": 2.6470588235294114e-07,
"logits/generated": -2.4504921436309814,
"logits/real": -2.312851905822754,
"logps/generated": -253.6083526611328,
"logps/real": -168.83355712890625,
"loss": 0.1364,
"rewards/accuracies": 0.8125,
"rewards/generated": -12.168034553527832,
"rewards/margins": 8.825535774230957,
"rewards/real": -3.3424973487854004,
"step": 900
},
{
"epoch": 0.52,
"eval_logits/generated": -2.3294870853424072,
"eval_logits/real": -2.3367509841918945,
"eval_logps/generated": -244.4078369140625,
"eval_logps/real": -160.21507263183594,
"eval_loss": 0.08480827510356903,
"eval_rewards/accuracies": 0.9729299545288086,
"eval_rewards/generated": -14.809103965759277,
"eval_rewards/margins": 12.254154205322266,
"eval_rewards/real": -2.5549488067626953,
"eval_runtime": 325.558,
"eval_samples_per_second": 15.358,
"eval_steps_per_second": 0.482,
"step": 900
},
{
"epoch": 0.53,
"learning_rate": 2.614738202973497e-07,
"logits/generated": -2.37715482711792,
"logits/real": -2.3596484661102295,
"logps/generated": -225.02578735351562,
"logps/real": -159.61839294433594,
"loss": 0.1056,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -11.049263000488281,
"rewards/margins": 7.907676696777344,
"rewards/real": -3.1415863037109375,
"step": 910
},
{
"epoch": 0.54,
"learning_rate": 2.582417582417583e-07,
"logits/generated": -2.3448710441589355,
"logits/real": -2.323169231414795,
"logps/generated": -246.141357421875,
"logps/real": -171.561767578125,
"loss": 0.0895,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -12.90569019317627,
"rewards/margins": 9.77393913269043,
"rewards/real": -3.1317505836486816,
"step": 920
},
{
"epoch": 0.54,
"learning_rate": 2.5500969618616674e-07,
"logits/generated": -2.3373780250549316,
"logits/real": -2.2862701416015625,
"logps/generated": -267.49029541015625,
"logps/real": -170.78024291992188,
"loss": 0.1233,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -14.549161911010742,
"rewards/margins": 11.566902160644531,
"rewards/real": -2.9822611808776855,
"step": 930
},
{
"epoch": 0.55,
"learning_rate": 2.517776341305753e-07,
"logits/generated": -2.359157085418701,
"logits/real": -2.3121438026428223,
"logps/generated": -207.0237274169922,
"logps/real": -151.88720703125,
"loss": 0.1634,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -10.258100509643555,
"rewards/margins": 7.330819606781006,
"rewards/real": -2.927279472351074,
"step": 940
},
{
"epoch": 0.55,
"learning_rate": 2.485455720749838e-07,
"logits/generated": -2.397444009780884,
"logits/real": -2.3451006412506104,
"logps/generated": -255.43264770507812,
"logps/real": -172.95346069335938,
"loss": 0.0939,
"rewards/accuracies": 0.9375,
"rewards/generated": -12.080055236816406,
"rewards/margins": 9.478727340698242,
"rewards/real": -2.601327419281006,
"step": 950
},
{
"epoch": 0.56,
"learning_rate": 2.4531351001939233e-07,
"logits/generated": -2.354841947555542,
"logits/real": -2.379093885421753,
"logps/generated": -275.9471435546875,
"logps/real": -189.16122436523438,
"loss": 0.1414,
"rewards/accuracies": 0.8500000238418579,
"rewards/generated": -14.650967597961426,
"rewards/margins": 11.281542778015137,
"rewards/real": -3.369422435760498,
"step": 960
},
{
"epoch": 0.56,
"learning_rate": 2.420814479638009e-07,
"logits/generated": -2.3819050788879395,
"logits/real": -2.2698609828948975,
"logps/generated": -278.7743835449219,
"logps/real": -171.54296875,
"loss": 0.1414,
"rewards/accuracies": 0.8500000238418579,
"rewards/generated": -14.173556327819824,
"rewards/margins": 10.707517623901367,
"rewards/real": -3.4660377502441406,
"step": 970
},
{
"epoch": 0.57,
"learning_rate": 2.388493859082094e-07,
"logits/generated": -2.349855899810791,
"logits/real": -2.2393364906311035,
"logps/generated": -276.61981201171875,
"logps/real": -174.84996032714844,
"loss": 0.1173,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -15.051767349243164,
"rewards/margins": 11.607970237731934,
"rewards/real": -3.443795680999756,
"step": 980
},
{
"epoch": 0.58,
"learning_rate": 2.3561732385261796e-07,
"logits/generated": -2.334862232208252,
"logits/real": -2.3226168155670166,
"logps/generated": -275.31512451171875,
"logps/real": -156.58956909179688,
"loss": 0.1284,
"rewards/accuracies": 0.862500011920929,
"rewards/generated": -14.1647310256958,
"rewards/margins": 12.026006698608398,
"rewards/real": -2.1387248039245605,
"step": 990
},
{
"epoch": 0.58,
"learning_rate": 2.323852617970265e-07,
"logits/generated": -2.498349189758301,
"logits/real": -2.4359524250030518,
"logps/generated": -256.3857727050781,
"logps/real": -175.72055053710938,
"loss": 0.1142,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -11.867788314819336,
"rewards/margins": 8.631060600280762,
"rewards/real": -3.2367255687713623,
"step": 1000
},
{
"epoch": 0.58,
"eval_logits/generated": -2.4644362926483154,
"eval_logits/real": -2.478717565536499,
"eval_logps/generated": -202.75526428222656,
"eval_logps/real": -161.3638458251953,
"eval_loss": 0.09024719893932343,
"eval_rewards/accuracies": 0.9713375568389893,
"eval_rewards/generated": -10.643847465515137,
"eval_rewards/margins": 7.974020957946777,
"eval_rewards/real": -2.669825553894043,
"eval_runtime": 325.0517,
"eval_samples_per_second": 15.382,
"eval_steps_per_second": 0.483,
"step": 1000
},
{
"epoch": 0.59,
"learning_rate": 2.29153199741435e-07,
"logits/generated": -2.505728244781494,
"logits/real": -2.505375385284424,
"logps/generated": -230.0421600341797,
"logps/real": -168.85704040527344,
"loss": 0.1156,
"rewards/accuracies": 0.9375,
"rewards/generated": -10.663579940795898,
"rewards/margins": 7.189150333404541,
"rewards/real": -3.474430799484253,
"step": 1010
},
{
"epoch": 0.59,
"learning_rate": 2.2592113768584355e-07,
"logits/generated": -2.5055289268493652,
"logits/real": -2.5188517570495605,
"logps/generated": -253.8979034423828,
"logps/real": -199.04542541503906,
"loss": 0.1064,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -11.993169784545898,
"rewards/margins": 7.974666595458984,
"rewards/real": -4.018503665924072,
"step": 1020
},
{
"epoch": 0.6,
"learning_rate": 2.226890756302521e-07,
"logits/generated": -2.465848207473755,
"logits/real": -2.450221061706543,
"logps/generated": -267.3594665527344,
"logps/real": -190.57058715820312,
"loss": 0.0993,
"rewards/accuracies": 0.8500000238418579,
"rewards/generated": -13.37690544128418,
"rewards/margins": 9.04112720489502,
"rewards/real": -4.33577823638916,
"step": 1030
},
{
"epoch": 0.61,
"learning_rate": 2.1945701357466063e-07,
"logits/generated": -2.4809277057647705,
"logits/real": -2.448288917541504,
"logps/generated": -244.9989776611328,
"logps/real": -176.53506469726562,
"loss": 0.138,
"rewards/accuracies": 0.875,
"rewards/generated": -12.209803581237793,
"rewards/margins": 7.888075828552246,
"rewards/real": -4.321727752685547,
"step": 1040
},
{
"epoch": 0.61,
"learning_rate": 2.1622495151906917e-07,
"logits/generated": -2.5043458938598633,
"logits/real": -2.4832687377929688,
"logps/generated": -245.507568359375,
"logps/real": -173.8964385986328,
"loss": 0.0956,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -11.928030014038086,
"rewards/margins": 8.440313339233398,
"rewards/real": -3.4877171516418457,
"step": 1050
},
{
"epoch": 0.62,
"learning_rate": 2.129928894634777e-07,
"logits/generated": -2.5427870750427246,
"logits/real": -2.499741315841675,
"logps/generated": -237.24887084960938,
"logps/real": -168.909423828125,
"loss": 0.1476,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -10.177728652954102,
"rewards/margins": 7.252813816070557,
"rewards/real": -2.9249141216278076,
"step": 1060
},
{
"epoch": 0.62,
"learning_rate": 2.0976082740788623e-07,
"logits/generated": -2.4873366355895996,
"logits/real": -2.4385483264923096,
"logps/generated": -206.4569549560547,
"logps/real": -153.2084503173828,
"loss": 0.1223,
"rewards/accuracies": 0.8374999761581421,
"rewards/generated": -10.100156784057617,
"rewards/margins": 7.314939975738525,
"rewards/real": -2.785216808319092,
"step": 1070
},
{
"epoch": 0.63,
"learning_rate": 2.0652876535229474e-07,
"logits/generated": -2.5049309730529785,
"logits/real": -2.456437587738037,
"logps/generated": -214.497314453125,
"logps/real": -164.29551696777344,
"loss": 0.0969,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -9.749935150146484,
"rewards/margins": 6.255521774291992,
"rewards/real": -3.4944145679473877,
"step": 1080
},
{
"epoch": 0.63,
"learning_rate": 2.0329670329670329e-07,
"logits/generated": -2.5299432277679443,
"logits/real": -2.425293445587158,
"logps/generated": -239.8467254638672,
"logps/real": -163.84437561035156,
"loss": 0.0989,
"rewards/accuracies": 0.9375,
"rewards/generated": -10.874212265014648,
"rewards/margins": 8.04706859588623,
"rewards/real": -2.8271448612213135,
"step": 1090
},
{
"epoch": 0.64,
"learning_rate": 2.0006464124111183e-07,
"logits/generated": -2.4800021648406982,
"logits/real": -2.451796293258667,
"logps/generated": -237.79653930664062,
"logps/real": -167.1535186767578,
"loss": 0.1332,
"rewards/accuracies": 0.875,
"rewards/generated": -12.058743476867676,
"rewards/margins": 8.454904556274414,
"rewards/real": -3.6038384437561035,
"step": 1100
},
{
"epoch": 0.64,
"eval_logits/generated": -2.4417178630828857,
"eval_logits/real": -2.462984800338745,
"eval_logps/generated": -215.05523681640625,
"eval_logps/real": -162.10159301757812,
"eval_loss": 0.07708299905061722,
"eval_rewards/accuracies": 0.9785031676292419,
"eval_rewards/generated": -11.873842239379883,
"eval_rewards/margins": 9.130241394042969,
"eval_rewards/real": -2.743600368499756,
"eval_runtime": 326.4428,
"eval_samples_per_second": 15.317,
"eval_steps_per_second": 0.481,
"step": 1100
},
{
"epoch": 0.65,
"learning_rate": 1.9683257918552034e-07,
"logits/generated": -2.4482009410858154,
"logits/real": -2.4847419261932373,
"logps/generated": -222.0105438232422,
"logps/real": -158.9528350830078,
"loss": 0.1071,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -11.227673530578613,
"rewards/margins": 8.365415573120117,
"rewards/real": -2.862257480621338,
"step": 1110
},
{
"epoch": 0.65,
"learning_rate": 1.9360051712992888e-07,
"logits/generated": -2.4634220600128174,
"logits/real": -2.4727044105529785,
"logps/generated": -239.70742797851562,
"logps/real": -174.02890014648438,
"loss": 0.0992,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -12.579872131347656,
"rewards/margins": 8.563131332397461,
"rewards/real": -4.016742706298828,
"step": 1120
},
{
"epoch": 0.66,
"learning_rate": 1.903684550743374e-07,
"logits/generated": -2.430037260055542,
"logits/real": -2.390148639678955,
"logps/generated": -231.12417602539062,
"logps/real": -151.89051818847656,
"loss": 0.1022,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -12.785438537597656,
"rewards/margins": 9.053568840026855,
"rewards/real": -3.731870174407959,
"step": 1130
},
{
"epoch": 0.66,
"learning_rate": 1.8713639301874596e-07,
"logits/generated": -2.4577813148498535,
"logits/real": -2.450479030609131,
"logps/generated": -251.19302368164062,
"logps/real": -171.656982421875,
"loss": 0.1237,
"rewards/accuracies": 0.875,
"rewards/generated": -13.755993843078613,
"rewards/margins": 9.537080764770508,
"rewards/real": -4.2189130783081055,
"step": 1140
},
{
"epoch": 0.67,
"learning_rate": 1.839043309631545e-07,
"logits/generated": -2.449868679046631,
"logits/real": -2.485016107559204,
"logps/generated": -256.3826904296875,
"logps/real": -172.46572875976562,
"loss": 0.1185,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -13.964202880859375,
"rewards/margins": 9.694369316101074,
"rewards/real": -4.269833564758301,
"step": 1150
},
{
"epoch": 0.67,
"learning_rate": 1.8067226890756302e-07,
"logits/generated": -2.500181198120117,
"logits/real": -2.4853765964508057,
"logps/generated": -255.6665802001953,
"logps/real": -180.01492309570312,
"loss": 0.0614,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -13.007906913757324,
"rewards/margins": 9.470663070678711,
"rewards/real": -3.537243604660034,
"step": 1160
},
{
"epoch": 0.68,
"learning_rate": 1.7744020685197156e-07,
"logits/generated": -2.451597213745117,
"logits/real": -2.4578232765197754,
"logps/generated": -269.48602294921875,
"logps/real": -180.25762939453125,
"loss": 0.0499,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -14.529942512512207,
"rewards/margins": 11.10853385925293,
"rewards/real": -3.421407699584961,
"step": 1170
},
{
"epoch": 0.69,
"learning_rate": 1.7420814479638007e-07,
"logits/generated": -2.4766621589660645,
"logits/real": -2.428915023803711,
"logps/generated": -268.610107421875,
"logps/real": -183.23977661132812,
"loss": 0.0714,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -13.613744735717773,
"rewards/margins": 8.734598159790039,
"rewards/real": -4.879148006439209,
"step": 1180
},
{
"epoch": 0.69,
"learning_rate": 1.7097608274078861e-07,
"logits/generated": -2.4673948287963867,
"logits/real": -2.426107883453369,
"logps/generated": -268.60968017578125,
"logps/real": -185.16867065429688,
"loss": 0.1291,
"rewards/accuracies": 0.9375,
"rewards/generated": -13.530069351196289,
"rewards/margins": 9.933688163757324,
"rewards/real": -3.5963797569274902,
"step": 1190
},
{
"epoch": 0.7,
"learning_rate": 1.6774402068519713e-07,
"logits/generated": -2.4350733757019043,
"logits/real": -2.3934569358825684,
"logps/generated": -263.73193359375,
"logps/real": -164.91358947753906,
"loss": 0.1007,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -14.513885498046875,
"rewards/margins": 10.701467514038086,
"rewards/real": -3.8124184608459473,
"step": 1200
},
{
"epoch": 0.7,
"eval_logits/generated": -2.3948426246643066,
"eval_logits/real": -2.425471067428589,
"eval_logps/generated": -238.21556091308594,
"eval_logps/real": -168.78074645996094,
"eval_loss": 0.07581960409879684,
"eval_rewards/accuracies": 0.9745222926139832,
"eval_rewards/generated": -14.189876556396484,
"eval_rewards/margins": 10.778358459472656,
"eval_rewards/real": -3.4115185737609863,
"eval_runtime": 324.5114,
"eval_samples_per_second": 15.408,
"eval_steps_per_second": 0.484,
"step": 1200
},
{
"epoch": 0.7,
"learning_rate": 1.6451195862960567e-07,
"logits/generated": -2.4927570819854736,
"logits/real": -2.4464633464813232,
"logps/generated": -231.09664916992188,
"logps/real": -159.4072265625,
"loss": 0.0937,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -11.880216598510742,
"rewards/margins": 8.993806838989258,
"rewards/real": -2.886411190032959,
"step": 1210
},
{
"epoch": 0.71,
"learning_rate": 1.6127989657401424e-07,
"logits/generated": -2.4784765243530273,
"logits/real": -2.47477650642395,
"logps/generated": -233.77877807617188,
"logps/real": -159.3743438720703,
"loss": 0.0946,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -11.6648588180542,
"rewards/margins": 8.083456039428711,
"rewards/real": -3.5814037322998047,
"step": 1220
},
{
"epoch": 0.72,
"learning_rate": 1.5804783451842275e-07,
"logits/generated": -2.490647077560425,
"logits/real": -2.463700532913208,
"logps/generated": -277.70172119140625,
"logps/real": -174.46707153320312,
"loss": 0.1369,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -15.150970458984375,
"rewards/margins": 11.056761741638184,
"rewards/real": -4.094208717346191,
"step": 1230
},
{
"epoch": 0.72,
"learning_rate": 1.548157724628313e-07,
"logits/generated": -2.551166534423828,
"logits/real": -2.5086779594421387,
"logps/generated": -257.8534240722656,
"logps/real": -183.65255737304688,
"loss": 0.1252,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -13.366009712219238,
"rewards/margins": 9.787813186645508,
"rewards/real": -3.578195095062256,
"step": 1240
},
{
"epoch": 0.73,
"learning_rate": 1.515837104072398e-07,
"logits/generated": -2.5603954792022705,
"logits/real": -2.539309501647949,
"logps/generated": -229.6404266357422,
"logps/real": -172.22093200683594,
"loss": 0.1479,
"rewards/accuracies": 0.824999988079071,
"rewards/generated": -11.011828422546387,
"rewards/margins": 7.343997955322266,
"rewards/real": -3.6678295135498047,
"step": 1250
},
{
"epoch": 0.73,
"learning_rate": 1.4835164835164835e-07,
"logits/generated": -2.575138568878174,
"logits/real": -2.495459794998169,
"logps/generated": -220.6327362060547,
"logps/real": -144.017333984375,
"loss": 0.0779,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -10.787625312805176,
"rewards/margins": 8.316181182861328,
"rewards/real": -2.4714438915252686,
"step": 1260
},
{
"epoch": 0.74,
"learning_rate": 1.451195862960569e-07,
"logits/generated": -2.5613296031951904,
"logits/real": -2.569815158843994,
"logps/generated": -232.4954071044922,
"logps/real": -175.23074340820312,
"loss": 0.0922,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -11.065740585327148,
"rewards/margins": 7.911036491394043,
"rewards/real": -3.1547024250030518,
"step": 1270
},
{
"epoch": 0.74,
"learning_rate": 1.418875242404654e-07,
"logits/generated": -2.539595365524292,
"logits/real": -2.5092930793762207,
"logps/generated": -220.0555419921875,
"logps/real": -165.4029083251953,
"loss": 0.1347,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -10.106549263000488,
"rewards/margins": 7.346714019775391,
"rewards/real": -2.7598352432250977,
"step": 1280
},
{
"epoch": 0.75,
"learning_rate": 1.3865546218487394e-07,
"logits/generated": -2.6019704341888428,
"logits/real": -2.5571720600128174,
"logps/generated": -237.4385223388672,
"logps/real": -167.28253173828125,
"loss": 0.0784,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -10.303973197937012,
"rewards/margins": 8.126482963562012,
"rewards/real": -2.1774911880493164,
"step": 1290
},
{
"epoch": 0.76,
"learning_rate": 1.3542340012928246e-07,
"logits/generated": -2.5232183933258057,
"logits/real": -2.523380756378174,
"logps/generated": -240.03897094726562,
"logps/real": -154.43798828125,
"loss": 0.1306,
"rewards/accuracies": 0.8500000238418579,
"rewards/generated": -12.480694770812988,
"rewards/margins": 9.408671379089355,
"rewards/real": -3.072023868560791,
"step": 1300
},
{
"epoch": 0.76,
"eval_logits/generated": -2.5270004272460938,
"eval_logits/real": -2.537477493286133,
"eval_logps/generated": -207.37860107421875,
"eval_logps/real": -158.70806884765625,
"eval_loss": 0.07650701701641083,
"eval_rewards/accuracies": 0.9753184914588928,
"eval_rewards/generated": -11.106179237365723,
"eval_rewards/margins": 8.70193099975586,
"eval_rewards/real": -2.404249906539917,
"eval_runtime": 324.1,
"eval_samples_per_second": 15.427,
"eval_steps_per_second": 0.484,
"step": 1300
},
{
"epoch": 0.76,
"learning_rate": 1.3219133807369102e-07,
"logits/generated": -2.5183393955230713,
"logits/real": -2.483584403991699,
"logps/generated": -237.892578125,
"logps/real": -154.9105682373047,
"loss": 0.1098,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -12.671589851379395,
"rewards/margins": 10.110515594482422,
"rewards/real": -2.561074733734131,
"step": 1310
},
{
"epoch": 0.77,
"learning_rate": 1.2895927601809956e-07,
"logits/generated": -2.5193591117858887,
"logits/real": -2.546970844268799,
"logps/generated": -238.3896484375,
"logps/real": -164.31768798828125,
"loss": 0.0707,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -12.500856399536133,
"rewards/margins": 9.552709579467773,
"rewards/real": -2.9481449127197266,
"step": 1320
},
{
"epoch": 0.77,
"learning_rate": 1.2572721396250808e-07,
"logits/generated": -2.5152533054351807,
"logits/real": -2.501941442489624,
"logps/generated": -221.7640380859375,
"logps/real": -161.3094940185547,
"loss": 0.1377,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -10.399030685424805,
"rewards/margins": 7.858689785003662,
"rewards/real": -2.540339946746826,
"step": 1330
},
{
"epoch": 0.78,
"learning_rate": 1.224951519069166e-07,
"logits/generated": -2.5427238941192627,
"logits/real": -2.5108070373535156,
"logps/generated": -258.5174560546875,
"logps/real": -167.56686401367188,
"loss": 0.1183,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -12.709803581237793,
"rewards/margins": 10.17547607421875,
"rewards/real": -2.5343270301818848,
"step": 1340
},
{
"epoch": 0.79,
"learning_rate": 1.1926308985132513e-07,
"logits/generated": -2.528041124343872,
"logits/real": -2.5164339542388916,
"logps/generated": -224.2671356201172,
"logps/real": -177.60060119628906,
"loss": 0.0929,
"rewards/accuracies": 0.875,
"rewards/generated": -10.380260467529297,
"rewards/margins": 7.477629661560059,
"rewards/real": -2.9026293754577637,
"step": 1350
},
{
"epoch": 0.79,
"learning_rate": 1.1603102779573367e-07,
"logits/generated": -2.518709421157837,
"logits/real": -2.4333877563476562,
"logps/generated": -222.98892211914062,
"logps/real": -153.88491821289062,
"loss": 0.0828,
"rewards/accuracies": 0.9375,
"rewards/generated": -10.736051559448242,
"rewards/margins": 7.6436614990234375,
"rewards/real": -3.092390537261963,
"step": 1360
},
{
"epoch": 0.8,
"learning_rate": 1.127989657401422e-07,
"logits/generated": -2.5053441524505615,
"logits/real": -2.4574134349823,
"logps/generated": -243.2651824951172,
"logps/real": -179.1326904296875,
"loss": 0.1207,
"rewards/accuracies": 0.875,
"rewards/generated": -12.284380912780762,
"rewards/margins": 8.803535461425781,
"rewards/real": -3.4808456897735596,
"step": 1370
},
{
"epoch": 0.8,
"learning_rate": 1.0956690368455074e-07,
"logits/generated": -2.4903757572174072,
"logits/real": -2.5189337730407715,
"logps/generated": -226.8483428955078,
"logps/real": -171.20724487304688,
"loss": 0.0713,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -11.636832237243652,
"rewards/margins": 8.194231033325195,
"rewards/real": -3.4426021575927734,
"step": 1380
},
{
"epoch": 0.81,
"learning_rate": 1.0633484162895927e-07,
"logits/generated": -2.5010979175567627,
"logits/real": -2.4416332244873047,
"logps/generated": -225.2292938232422,
"logps/real": -172.9666748046875,
"loss": 0.0973,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -10.62346363067627,
"rewards/margins": 6.646616458892822,
"rewards/real": -3.976848602294922,
"step": 1390
},
{
"epoch": 0.81,
"learning_rate": 1.031027795733678e-07,
"logits/generated": -2.489281177520752,
"logits/real": -2.480539560317993,
"logps/generated": -241.0604248046875,
"logps/real": -165.22604370117188,
"loss": 0.1084,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -12.310083389282227,
"rewards/margins": 9.358478546142578,
"rewards/real": -2.951603651046753,
"step": 1400
},
{
"epoch": 0.81,
"eval_logits/generated": -2.476224422454834,
"eval_logits/real": -2.484823226928711,
"eval_logps/generated": -220.34217834472656,
"eval_logps/real": -162.47093200683594,
"eval_loss": 0.07595483213663101,
"eval_rewards/accuracies": 0.9745222926139832,
"eval_rewards/generated": -12.402539253234863,
"eval_rewards/margins": 9.622005462646484,
"eval_rewards/real": -2.780533790588379,
"eval_runtime": 325.3302,
"eval_samples_per_second": 15.369,
"eval_steps_per_second": 0.483,
"step": 1400
},
{
"epoch": 0.82,
"learning_rate": 9.987071751777634e-08,
"logits/generated": -2.517509937286377,
"logits/real": -2.5105528831481934,
"logps/generated": -220.71542358398438,
"logps/real": -163.03253173828125,
"loss": 0.0891,
"rewards/accuracies": 0.9375,
"rewards/generated": -10.003129959106445,
"rewards/margins": 6.5026068687438965,
"rewards/real": -3.500523328781128,
"step": 1410
},
{
"epoch": 0.83,
"learning_rate": 9.663865546218488e-08,
"logits/generated": -2.494823694229126,
"logits/real": -2.4722931385040283,
"logps/generated": -218.6509552001953,
"logps/real": -150.20521545410156,
"loss": 0.0958,
"rewards/accuracies": 0.9375,
"rewards/generated": -11.17399787902832,
"rewards/margins": 8.31296157836914,
"rewards/real": -2.861036777496338,
"step": 1420
},
{
"epoch": 0.83,
"learning_rate": 9.340659340659341e-08,
"logits/generated": -2.4990344047546387,
"logits/real": -2.4319558143615723,
"logps/generated": -241.905517578125,
"logps/real": -166.25537109375,
"loss": 0.1237,
"rewards/accuracies": 0.875,
"rewards/generated": -12.346821784973145,
"rewards/margins": 8.885249137878418,
"rewards/real": -3.461573839187622,
"step": 1430
},
{
"epoch": 0.84,
"learning_rate": 9.017453135100193e-08,
"logits/generated": -2.4997031688690186,
"logits/real": -2.510288715362549,
"logps/generated": -258.8919677734375,
"logps/real": -195.13092041015625,
"loss": 0.0683,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -13.229423522949219,
"rewards/margins": 9.894552230834961,
"rewards/real": -3.3348708152770996,
"step": 1440
},
{
"epoch": 0.84,
"learning_rate": 8.694246929541046e-08,
"logits/generated": -2.4908547401428223,
"logits/real": -2.469104051589966,
"logps/generated": -258.60723876953125,
"logps/real": -183.93992614746094,
"loss": 0.1175,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -12.868890762329102,
"rewards/margins": 8.966911315917969,
"rewards/real": -3.9019787311553955,
"step": 1450
},
{
"epoch": 0.85,
"learning_rate": 8.371040723981899e-08,
"logits/generated": -2.4881350994110107,
"logits/real": -2.4609992504119873,
"logps/generated": -257.45709228515625,
"logps/real": -172.5931396484375,
"loss": 0.0944,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -13.51276969909668,
"rewards/margins": 9.734931945800781,
"rewards/real": -3.777839183807373,
"step": 1460
},
{
"epoch": 0.86,
"learning_rate": 8.047834518422754e-08,
"logits/generated": -2.5072503089904785,
"logits/real": -2.4428117275238037,
"logps/generated": -254.014892578125,
"logps/real": -170.81788635253906,
"loss": 0.0791,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -12.408576011657715,
"rewards/margins": 8.652534484863281,
"rewards/real": -3.756040573120117,
"step": 1470
},
{
"epoch": 0.86,
"learning_rate": 7.724628312863607e-08,
"logits/generated": -2.4876208305358887,
"logits/real": -2.463144302368164,
"logps/generated": -272.12139892578125,
"logps/real": -181.99913024902344,
"loss": 0.0929,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -14.419462203979492,
"rewards/margins": 10.72993278503418,
"rewards/real": -3.68953013420105,
"step": 1480
},
{
"epoch": 0.87,
"learning_rate": 7.40142210730446e-08,
"logits/generated": -2.4878337383270264,
"logits/real": -2.502197504043579,
"logps/generated": -257.6641845703125,
"logps/real": -180.44590759277344,
"loss": 0.0694,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -12.998161315917969,
"rewards/margins": 10.025131225585938,
"rewards/real": -2.9730300903320312,
"step": 1490
},
{
"epoch": 0.87,
"learning_rate": 7.078215901745313e-08,
"logits/generated": -2.4765686988830566,
"logits/real": -2.459664821624756,
"logps/generated": -239.1787109375,
"logps/real": -167.0284423828125,
"loss": 0.1494,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -11.897302627563477,
"rewards/margins": 8.363332748413086,
"rewards/real": -3.5339698791503906,
"step": 1500
},
{
"epoch": 0.87,
"eval_logits/generated": -2.465639591217041,
"eval_logits/real": -2.4750711917877197,
"eval_logps/generated": -226.3308868408203,
"eval_logps/real": -164.7202606201172,
"eval_loss": 0.0739506259560585,
"eval_rewards/accuracies": 0.9713375568389893,
"eval_rewards/generated": -13.001410484313965,
"eval_rewards/margins": 9.995938301086426,
"eval_rewards/real": -3.005469799041748,
"eval_runtime": 325.1178,
"eval_samples_per_second": 15.379,
"eval_steps_per_second": 0.483,
"step": 1500
},
{
"epoch": 0.88,
"learning_rate": 6.755009696186167e-08,
"logits/generated": -2.4574570655822754,
"logits/real": -2.478517770767212,
"logps/generated": -235.34963989257812,
"logps/real": -165.80545043945312,
"loss": 0.0921,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -12.185359001159668,
"rewards/margins": 8.462077140808105,
"rewards/real": -3.723281145095825,
"step": 1510
},
{
"epoch": 0.88,
"learning_rate": 6.43180349062702e-08,
"logits/generated": -2.467890501022339,
"logits/real": -2.449784994125366,
"logps/generated": -265.53546142578125,
"logps/real": -190.40701293945312,
"loss": 0.0903,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -14.360349655151367,
"rewards/margins": 10.199980735778809,
"rewards/real": -4.160367965698242,
"step": 1520
},
{
"epoch": 0.89,
"learning_rate": 6.108597285067872e-08,
"logits/generated": -2.4615697860717773,
"logits/real": -2.3793063163757324,
"logps/generated": -239.954833984375,
"logps/real": -161.84376525878906,
"loss": 0.0975,
"rewards/accuracies": 0.8500000238418579,
"rewards/generated": -12.927061080932617,
"rewards/margins": 8.876288414001465,
"rewards/real": -4.050771236419678,
"step": 1530
},
{
"epoch": 0.9,
"learning_rate": 5.785391079508726e-08,
"logits/generated": -2.4796242713928223,
"logits/real": -2.4357521533966064,
"logps/generated": -227.2936248779297,
"logps/real": -163.6289825439453,
"loss": 0.0818,
"rewards/accuracies": 0.9375,
"rewards/generated": -11.878579139709473,
"rewards/margins": 8.282608985900879,
"rewards/real": -3.595970630645752,
"step": 1540
},
{
"epoch": 0.9,
"learning_rate": 5.46218487394958e-08,
"logits/generated": -2.4726433753967285,
"logits/real": -2.3735179901123047,
"logps/generated": -239.4695587158203,
"logps/real": -159.3350372314453,
"loss": 0.0978,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -12.301145553588867,
"rewards/margins": 8.831232070922852,
"rewards/real": -3.4699130058288574,
"step": 1550
},
{
"epoch": 0.91,
"learning_rate": 5.1389786683904325e-08,
"logits/generated": -2.488933801651001,
"logits/real": -2.4772191047668457,
"logps/generated": -250.51992797851562,
"logps/real": -159.865478515625,
"loss": 0.0695,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -12.80781364440918,
"rewards/margins": 9.52310848236084,
"rewards/real": -3.2847042083740234,
"step": 1560
},
{
"epoch": 0.91,
"learning_rate": 4.8157724628312865e-08,
"logits/generated": -2.5149645805358887,
"logits/real": -2.483194589614868,
"logps/generated": -259.7389221191406,
"logps/real": -188.4167022705078,
"loss": 0.0978,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -12.594181060791016,
"rewards/margins": 8.321154594421387,
"rewards/real": -4.273025035858154,
"step": 1570
},
{
"epoch": 0.92,
"learning_rate": 4.492566257272139e-08,
"logits/generated": -2.498760461807251,
"logits/real": -2.4244179725646973,
"logps/generated": -255.7240753173828,
"logps/real": -168.46018981933594,
"loss": 0.1101,
"rewards/accuracies": 0.9375,
"rewards/generated": -12.637785911560059,
"rewards/margins": 8.729381561279297,
"rewards/real": -3.9084041118621826,
"step": 1580
},
{
"epoch": 0.92,
"learning_rate": 4.169360051712993e-08,
"logits/generated": -2.4851748943328857,
"logits/real": -2.429105043411255,
"logps/generated": -242.70553588867188,
"logps/real": -165.17166137695312,
"loss": 0.0845,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -12.373160362243652,
"rewards/margins": 8.976916313171387,
"rewards/real": -3.3962435722351074,
"step": 1590
},
{
"epoch": 0.93,
"learning_rate": 3.846153846153846e-08,
"logits/generated": -2.468567371368408,
"logits/real": -2.375533103942871,
"logps/generated": -254.6434326171875,
"logps/real": -174.11781311035156,
"loss": 0.1099,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -13.183265686035156,
"rewards/margins": 8.735334396362305,
"rewards/real": -4.447932243347168,
"step": 1600
},
{
"epoch": 0.93,
"eval_logits/generated": -2.425264358520508,
"eval_logits/real": -2.4319591522216797,
"eval_logps/generated": -233.0531768798828,
"eval_logps/real": -169.63656616210938,
"eval_loss": 0.07743819802999496,
"eval_rewards/accuracies": 0.9729299545288086,
"eval_rewards/generated": -13.673635482788086,
"eval_rewards/margins": 10.176533699035645,
"eval_rewards/real": -3.497100591659546,
"eval_runtime": 325.5057,
"eval_samples_per_second": 15.361,
"eval_steps_per_second": 0.482,
"step": 1600
},
{
"epoch": 0.94,
"learning_rate": 3.5229476405946995e-08,
"logits/generated": -2.4387266635894775,
"logits/real": -2.408844470977783,
"logps/generated": -240.92184448242188,
"logps/real": -162.61293029785156,
"loss": 0.1445,
"rewards/accuracies": 0.875,
"rewards/generated": -13.79778003692627,
"rewards/margins": 10.461742401123047,
"rewards/real": -3.3360390663146973,
"step": 1610
},
{
"epoch": 0.94,
"learning_rate": 3.199741435035552e-08,
"logits/generated": -2.44557785987854,
"logits/real": -2.4553208351135254,
"logps/generated": -238.5950164794922,
"logps/real": -176.37416076660156,
"loss": 0.1105,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -12.455824851989746,
"rewards/margins": 8.037620544433594,
"rewards/real": -4.418205261230469,
"step": 1620
},
{
"epoch": 0.95,
"learning_rate": 2.8765352294764057e-08,
"logits/generated": -2.457484006881714,
"logits/real": -2.424367904663086,
"logps/generated": -241.5567626953125,
"logps/real": -170.40658569335938,
"loss": 0.1305,
"rewards/accuracies": 0.8125,
"rewards/generated": -12.365758895874023,
"rewards/margins": 8.254980087280273,
"rewards/real": -4.110778331756592,
"step": 1630
},
{
"epoch": 0.95,
"learning_rate": 2.553329023917259e-08,
"logits/generated": -2.4767932891845703,
"logits/real": -2.4457132816314697,
"logps/generated": -255.2021026611328,
"logps/real": -187.205078125,
"loss": 0.0809,
"rewards/accuracies": 0.875,
"rewards/generated": -12.562596321105957,
"rewards/margins": 8.74045467376709,
"rewards/real": -3.82214093208313,
"step": 1640
},
{
"epoch": 0.96,
"learning_rate": 2.2301228183581126e-08,
"logits/generated": -2.4593024253845215,
"logits/real": -2.4330244064331055,
"logps/generated": -261.88787841796875,
"logps/real": -179.29541015625,
"loss": 0.1031,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -13.55891227722168,
"rewards/margins": 9.299590110778809,
"rewards/real": -4.259322166442871,
"step": 1650
},
{
"epoch": 0.97,
"learning_rate": 1.906916612798966e-08,
"logits/generated": -2.467500925064087,
"logits/real": -2.4541561603546143,
"logps/generated": -245.1857452392578,
"logps/real": -171.3612060546875,
"loss": 0.1162,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -12.426248550415039,
"rewards/margins": 8.478652000427246,
"rewards/real": -3.9475975036621094,
"step": 1660
},
{
"epoch": 0.97,
"learning_rate": 1.5837104072398187e-08,
"logits/generated": -2.459001064300537,
"logits/real": -2.430206775665283,
"logps/generated": -254.04745483398438,
"logps/real": -169.44154357910156,
"loss": 0.0977,
"rewards/accuracies": 0.8500000238418579,
"rewards/generated": -13.104756355285645,
"rewards/margins": 9.2105073928833,
"rewards/real": -3.8942489624023438,
"step": 1670
},
{
"epoch": 0.98,
"learning_rate": 1.2605042016806723e-08,
"logits/generated": -2.448197364807129,
"logits/real": -2.455432891845703,
"logps/generated": -263.8450622558594,
"logps/real": -177.14439392089844,
"loss": 0.0979,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -13.550898551940918,
"rewards/margins": 9.719705581665039,
"rewards/real": -3.831193208694458,
"step": 1680
},
{
"epoch": 0.98,
"learning_rate": 9.372979961215254e-09,
"logits/generated": -2.4577107429504395,
"logits/real": -2.4188618659973145,
"logps/generated": -236.26318359375,
"logps/real": -172.19528198242188,
"loss": 0.0944,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -11.246160507202148,
"rewards/margins": 7.3128533363342285,
"rewards/real": -3.93330717086792,
"step": 1690
},
{
"epoch": 0.99,
"learning_rate": 6.140917905623787e-09,
"logits/generated": -2.471553325653076,
"logits/real": -2.3995721340179443,
"logps/generated": -262.373291015625,
"logps/real": -165.32789611816406,
"loss": 0.0906,
"rewards/accuracies": 0.9375,
"rewards/generated": -13.321420669555664,
"rewards/margins": 10.259626388549805,
"rewards/real": -3.0617949962615967,
"step": 1700
},
{
"epoch": 0.99,
"eval_logits/generated": -2.4198453426361084,
"eval_logits/real": -2.423133373260498,
"eval_logps/generated": -228.7878875732422,
"eval_logps/real": -165.37667846679688,
"eval_loss": 0.07379047572612762,
"eval_rewards/accuracies": 0.9713375568389893,
"eval_rewards/generated": -13.247109413146973,
"eval_rewards/margins": 10.17599868774414,
"eval_rewards/real": -3.0711097717285156,
"eval_runtime": 325.2984,
"eval_samples_per_second": 15.371,
"eval_steps_per_second": 0.483,
"step": 1700
},
{
"epoch": 0.99,
"learning_rate": 2.9088558500323206e-09,
"logits/generated": -2.4227101802825928,
"logits/real": -2.464235305786133,
"logps/generated": -243.1490020751953,
"logps/real": -169.15988159179688,
"loss": 0.0914,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -12.564062118530273,
"rewards/margins": 8.887134552001953,
"rewards/real": -3.676928758621216,
"step": 1710
},
{
"epoch": 1.0,
"step": 1719,
"total_flos": 0.0,
"train_loss": 0.16738235295130943,
"train_runtime": 14752.9454,
"train_samples_per_second": 3.728,
"train_steps_per_second": 0.117
}
],
"logging_steps": 10,
"max_steps": 1719,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}