{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 1719, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.9069767441860465e-09, "logits/generated": -3.012260675430298, "logits/real": -2.981379270553589, "logps/generated": -121.78553009033203, "logps/real": -157.20819091796875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 2.9069767441860464e-08, "logits/generated": -2.961106538772583, "logits/real": -2.9408955574035645, "logps/generated": -125.34223175048828, "logps/real": -137.5188446044922, "loss": 0.6908, "rewards/accuracies": 0.5555555820465088, "rewards/generated": -0.0030116664711385965, "rewards/margins": 0.01261158287525177, "rewards/real": 0.009599916636943817, "step": 10 }, { "epoch": 0.01, "learning_rate": 5.813953488372093e-08, "logits/generated": -2.963073253631592, "logits/real": -2.9351158142089844, "logps/generated": -122.87374114990234, "logps/real": -133.8837127685547, "loss": 0.6375, "rewards/accuracies": 0.6000000238418579, "rewards/generated": -0.058080412447452545, "rewards/margins": 0.14583885669708252, "rewards/real": 0.08775845915079117, "step": 20 }, { "epoch": 0.02, "learning_rate": 8.720930232558139e-08, "logits/generated": -2.9640278816223145, "logits/real": -2.9266650676727295, "logps/generated": -115.86125183105469, "logps/real": -129.8009796142578, "loss": 0.5498, "rewards/accuracies": 0.8125, "rewards/generated": -0.30484524369239807, "rewards/margins": 0.5773354768753052, "rewards/real": 0.2724902033805847, "step": 30 }, { "epoch": 0.02, "learning_rate": 1.1627906976744186e-07, "logits/generated": -2.9708516597747803, "logits/real": -2.8813812732696533, "logps/generated": -122.1348876953125, "logps/real": -123.1031723022461, "loss": 0.5168, "rewards/accuracies": 0.75, "rewards/generated": -0.330232709646225, "rewards/margins": 0.7562737464904785, "rewards/real": 0.42604103684425354, "step": 40 }, { "epoch": 0.03, "learning_rate": 1.4534883720930232e-07, "logits/generated": -2.961805820465088, "logits/real": -2.8516037464141846, "logps/generated": -130.59262084960938, "logps/real": -131.277099609375, "loss": 0.4904, "rewards/accuracies": 0.75, "rewards/generated": -0.683895468711853, "rewards/margins": 1.2348084449768066, "rewards/real": 0.5509130358695984, "step": 50 }, { "epoch": 0.03, "learning_rate": 1.7441860465116279e-07, "logits/generated": -2.8933191299438477, "logits/real": -2.8156707286834717, "logps/generated": -131.11270141601562, "logps/real": -138.29629516601562, "loss": 0.4853, "rewards/accuracies": 0.7749999761581421, "rewards/generated": -1.2479474544525146, "rewards/margins": 1.7186332941055298, "rewards/real": 0.4706856608390808, "step": 60 }, { "epoch": 0.04, "learning_rate": 2.0348837209302325e-07, "logits/generated": -2.84045672416687, "logits/real": -2.816912889480591, "logps/generated": -129.1736297607422, "logps/real": -138.9403076171875, "loss": 0.4151, "rewards/accuracies": 0.8125, "rewards/generated": -1.7937465906143188, "rewards/margins": 2.168488025665283, "rewards/real": 0.3747415244579315, "step": 70 }, { "epoch": 0.05, "learning_rate": 2.3255813953488372e-07, "logits/generated": -2.8057825565338135, "logits/real": -2.7365589141845703, "logps/generated": -134.1271209716797, "logps/real": -128.5058135986328, "loss": 0.4281, "rewards/accuracies": 0.762499988079071, "rewards/generated": -2.476748466491699, "rewards/margins": 2.6212470531463623, "rewards/real": 0.14449895918369293, "step": 80 }, { "epoch": 0.05, "learning_rate": 2.616279069767442e-07, "logits/generated": -2.8339638710021973, "logits/real": -2.7323813438415527, "logps/generated": -161.02420043945312, "logps/real": -132.14015197753906, "loss": 0.3775, "rewards/accuracies": 0.8125, "rewards/generated": -3.9137425422668457, "rewards/margins": 3.994602918624878, "rewards/real": 0.0808596983551979, "step": 90 }, { "epoch": 0.06, "learning_rate": 2.9069767441860464e-07, "logits/generated": -2.744694948196411, "logits/real": -2.7067599296569824, "logps/generated": -170.92324829101562, "logps/real": -138.30003356933594, "loss": 0.3742, "rewards/accuracies": 0.75, "rewards/generated": -4.676175594329834, "rewards/margins": 4.555473804473877, "rewards/real": -0.12070190906524658, "step": 100 }, { "epoch": 0.06, "eval_logits/generated": -2.743514060974121, "eval_logits/real": -2.7147889137268066, "eval_logps/generated": -163.19664001464844, "eval_logps/real": -138.36029052734375, "eval_loss": 0.22440293431282043, "eval_rewards/accuracies": 0.9657643437385559, "eval_rewards/generated": -6.68798303604126, "eval_rewards/margins": 6.318512439727783, "eval_rewards/real": -0.3694704473018646, "eval_runtime": 332.6054, "eval_samples_per_second": 15.033, "eval_steps_per_second": 0.472, "step": 100 }, { "epoch": 0.06, "learning_rate": 3.1976744186046514e-07, "logits/generated": -2.7230546474456787, "logits/real": -2.6995229721069336, "logps/generated": -147.0129852294922, "logps/real": -138.2716064453125, "loss": 0.3616, "rewards/accuracies": 0.75, "rewards/generated": -3.036238193511963, "rewards/margins": 2.524616003036499, "rewards/real": -0.5116221904754639, "step": 110 }, { "epoch": 0.07, "learning_rate": 3.4883720930232557e-07, "logits/generated": -2.7442073822021484, "logits/real": -2.6913013458251953, "logps/generated": -181.04037475585938, "logps/real": -129.39993286132812, "loss": 0.353, "rewards/accuracies": 0.8374999761581421, "rewards/generated": -6.460757255554199, "rewards/margins": 6.2252020835876465, "rewards/real": -0.2355557233095169, "step": 120 }, { "epoch": 0.08, "learning_rate": 3.77906976744186e-07, "logits/generated": -2.718548536300659, "logits/real": -2.6518301963806152, "logps/generated": -178.575439453125, "logps/real": -130.4645233154297, "loss": 0.3546, "rewards/accuracies": 0.7749999761581421, "rewards/generated": -6.095456123352051, "rewards/margins": 5.877336502075195, "rewards/real": -0.2181190699338913, "step": 130 }, { "epoch": 0.08, "learning_rate": 4.069767441860465e-07, "logits/generated": -2.6532301902770996, "logits/real": -2.608750820159912, "logps/generated": -195.4745330810547, "logps/real": -143.8194580078125, "loss": 0.3001, "rewards/accuracies": 0.8125, "rewards/generated": -7.106230735778809, "rewards/margins": 6.365324974060059, "rewards/real": -0.7409064173698425, "step": 140 }, { "epoch": 0.09, "learning_rate": 4.3604651162790694e-07, "logits/generated": -2.59714674949646, "logits/real": -2.526071310043335, "logps/generated": -216.6206817626953, "logps/real": -126.90464782714844, "loss": 0.3096, "rewards/accuracies": 0.824999988079071, "rewards/generated": -10.569665908813477, "rewards/margins": 9.900853157043457, "rewards/real": -0.6688116788864136, "step": 150 }, { "epoch": 0.09, "learning_rate": 4.6511627906976743e-07, "logits/generated": -2.5910415649414062, "logits/real": -2.5469748973846436, "logps/generated": -186.9461669921875, "logps/real": -154.80783081054688, "loss": 0.2858, "rewards/accuracies": 0.8125, "rewards/generated": -6.714383125305176, "rewards/margins": 5.151293754577637, "rewards/real": -1.5630899667739868, "step": 160 }, { "epoch": 0.1, "learning_rate": 4.941860465116279e-07, "logits/generated": -2.5762853622436523, "logits/real": -2.506405830383301, "logps/generated": -200.08053588867188, "logps/real": -147.4757843017578, "loss": 0.2498, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -8.602258682250977, "rewards/margins": 7.1432204246521, "rewards/real": -1.4590368270874023, "step": 170 }, { "epoch": 0.1, "learning_rate": 4.974143503555268e-07, "logits/generated": -2.57206392288208, "logits/real": -2.546504497528076, "logps/generated": -240.4123077392578, "logps/real": -146.31198120117188, "loss": 0.306, "rewards/accuracies": 0.8125, "rewards/generated": -11.691746711730957, "rewards/margins": 10.91369915008545, "rewards/real": -0.778047502040863, "step": 180 }, { "epoch": 0.11, "learning_rate": 4.941822882999354e-07, "logits/generated": -2.58891224861145, "logits/real": -2.5609164237976074, "logps/generated": -281.37774658203125, "logps/real": -154.63778686523438, "loss": 0.2973, "rewards/accuracies": 0.800000011920929, "rewards/generated": -15.484718322753906, "rewards/margins": 14.176872253417969, "rewards/real": -1.307844877243042, "step": 190 }, { "epoch": 0.12, "learning_rate": 4.909502262443438e-07, "logits/generated": -2.530224561691284, "logits/real": -2.517199993133545, "logps/generated": -273.39190673828125, "logps/real": -140.16693115234375, "loss": 0.2528, "rewards/accuracies": 0.800000011920929, "rewards/generated": -15.023447036743164, "rewards/margins": 14.073616027832031, "rewards/real": -0.9498294591903687, "step": 200 }, { "epoch": 0.12, "eval_logits/generated": -2.457301378250122, "eval_logits/real": -2.4670825004577637, "eval_logps/generated": -274.8525085449219, "eval_logps/real": -147.06544494628906, "eval_loss": 0.13190196454524994, "eval_rewards/accuracies": 0.9697452187538147, "eval_rewards/generated": -17.85356903076172, "eval_rewards/margins": 16.61358070373535, "eval_rewards/real": -1.2399863004684448, "eval_runtime": 327.4999, "eval_samples_per_second": 15.267, "eval_steps_per_second": 0.479, "step": 200 }, { "epoch": 0.12, "learning_rate": 4.877181641887524e-07, "logits/generated": -2.4890782833099365, "logits/real": -2.429434299468994, "logps/generated": -314.6974792480469, "logps/real": -135.95115661621094, "loss": 0.2686, "rewards/accuracies": 0.862500011920929, "rewards/generated": -19.41824722290039, "rewards/margins": 18.349300384521484, "rewards/real": -1.0689440965652466, "step": 210 }, { "epoch": 0.13, "learning_rate": 4.84486102133161e-07, "logits/generated": -2.4631810188293457, "logits/real": -2.4491894245147705, "logps/generated": -289.0598449707031, "logps/real": -143.9029998779297, "loss": 0.2482, "rewards/accuracies": 0.8374999761581421, "rewards/generated": -16.25137710571289, "rewards/margins": 15.00184440612793, "rewards/real": -1.2495319843292236, "step": 220 }, { "epoch": 0.13, "learning_rate": 4.812540400775695e-07, "logits/generated": -2.45487642288208, "logits/real": -2.4820148944854736, "logps/generated": -251.5857696533203, "logps/real": -144.88449096679688, "loss": 0.2956, "rewards/accuracies": 0.762499988079071, "rewards/generated": -13.020822525024414, "rewards/margins": 11.935991287231445, "rewards/real": -1.0848290920257568, "step": 230 }, { "epoch": 0.14, "learning_rate": 4.78021978021978e-07, "logits/generated": -2.500182867050171, "logits/real": -2.5177927017211914, "logps/generated": -212.17990112304688, "logps/real": -164.4898223876953, "loss": 0.2426, "rewards/accuracies": 0.8374999761581421, "rewards/generated": -8.700544357299805, "rewards/margins": 7.066586971282959, "rewards/real": -1.6339576244354248, "step": 240 }, { "epoch": 0.15, "learning_rate": 4.747899159663865e-07, "logits/generated": -2.469505548477173, "logits/real": -2.513817548751831, "logps/generated": -250.53787231445312, "logps/real": -166.6981658935547, "loss": 0.2077, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -13.388379096984863, "rewards/margins": 11.607447624206543, "rewards/real": -1.780932068824768, "step": 250 }, { "epoch": 0.15, "learning_rate": 4.7155785391079506e-07, "logits/generated": -2.5186924934387207, "logits/real": -2.4782614707946777, "logps/generated": -267.3475646972656, "logps/real": -157.3760223388672, "loss": 0.2576, "rewards/accuracies": 0.800000011920929, "rewards/generated": -14.050939559936523, "rewards/margins": 11.882719039916992, "rewards/real": -2.1682217121124268, "step": 260 }, { "epoch": 0.16, "learning_rate": 4.683257918552036e-07, "logits/generated": -2.5240137577056885, "logits/real": -2.484891653060913, "logps/generated": -282.6662902832031, "logps/real": -164.34274291992188, "loss": 0.2201, "rewards/accuracies": 0.7875000238418579, "rewards/generated": -16.37204360961914, "rewards/margins": 14.235898971557617, "rewards/real": -2.1361422538757324, "step": 270 }, { "epoch": 0.16, "learning_rate": 4.6509372979961214e-07, "logits/generated": -2.4790737628936768, "logits/real": -2.4403738975524902, "logps/generated": -236.24008178710938, "logps/real": -155.16851806640625, "loss": 0.2114, "rewards/accuracies": 0.824999988079071, "rewards/generated": -12.50011157989502, "rewards/margins": 11.145486831665039, "rewards/real": -1.354625940322876, "step": 280 }, { "epoch": 0.17, "learning_rate": 4.618616677440207e-07, "logits/generated": -2.430192232131958, "logits/real": -2.3869004249572754, "logps/generated": -249.726806640625, "logps/real": -151.4705352783203, "loss": 0.2224, "rewards/accuracies": 0.875, "rewards/generated": -13.20459270477295, "rewards/margins": 11.15159797668457, "rewards/real": -2.0529935359954834, "step": 290 }, { "epoch": 0.17, "learning_rate": 4.5862960568842917e-07, "logits/generated": -2.480708122253418, "logits/real": -2.369931697845459, "logps/generated": -282.884033203125, "logps/real": -145.2495574951172, "loss": 0.2066, "rewards/accuracies": 0.887499988079071, "rewards/generated": -15.511686325073242, "rewards/margins": 13.962892532348633, "rewards/real": -1.5487936735153198, "step": 300 }, { "epoch": 0.17, "eval_logits/generated": -2.425682783126831, "eval_logits/real": -2.3622233867645264, "eval_logps/generated": -293.674560546875, "eval_logps/real": -151.37994384765625, "eval_loss": 0.11716413497924805, "eval_rewards/accuracies": 0.9617834687232971, "eval_rewards/generated": -19.735776901245117, "eval_rewards/margins": 18.064340591430664, "eval_rewards/real": -1.6714372634887695, "eval_runtime": 328.1821, "eval_samples_per_second": 15.235, "eval_steps_per_second": 0.478, "step": 300 }, { "epoch": 0.18, "learning_rate": 4.5539754363283774e-07, "logits/generated": -2.4415905475616455, "logits/real": -2.3295607566833496, "logps/generated": -287.18682861328125, "logps/real": -155.09823608398438, "loss": 0.2118, "rewards/accuracies": 0.7749999761581421, "rewards/generated": -16.675642013549805, "rewards/margins": 14.781652450561523, "rewards/real": -1.8939898014068604, "step": 310 }, { "epoch": 0.19, "learning_rate": 4.5216548157724625e-07, "logits/generated": -2.463688850402832, "logits/real": -2.4042744636535645, "logps/generated": -241.97543334960938, "logps/real": -152.55368041992188, "loss": 0.2114, "rewards/accuracies": 0.862500011920929, "rewards/generated": -12.119455337524414, "rewards/margins": 10.819900512695312, "rewards/real": -1.2995555400848389, "step": 320 }, { "epoch": 0.19, "learning_rate": 4.489334195216548e-07, "logits/generated": -2.4680073261260986, "logits/real": -2.427863359451294, "logps/generated": -251.1258087158203, "logps/real": -163.27366638183594, "loss": 0.2059, "rewards/accuracies": 0.9375, "rewards/generated": -12.832430839538574, "rewards/margins": 11.404412269592285, "rewards/real": -1.4280211925506592, "step": 330 }, { "epoch": 0.2, "learning_rate": 4.4570135746606334e-07, "logits/generated": -2.4172284603118896, "logits/real": -2.3573529720306396, "logps/generated": -298.1008605957031, "logps/real": -156.8496856689453, "loss": 0.1686, "rewards/accuracies": 0.875, "rewards/generated": -18.446123123168945, "rewards/margins": 16.2222900390625, "rewards/real": -2.2238337993621826, "step": 340 }, { "epoch": 0.2, "learning_rate": 4.4246929541047185e-07, "logits/generated": -2.4144504070281982, "logits/real": -2.3207590579986572, "logps/generated": -212.0926055908203, "logps/real": -166.0170135498047, "loss": 0.2305, "rewards/accuracies": 0.824999988079071, "rewards/generated": -9.594592094421387, "rewards/margins": 6.877285003662109, "rewards/real": -2.7173075675964355, "step": 350 }, { "epoch": 0.21, "learning_rate": 4.3923723335488036e-07, "logits/generated": -2.4157674312591553, "logits/real": -2.4187474250793457, "logps/generated": -303.06683349609375, "logps/real": -163.6787872314453, "loss": 0.1917, "rewards/accuracies": 0.875, "rewards/generated": -18.166648864746094, "rewards/margins": 15.954483032226562, "rewards/real": -2.2121691703796387, "step": 360 }, { "epoch": 0.22, "learning_rate": 4.3600517129928893e-07, "logits/generated": -2.465456485748291, "logits/real": -2.445255756378174, "logps/generated": -245.350341796875, "logps/real": -177.14071655273438, "loss": 0.2148, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -10.88718032836914, "rewards/margins": 8.129331588745117, "rewards/real": -2.7578492164611816, "step": 370 }, { "epoch": 0.22, "learning_rate": 4.327731092436975e-07, "logits/generated": -2.4552018642425537, "logits/real": -2.424823760986328, "logps/generated": -252.9711456298828, "logps/real": -154.53909301757812, "loss": 0.254, "rewards/accuracies": 0.8125, "rewards/generated": -13.141611099243164, "rewards/margins": 10.592477798461914, "rewards/real": -2.5491321086883545, "step": 380 }, { "epoch": 0.23, "learning_rate": 4.2954104718810596e-07, "logits/generated": -2.464360475540161, "logits/real": -2.444789409637451, "logps/generated": -296.9568786621094, "logps/real": -163.8868408203125, "loss": 0.2066, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -17.908769607543945, "rewards/margins": 15.718464851379395, "rewards/real": -2.190305233001709, "step": 390 }, { "epoch": 0.23, "learning_rate": 4.2630898513251453e-07, "logits/generated": -2.506937265396118, "logits/real": -2.470973253250122, "logps/generated": -294.0633239746094, "logps/real": -160.49642944335938, "loss": 0.2207, "rewards/accuracies": 0.7875000238418579, "rewards/generated": -17.2155704498291, "rewards/margins": 14.985618591308594, "rewards/real": -2.229950189590454, "step": 400 }, { "epoch": 0.23, "eval_logits/generated": -2.4888620376586914, "eval_logits/real": -2.452549695968628, "eval_logps/generated": -303.0499572753906, "eval_logps/real": -154.09176635742188, "eval_loss": 0.1093968003988266, "eval_rewards/accuracies": 0.9729299545288086, "eval_rewards/generated": -20.673315048217773, "eval_rewards/margins": 18.73069953918457, "eval_rewards/real": -1.942617416381836, "eval_runtime": 327.2801, "eval_samples_per_second": 15.277, "eval_steps_per_second": 0.48, "step": 400 }, { "epoch": 0.24, "learning_rate": 4.2307692307692304e-07, "logits/generated": -2.543400287628174, "logits/real": -2.417271614074707, "logps/generated": -281.9232177734375, "logps/real": -161.21182250976562, "loss": 0.1673, "rewards/accuracies": 0.925000011920929, "rewards/generated": -14.634592056274414, "rewards/margins": 12.80103588104248, "rewards/real": -1.8335540294647217, "step": 410 }, { "epoch": 0.24, "learning_rate": 4.198448610213316e-07, "logits/generated": -2.477651357650757, "logits/real": -2.4087789058685303, "logps/generated": -285.4056701660156, "logps/real": -157.8638916015625, "loss": 0.201, "rewards/accuracies": 0.862500011920929, "rewards/generated": -16.256460189819336, "rewards/margins": 13.421223640441895, "rewards/real": -2.835240602493286, "step": 420 }, { "epoch": 0.25, "learning_rate": 4.166127989657401e-07, "logits/generated": -2.4897210597991943, "logits/real": -2.4585866928100586, "logps/generated": -301.2572937011719, "logps/real": -160.141845703125, "loss": 0.1924, "rewards/accuracies": 0.949999988079071, "rewards/generated": -17.867128372192383, "rewards/margins": 15.476908683776855, "rewards/real": -2.3902173042297363, "step": 430 }, { "epoch": 0.26, "learning_rate": 4.1338073691014864e-07, "logits/generated": -2.5035512447357178, "logits/real": -2.3802120685577393, "logps/generated": -332.97222900390625, "logps/real": -154.29495239257812, "loss": 0.175, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -21.23163414001465, "rewards/margins": 18.370525360107422, "rewards/real": -2.861109972000122, "step": 440 }, { "epoch": 0.26, "learning_rate": 4.1014867485455715e-07, "logits/generated": -2.418759822845459, "logits/real": -2.3379578590393066, "logps/generated": -336.86175537109375, "logps/real": -156.2376251220703, "loss": 0.1849, "rewards/accuracies": 0.8500000238418579, "rewards/generated": -22.108346939086914, "rewards/margins": 19.20734977722168, "rewards/real": -2.900996685028076, "step": 450 }, { "epoch": 0.27, "learning_rate": 4.069166127989657e-07, "logits/generated": -2.4547057151794434, "logits/real": -2.286864995956421, "logps/generated": -276.150390625, "logps/real": -144.59564208984375, "loss": 0.1647, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -16.615272521972656, "rewards/margins": 14.403627395629883, "rewards/real": -2.211641788482666, "step": 460 }, { "epoch": 0.27, "learning_rate": 4.036845507433743e-07, "logits/generated": -2.4923219680786133, "logits/real": -2.362297296524048, "logps/generated": -312.7582702636719, "logps/real": -159.96646118164062, "loss": 0.1664, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -18.795948028564453, "rewards/margins": 16.829730987548828, "rewards/real": -1.9662189483642578, "step": 470 }, { "epoch": 0.28, "learning_rate": 4.004524886877828e-07, "logits/generated": -2.490182399749756, "logits/real": -2.3277366161346436, "logps/generated": -391.92047119140625, "logps/real": -153.3131866455078, "loss": 0.1659, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -28.46506118774414, "rewards/margins": 26.065074920654297, "rewards/real": -2.39998459815979, "step": 480 }, { "epoch": 0.29, "learning_rate": 3.972204266321913e-07, "logits/generated": -2.507981777191162, "logits/real": -2.405111789703369, "logps/generated": -244.44027709960938, "logps/real": -147.1367950439453, "loss": 0.1879, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -13.030759811401367, "rewards/margins": 11.476921081542969, "rewards/real": -1.5538378953933716, "step": 490 }, { "epoch": 0.29, "learning_rate": 3.9398836457659983e-07, "logits/generated": -2.5344769954681396, "logits/real": -2.3722431659698486, "logps/generated": -196.79986572265625, "logps/real": -149.31695556640625, "loss": 0.4379, "rewards/accuracies": 0.875, "rewards/generated": -6.881800174713135, "rewards/margins": 5.857499599456787, "rewards/real": -1.0242998600006104, "step": 500 }, { "epoch": 0.29, "eval_logits/generated": -2.386992931365967, "eval_logits/real": -2.344109296798706, "eval_logps/generated": -179.73768615722656, "eval_logps/real": -144.66737365722656, "eval_loss": 0.11515343934297562, "eval_rewards/accuracies": 0.9665604829788208, "eval_rewards/generated": -8.342087745666504, "eval_rewards/margins": 7.341910362243652, "eval_rewards/real": -1.000178575515747, "eval_runtime": 326.2978, "eval_samples_per_second": 15.323, "eval_steps_per_second": 0.481, "step": 500 }, { "epoch": 0.3, "learning_rate": 3.907563025210084e-07, "logits/generated": -2.3947510719299316, "logits/real": -2.409266471862793, "logps/generated": -194.82321166992188, "logps/real": -157.6947784423828, "loss": 0.1571, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -7.0470075607299805, "rewards/margins": 5.375405311584473, "rewards/real": -1.6716020107269287, "step": 510 }, { "epoch": 0.3, "learning_rate": 3.875242404654169e-07, "logits/generated": -2.335791826248169, "logits/real": -2.3169474601745605, "logps/generated": -207.33126831054688, "logps/real": -156.51400756835938, "loss": 0.1419, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -9.35986042022705, "rewards/margins": 6.684755802154541, "rewards/real": -2.675104856491089, "step": 520 }, { "epoch": 0.31, "learning_rate": 3.842921784098255e-07, "logits/generated": -2.3748762607574463, "logits/real": -2.400601625442505, "logps/generated": -222.042724609375, "logps/real": -170.61839294433594, "loss": 0.1629, "rewards/accuracies": 0.875, "rewards/generated": -9.549965858459473, "rewards/margins": 7.031239986419678, "rewards/real": -2.518725633621216, "step": 530 }, { "epoch": 0.31, "learning_rate": 3.8106011635423394e-07, "logits/generated": -2.3414711952209473, "logits/real": -2.2793216705322266, "logps/generated": -219.4429168701172, "logps/real": -160.81027221679688, "loss": 0.1361, "rewards/accuracies": 0.887499988079071, "rewards/generated": -10.753046989440918, "rewards/margins": 8.526832580566406, "rewards/real": -2.22621488571167, "step": 540 }, { "epoch": 0.32, "learning_rate": 3.778280542986425e-07, "logits/generated": -2.356356143951416, "logits/real": -2.2714390754699707, "logps/generated": -216.5394287109375, "logps/real": -145.5234832763672, "loss": 0.1829, "rewards/accuracies": 0.875, "rewards/generated": -10.430225372314453, "rewards/margins": 8.845321655273438, "rewards/real": -1.5849040746688843, "step": 550 }, { "epoch": 0.33, "learning_rate": 3.745959922430511e-07, "logits/generated": -2.3817245960235596, "logits/real": -2.2892487049102783, "logps/generated": -200.70140075683594, "logps/real": -155.33592224121094, "loss": 0.1745, "rewards/accuracies": 0.8374999761581421, "rewards/generated": -8.53106689453125, "rewards/margins": 6.320080280303955, "rewards/real": -2.210986614227295, "step": 560 }, { "epoch": 0.33, "learning_rate": 3.713639301874596e-07, "logits/generated": -2.365756034851074, "logits/real": -2.330933094024658, "logps/generated": -228.14828491210938, "logps/real": -170.01014709472656, "loss": 0.1399, "rewards/accuracies": 0.949999988079071, "rewards/generated": -10.85645866394043, "rewards/margins": 8.518800735473633, "rewards/real": -2.3376574516296387, "step": 570 }, { "epoch": 0.34, "learning_rate": 3.6813186813186816e-07, "logits/generated": -2.2974698543548584, "logits/real": -2.2853493690490723, "logps/generated": -231.7186737060547, "logps/real": -154.68409729003906, "loss": 0.144, "rewards/accuracies": 0.875, "rewards/generated": -11.895675659179688, "rewards/margins": 9.608539581298828, "rewards/real": -2.2871367931365967, "step": 580 }, { "epoch": 0.34, "learning_rate": 3.648998060762766e-07, "logits/generated": -2.323545217514038, "logits/real": -2.2863316535949707, "logps/generated": -216.95333862304688, "logps/real": -152.7818145751953, "loss": 0.1667, "rewards/accuracies": 0.8374999761581421, "rewards/generated": -9.12224006652832, "rewards/margins": 7.0738372802734375, "rewards/real": -2.0484039783477783, "step": 590 }, { "epoch": 0.35, "learning_rate": 3.616677440206852e-07, "logits/generated": -2.39691162109375, "logits/real": -2.270139217376709, "logps/generated": -199.4903106689453, "logps/real": -154.24615478515625, "loss": 0.1517, "rewards/accuracies": 0.875, "rewards/generated": -6.92580509185791, "rewards/margins": 5.0083699226379395, "rewards/real": -1.9174346923828125, "step": 600 }, { "epoch": 0.35, "eval_logits/generated": -2.2691421508789062, "eval_logits/real": -2.274235725402832, "eval_logps/generated": -225.55331420898438, "eval_logps/real": -151.24253845214844, "eval_loss": 0.09844768047332764, "eval_rewards/accuracies": 0.9745222926139832, "eval_rewards/generated": -12.923652648925781, "eval_rewards/margins": 11.265958786010742, "eval_rewards/real": -1.6576942205429077, "eval_runtime": 324.4499, "eval_samples_per_second": 15.411, "eval_steps_per_second": 0.484, "step": 600 }, { "epoch": 0.35, "learning_rate": 3.584356819650937e-07, "logits/generated": -2.3561298847198486, "logits/real": -2.2546021938323975, "logps/generated": -272.76910400390625, "logps/real": -166.93630981445312, "loss": 0.1387, "rewards/accuracies": 0.925000011920929, "rewards/generated": -13.198400497436523, "rewards/margins": 10.639951705932617, "rewards/real": -2.5584499835968018, "step": 610 }, { "epoch": 0.36, "learning_rate": 3.5520361990950227e-07, "logits/generated": -2.3207361698150635, "logits/real": -2.2687323093414307, "logps/generated": -254.7984619140625, "logps/real": -156.127197265625, "loss": 0.1328, "rewards/accuracies": 0.925000011920929, "rewards/generated": -13.316617965698242, "rewards/margins": 11.088491439819336, "rewards/real": -2.228126049041748, "step": 620 }, { "epoch": 0.37, "learning_rate": 3.519715578539108e-07, "logits/generated": -2.2628586292266846, "logits/real": -2.2417876720428467, "logps/generated": -217.26290893554688, "logps/real": -147.60806274414062, "loss": 0.1532, "rewards/accuracies": 0.862500011920929, "rewards/generated": -10.866621017456055, "rewards/margins": 8.915276527404785, "rewards/real": -1.95134699344635, "step": 630 }, { "epoch": 0.37, "learning_rate": 3.487394957983193e-07, "logits/generated": -2.2968461513519287, "logits/real": -2.362973928451538, "logps/generated": -230.63998413085938, "logps/real": -171.29905700683594, "loss": 0.1631, "rewards/accuracies": 0.862500011920929, "rewards/generated": -10.994275093078613, "rewards/margins": 8.367854118347168, "rewards/real": -2.626420497894287, "step": 640 }, { "epoch": 0.38, "learning_rate": 3.4550743374272786e-07, "logits/generated": -2.4199037551879883, "logits/real": -2.3053011894226074, "logps/generated": -240.74765014648438, "logps/real": -161.87173461914062, "loss": 0.1482, "rewards/accuracies": 0.875, "rewards/generated": -11.601387023925781, "rewards/margins": 8.92815113067627, "rewards/real": -2.6732351779937744, "step": 650 }, { "epoch": 0.38, "learning_rate": 3.422753716871364e-07, "logits/generated": -2.2776732444763184, "logits/real": -2.212689161300659, "logps/generated": -256.89129638671875, "logps/real": -144.78775024414062, "loss": 0.1372, "rewards/accuracies": 0.824999988079071, "rewards/generated": -14.179117202758789, "rewards/margins": 11.40053653717041, "rewards/real": -2.778578996658325, "step": 660 }, { "epoch": 0.39, "learning_rate": 3.3904330963154494e-07, "logits/generated": -2.2712647914886475, "logits/real": -2.31594181060791, "logps/generated": -285.45355224609375, "logps/real": -159.91629028320312, "loss": 0.1318, "rewards/accuracies": 0.9375, "rewards/generated": -15.960909843444824, "rewards/margins": 13.720555305480957, "rewards/real": -2.2403564453125, "step": 670 }, { "epoch": 0.4, "learning_rate": 3.358112475759534e-07, "logits/generated": -2.2503881454467773, "logits/real": -2.2118101119995117, "logps/generated": -269.7525939941406, "logps/real": -148.83535766601562, "loss": 0.1308, "rewards/accuracies": 0.925000011920929, "rewards/generated": -14.820501327514648, "rewards/margins": 12.614280700683594, "rewards/real": -2.206221580505371, "step": 680 }, { "epoch": 0.4, "learning_rate": 3.3257918552036197e-07, "logits/generated": -2.292738676071167, "logits/real": -2.2858123779296875, "logps/generated": -216.58059692382812, "logps/real": -155.36279296875, "loss": 0.1334, "rewards/accuracies": 0.875, "rewards/generated": -10.16190242767334, "rewards/margins": 8.57901668548584, "rewards/real": -1.5828853845596313, "step": 690 }, { "epoch": 0.41, "learning_rate": 3.293471234647705e-07, "logits/generated": -2.191201686859131, "logits/real": -2.1718242168426514, "logps/generated": -248.41610717773438, "logps/real": -161.17881774902344, "loss": 0.1708, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -14.112585067749023, "rewards/margins": 11.10939884185791, "rewards/real": -3.0031871795654297, "step": 700 }, { "epoch": 0.41, "eval_logits/generated": -2.234299659729004, "eval_logits/real": -2.2124428749084473, "eval_logps/generated": -238.25738525390625, "eval_logps/real": -154.1605224609375, "eval_loss": 0.0865996927022934, "eval_rewards/accuracies": 0.9745222926139832, "eval_rewards/generated": -14.19405746459961, "eval_rewards/margins": 12.244565963745117, "eval_rewards/real": -1.949493408203125, "eval_runtime": 326.243, "eval_samples_per_second": 15.326, "eval_steps_per_second": 0.481, "step": 700 }, { "epoch": 0.41, "learning_rate": 3.2611506140917905e-07, "logits/generated": -2.3678855895996094, "logits/real": -2.254812717437744, "logps/generated": -200.08316040039062, "logps/real": -157.531494140625, "loss": 0.1238, "rewards/accuracies": 0.862500011920929, "rewards/generated": -8.445894241333008, "rewards/margins": 6.2266740798950195, "rewards/real": -2.2192206382751465, "step": 710 }, { "epoch": 0.42, "learning_rate": 3.2288299935358757e-07, "logits/generated": -2.294254779815674, "logits/real": -2.110302448272705, "logps/generated": -250.7073516845703, "logps/real": -144.10202026367188, "loss": 0.1251, "rewards/accuracies": 0.887499988079071, "rewards/generated": -13.250715255737305, "rewards/margins": 10.665332794189453, "rewards/real": -2.585383415222168, "step": 720 }, { "epoch": 0.42, "learning_rate": 3.196509372979961e-07, "logits/generated": -2.277039051055908, "logits/real": -2.163405656814575, "logps/generated": -249.3871612548828, "logps/real": -158.72262573242188, "loss": 0.1312, "rewards/accuracies": 0.925000011920929, "rewards/generated": -12.719002723693848, "rewards/margins": 10.320619583129883, "rewards/real": -2.398383617401123, "step": 730 }, { "epoch": 0.43, "learning_rate": 3.1641887524240465e-07, "logits/generated": -2.1525888442993164, "logits/real": -2.1593430042266846, "logps/generated": -276.4872741699219, "logps/real": -169.02743530273438, "loss": 0.0971, "rewards/accuracies": 0.9375, "rewards/generated": -15.876765251159668, "rewards/margins": 13.597285270690918, "rewards/real": -2.279479503631592, "step": 740 }, { "epoch": 0.44, "learning_rate": 3.1318681318681316e-07, "logits/generated": -2.177074432373047, "logits/real": -2.260298013687134, "logps/generated": -236.02206420898438, "logps/real": -170.76596069335938, "loss": 0.1008, "rewards/accuracies": 0.925000011920929, "rewards/generated": -11.86265754699707, "rewards/margins": 9.2439546585083, "rewards/real": -2.6187024116516113, "step": 750 }, { "epoch": 0.44, "learning_rate": 3.0995475113122173e-07, "logits/generated": -2.1248366832733154, "logits/real": -2.187145471572876, "logps/generated": -271.3382263183594, "logps/real": -168.44320678710938, "loss": 0.1217, "rewards/accuracies": 0.875, "rewards/generated": -15.90544319152832, "rewards/margins": 12.456186294555664, "rewards/real": -3.4492554664611816, "step": 760 }, { "epoch": 0.45, "learning_rate": 3.0672268907563024e-07, "logits/generated": -2.2197773456573486, "logits/real": -2.114558219909668, "logps/generated": -241.92642211914062, "logps/real": -143.5634307861328, "loss": 0.1154, "rewards/accuracies": 0.8374999761581421, "rewards/generated": -13.74907398223877, "rewards/margins": 11.252575874328613, "rewards/real": -2.4964985847473145, "step": 770 }, { "epoch": 0.45, "learning_rate": 3.0349062702003876e-07, "logits/generated": -2.202606678009033, "logits/real": -2.1499722003936768, "logps/generated": -271.87738037109375, "logps/real": -161.31857299804688, "loss": 0.1251, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -16.116554260253906, "rewards/margins": 13.589006423950195, "rewards/real": -2.5275490283966064, "step": 780 }, { "epoch": 0.46, "learning_rate": 3.0025856496444727e-07, "logits/generated": -2.345546007156372, "logits/real": -2.2405107021331787, "logps/generated": -220.90701293945312, "logps/real": -160.68800354003906, "loss": 0.1607, "rewards/accuracies": 0.875, "rewards/generated": -10.754340171813965, "rewards/margins": 8.159425735473633, "rewards/real": -2.5949156284332275, "step": 790 }, { "epoch": 0.47, "learning_rate": 2.9702650290885584e-07, "logits/generated": -2.2388525009155273, "logits/real": -2.1353354454040527, "logps/generated": -277.51861572265625, "logps/real": -154.5572509765625, "loss": 0.1135, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -16.760658264160156, "rewards/margins": 13.79723072052002, "rewards/real": -2.963425874710083, "step": 800 }, { "epoch": 0.47, "eval_logits/generated": -2.1789329051971436, "eval_logits/real": -2.198742628097534, "eval_logps/generated": -260.81390380859375, "eval_logps/real": -164.8361358642578, "eval_loss": 0.08095261454582214, "eval_rewards/accuracies": 0.9785031676292419, "eval_rewards/generated": -16.449708938598633, "eval_rewards/margins": 13.432653427124023, "eval_rewards/real": -3.017056465148926, "eval_runtime": 325.1652, "eval_samples_per_second": 15.377, "eval_steps_per_second": 0.483, "step": 800 }, { "epoch": 0.47, "learning_rate": 2.9379444085326436e-07, "logits/generated": -2.269972085952759, "logits/real": -2.200730085372925, "logps/generated": -241.37060546875, "logps/real": -160.4932403564453, "loss": 0.1393, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -12.075285911560059, "rewards/margins": 8.877761840820312, "rewards/real": -3.1975245475769043, "step": 810 }, { "epoch": 0.48, "learning_rate": 2.905623787976729e-07, "logits/generated": -2.1529345512390137, "logits/real": -2.130723237991333, "logps/generated": -278.5174560546875, "logps/real": -172.23374938964844, "loss": 0.1035, "rewards/accuracies": 0.887499988079071, "rewards/generated": -16.6492977142334, "rewards/margins": 13.676687240600586, "rewards/real": -2.9726104736328125, "step": 820 }, { "epoch": 0.48, "learning_rate": 2.8733031674208144e-07, "logits/generated": -2.264868974685669, "logits/real": -2.2218079566955566, "logps/generated": -233.82815551757812, "logps/real": -155.95506286621094, "loss": 0.1595, "rewards/accuracies": 0.875, "rewards/generated": -11.269330024719238, "rewards/margins": 8.389703750610352, "rewards/real": -2.879626750946045, "step": 830 }, { "epoch": 0.49, "learning_rate": 2.8409825468648995e-07, "logits/generated": -2.287550926208496, "logits/real": -2.292382001876831, "logps/generated": -261.179443359375, "logps/real": -170.32998657226562, "loss": 0.1342, "rewards/accuracies": 0.887499988079071, "rewards/generated": -14.10278034210205, "rewards/margins": 11.701745986938477, "rewards/real": -2.4010345935821533, "step": 840 }, { "epoch": 0.49, "learning_rate": 2.808661926308985e-07, "logits/generated": -2.33073091506958, "logits/real": -2.274296283721924, "logps/generated": -241.5367431640625, "logps/real": -149.47222900390625, "loss": 0.115, "rewards/accuracies": 0.949999988079071, "rewards/generated": -12.70106315612793, "rewards/margins": 10.1942777633667, "rewards/real": -2.5067856311798096, "step": 850 }, { "epoch": 0.5, "learning_rate": 2.7763413057530703e-07, "logits/generated": -2.38000226020813, "logits/real": -2.3325276374816895, "logps/generated": -247.8423614501953, "logps/real": -173.57339477539062, "loss": 0.131, "rewards/accuracies": 0.887499988079071, "rewards/generated": -11.641576766967773, "rewards/margins": 9.302278518676758, "rewards/real": -2.339297294616699, "step": 860 }, { "epoch": 0.51, "learning_rate": 2.744020685197156e-07, "logits/generated": -2.4010801315307617, "logits/real": -2.257711887359619, "logps/generated": -246.85153198242188, "logps/real": -166.25039672851562, "loss": 0.1502, "rewards/accuracies": 0.9375, "rewards/generated": -11.700878143310547, "rewards/margins": 9.326895713806152, "rewards/real": -2.3739829063415527, "step": 870 }, { "epoch": 0.51, "learning_rate": 2.7117000646412406e-07, "logits/generated": -2.350351572036743, "logits/real": -2.226243734359741, "logps/generated": -263.51776123046875, "logps/real": -150.32102966308594, "loss": 0.1257, "rewards/accuracies": 0.887499988079071, "rewards/generated": -14.319729804992676, "rewards/margins": 12.047926902770996, "rewards/real": -2.2718007564544678, "step": 880 }, { "epoch": 0.52, "learning_rate": 2.6793794440853263e-07, "logits/generated": -2.4051427841186523, "logits/real": -2.3996026515960693, "logps/generated": -235.8258819580078, "logps/real": -158.03170776367188, "loss": 0.1842, "rewards/accuracies": 0.925000011920929, "rewards/generated": -12.911969184875488, "rewards/margins": 11.036763191223145, "rewards/real": -1.8752062320709229, "step": 890 }, { "epoch": 0.52, "learning_rate": 2.6470588235294114e-07, "logits/generated": -2.4504921436309814, "logits/real": -2.312851905822754, "logps/generated": -253.6083526611328, "logps/real": -168.83355712890625, "loss": 0.1364, "rewards/accuracies": 0.8125, "rewards/generated": -12.168034553527832, "rewards/margins": 8.825535774230957, "rewards/real": -3.3424973487854004, "step": 900 }, { "epoch": 0.52, "eval_logits/generated": -2.3294870853424072, "eval_logits/real": -2.3367509841918945, "eval_logps/generated": -244.4078369140625, "eval_logps/real": -160.21507263183594, "eval_loss": 0.08480827510356903, "eval_rewards/accuracies": 0.9729299545288086, "eval_rewards/generated": -14.809103965759277, "eval_rewards/margins": 12.254154205322266, "eval_rewards/real": -2.5549488067626953, "eval_runtime": 325.558, "eval_samples_per_second": 15.358, "eval_steps_per_second": 0.482, "step": 900 }, { "epoch": 0.53, "learning_rate": 2.614738202973497e-07, "logits/generated": -2.37715482711792, "logits/real": -2.3596484661102295, "logps/generated": -225.02578735351562, "logps/real": -159.61839294433594, "loss": 0.1056, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -11.049263000488281, "rewards/margins": 7.907676696777344, "rewards/real": -3.1415863037109375, "step": 910 }, { "epoch": 0.54, "learning_rate": 2.582417582417583e-07, "logits/generated": -2.3448710441589355, "logits/real": -2.323169231414795, "logps/generated": -246.141357421875, "logps/real": -171.561767578125, "loss": 0.0895, "rewards/accuracies": 0.925000011920929, "rewards/generated": -12.90569019317627, "rewards/margins": 9.77393913269043, "rewards/real": -3.1317505836486816, "step": 920 }, { "epoch": 0.54, "learning_rate": 2.5500969618616674e-07, "logits/generated": -2.3373780250549316, "logits/real": -2.2862701416015625, "logps/generated": -267.49029541015625, "logps/real": -170.78024291992188, "loss": 0.1233, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -14.549161911010742, "rewards/margins": 11.566902160644531, "rewards/real": -2.9822611808776855, "step": 930 }, { "epoch": 0.55, "learning_rate": 2.517776341305753e-07, "logits/generated": -2.359157085418701, "logits/real": -2.3121438026428223, "logps/generated": -207.0237274169922, "logps/real": -151.88720703125, "loss": 0.1634, "rewards/accuracies": 0.887499988079071, "rewards/generated": -10.258100509643555, "rewards/margins": 7.330819606781006, "rewards/real": -2.927279472351074, "step": 940 }, { "epoch": 0.55, "learning_rate": 2.485455720749838e-07, "logits/generated": -2.397444009780884, "logits/real": -2.3451006412506104, "logps/generated": -255.43264770507812, "logps/real": -172.95346069335938, "loss": 0.0939, "rewards/accuracies": 0.9375, "rewards/generated": -12.080055236816406, "rewards/margins": 9.478727340698242, "rewards/real": -2.601327419281006, "step": 950 }, { "epoch": 0.56, "learning_rate": 2.4531351001939233e-07, "logits/generated": -2.354841947555542, "logits/real": -2.379093885421753, "logps/generated": -275.9471435546875, "logps/real": -189.16122436523438, "loss": 0.1414, "rewards/accuracies": 0.8500000238418579, "rewards/generated": -14.650967597961426, "rewards/margins": 11.281542778015137, "rewards/real": -3.369422435760498, "step": 960 }, { "epoch": 0.56, "learning_rate": 2.420814479638009e-07, "logits/generated": -2.3819050788879395, "logits/real": -2.2698609828948975, "logps/generated": -278.7743835449219, "logps/real": -171.54296875, "loss": 0.1414, "rewards/accuracies": 0.8500000238418579, "rewards/generated": -14.173556327819824, "rewards/margins": 10.707517623901367, "rewards/real": -3.4660377502441406, "step": 970 }, { "epoch": 0.57, "learning_rate": 2.388493859082094e-07, "logits/generated": -2.349855899810791, "logits/real": -2.2393364906311035, "logps/generated": -276.61981201171875, "logps/real": -174.84996032714844, "loss": 0.1173, "rewards/accuracies": 0.887499988079071, "rewards/generated": -15.051767349243164, "rewards/margins": 11.607970237731934, "rewards/real": -3.443795680999756, "step": 980 }, { "epoch": 0.58, "learning_rate": 2.3561732385261796e-07, "logits/generated": -2.334862232208252, "logits/real": -2.3226168155670166, "logps/generated": -275.31512451171875, "logps/real": -156.58956909179688, "loss": 0.1284, "rewards/accuracies": 0.862500011920929, "rewards/generated": -14.1647310256958, "rewards/margins": 12.026006698608398, "rewards/real": -2.1387248039245605, "step": 990 }, { "epoch": 0.58, "learning_rate": 2.323852617970265e-07, "logits/generated": -2.498349189758301, "logits/real": -2.4359524250030518, "logps/generated": -256.3857727050781, "logps/real": -175.72055053710938, "loss": 0.1142, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -11.867788314819336, "rewards/margins": 8.631060600280762, "rewards/real": -3.2367255687713623, "step": 1000 }, { "epoch": 0.58, "eval_logits/generated": -2.4644362926483154, "eval_logits/real": -2.478717565536499, "eval_logps/generated": -202.75526428222656, "eval_logps/real": -161.3638458251953, "eval_loss": 0.09024719893932343, "eval_rewards/accuracies": 0.9713375568389893, "eval_rewards/generated": -10.643847465515137, "eval_rewards/margins": 7.974020957946777, "eval_rewards/real": -2.669825553894043, "eval_runtime": 325.0517, "eval_samples_per_second": 15.382, "eval_steps_per_second": 0.483, "step": 1000 }, { "epoch": 0.59, "learning_rate": 2.29153199741435e-07, "logits/generated": -2.505728244781494, "logits/real": -2.505375385284424, "logps/generated": -230.0421600341797, "logps/real": -168.85704040527344, "loss": 0.1156, "rewards/accuracies": 0.9375, "rewards/generated": -10.663579940795898, "rewards/margins": 7.189150333404541, "rewards/real": -3.474430799484253, "step": 1010 }, { "epoch": 0.59, "learning_rate": 2.2592113768584355e-07, "logits/generated": -2.5055289268493652, "logits/real": -2.5188517570495605, "logps/generated": -253.8979034423828, "logps/real": -199.04542541503906, "loss": 0.1064, "rewards/accuracies": 0.887499988079071, "rewards/generated": -11.993169784545898, "rewards/margins": 7.974666595458984, "rewards/real": -4.018503665924072, "step": 1020 }, { "epoch": 0.6, "learning_rate": 2.226890756302521e-07, "logits/generated": -2.465848207473755, "logits/real": -2.450221061706543, "logps/generated": -267.3594665527344, "logps/real": -190.57058715820312, "loss": 0.0993, "rewards/accuracies": 0.8500000238418579, "rewards/generated": -13.37690544128418, "rewards/margins": 9.04112720489502, "rewards/real": -4.33577823638916, "step": 1030 }, { "epoch": 0.61, "learning_rate": 2.1945701357466063e-07, "logits/generated": -2.4809277057647705, "logits/real": -2.448288917541504, "logps/generated": -244.9989776611328, "logps/real": -176.53506469726562, "loss": 0.138, "rewards/accuracies": 0.875, "rewards/generated": -12.209803581237793, "rewards/margins": 7.888075828552246, "rewards/real": -4.321727752685547, "step": 1040 }, { "epoch": 0.61, "learning_rate": 2.1622495151906917e-07, "logits/generated": -2.5043458938598633, "logits/real": -2.4832687377929688, "logps/generated": -245.507568359375, "logps/real": -173.8964385986328, "loss": 0.0956, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -11.928030014038086, "rewards/margins": 8.440313339233398, "rewards/real": -3.4877171516418457, "step": 1050 }, { "epoch": 0.62, "learning_rate": 2.129928894634777e-07, "logits/generated": -2.5427870750427246, "logits/real": -2.499741315841675, "logps/generated": -237.24887084960938, "logps/real": -168.909423828125, "loss": 0.1476, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -10.177728652954102, "rewards/margins": 7.252813816070557, "rewards/real": -2.9249141216278076, "step": 1060 }, { "epoch": 0.62, "learning_rate": 2.0976082740788623e-07, "logits/generated": -2.4873366355895996, "logits/real": -2.4385483264923096, "logps/generated": -206.4569549560547, "logps/real": -153.2084503173828, "loss": 0.1223, "rewards/accuracies": 0.8374999761581421, "rewards/generated": -10.100156784057617, "rewards/margins": 7.314939975738525, "rewards/real": -2.785216808319092, "step": 1070 }, { "epoch": 0.63, "learning_rate": 2.0652876535229474e-07, "logits/generated": -2.5049309730529785, "logits/real": -2.456437587738037, "logps/generated": -214.497314453125, "logps/real": -164.29551696777344, "loss": 0.0969, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -9.749935150146484, "rewards/margins": 6.255521774291992, "rewards/real": -3.4944145679473877, "step": 1080 }, { "epoch": 0.63, "learning_rate": 2.0329670329670329e-07, "logits/generated": -2.5299432277679443, "logits/real": -2.425293445587158, "logps/generated": -239.8467254638672, "logps/real": -163.84437561035156, "loss": 0.0989, "rewards/accuracies": 0.9375, "rewards/generated": -10.874212265014648, "rewards/margins": 8.04706859588623, "rewards/real": -2.8271448612213135, "step": 1090 }, { "epoch": 0.64, "learning_rate": 2.0006464124111183e-07, "logits/generated": -2.4800021648406982, "logits/real": -2.451796293258667, "logps/generated": -237.79653930664062, "logps/real": -167.1535186767578, "loss": 0.1332, "rewards/accuracies": 0.875, "rewards/generated": -12.058743476867676, "rewards/margins": 8.454904556274414, "rewards/real": -3.6038384437561035, "step": 1100 }, { "epoch": 0.64, "eval_logits/generated": -2.4417178630828857, "eval_logits/real": -2.462984800338745, "eval_logps/generated": -215.05523681640625, "eval_logps/real": -162.10159301757812, "eval_loss": 0.07708299905061722, "eval_rewards/accuracies": 0.9785031676292419, "eval_rewards/generated": -11.873842239379883, "eval_rewards/margins": 9.130241394042969, "eval_rewards/real": -2.743600368499756, "eval_runtime": 326.4428, "eval_samples_per_second": 15.317, "eval_steps_per_second": 0.481, "step": 1100 }, { "epoch": 0.65, "learning_rate": 1.9683257918552034e-07, "logits/generated": -2.4482009410858154, "logits/real": -2.4847419261932373, "logps/generated": -222.0105438232422, "logps/real": -158.9528350830078, "loss": 0.1071, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -11.227673530578613, "rewards/margins": 8.365415573120117, "rewards/real": -2.862257480621338, "step": 1110 }, { "epoch": 0.65, "learning_rate": 1.9360051712992888e-07, "logits/generated": -2.4634220600128174, "logits/real": -2.4727044105529785, "logps/generated": -239.70742797851562, "logps/real": -174.02890014648438, "loss": 0.0992, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -12.579872131347656, "rewards/margins": 8.563131332397461, "rewards/real": -4.016742706298828, "step": 1120 }, { "epoch": 0.66, "learning_rate": 1.903684550743374e-07, "logits/generated": -2.430037260055542, "logits/real": -2.390148639678955, "logps/generated": -231.12417602539062, "logps/real": -151.89051818847656, "loss": 0.1022, "rewards/accuracies": 0.887499988079071, "rewards/generated": -12.785438537597656, "rewards/margins": 9.053568840026855, "rewards/real": -3.731870174407959, "step": 1130 }, { "epoch": 0.66, "learning_rate": 1.8713639301874596e-07, "logits/generated": -2.4577813148498535, "logits/real": -2.450479030609131, "logps/generated": -251.19302368164062, "logps/real": -171.656982421875, "loss": 0.1237, "rewards/accuracies": 0.875, "rewards/generated": -13.755993843078613, "rewards/margins": 9.537080764770508, "rewards/real": -4.2189130783081055, "step": 1140 }, { "epoch": 0.67, "learning_rate": 1.839043309631545e-07, "logits/generated": -2.449868679046631, "logits/real": -2.485016107559204, "logps/generated": -256.3826904296875, "logps/real": -172.46572875976562, "loss": 0.1185, "rewards/accuracies": 0.887499988079071, "rewards/generated": -13.964202880859375, "rewards/margins": 9.694369316101074, "rewards/real": -4.269833564758301, "step": 1150 }, { "epoch": 0.67, "learning_rate": 1.8067226890756302e-07, "logits/generated": -2.500181198120117, "logits/real": -2.4853765964508057, "logps/generated": -255.6665802001953, "logps/real": -180.01492309570312, "loss": 0.0614, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -13.007906913757324, "rewards/margins": 9.470663070678711, "rewards/real": -3.537243604660034, "step": 1160 }, { "epoch": 0.68, "learning_rate": 1.7744020685197156e-07, "logits/generated": -2.451597213745117, "logits/real": -2.4578232765197754, "logps/generated": -269.48602294921875, "logps/real": -180.25762939453125, "loss": 0.0499, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -14.529942512512207, "rewards/margins": 11.10853385925293, "rewards/real": -3.421407699584961, "step": 1170 }, { "epoch": 0.69, "learning_rate": 1.7420814479638007e-07, "logits/generated": -2.4766621589660645, "logits/real": -2.428915023803711, "logps/generated": -268.610107421875, "logps/real": -183.23977661132812, "loss": 0.0714, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -13.613744735717773, "rewards/margins": 8.734598159790039, "rewards/real": -4.879148006439209, "step": 1180 }, { "epoch": 0.69, "learning_rate": 1.7097608274078861e-07, "logits/generated": -2.4673948287963867, "logits/real": -2.426107883453369, "logps/generated": -268.60968017578125, "logps/real": -185.16867065429688, "loss": 0.1291, "rewards/accuracies": 0.9375, "rewards/generated": -13.530069351196289, "rewards/margins": 9.933688163757324, "rewards/real": -3.5963797569274902, "step": 1190 }, { "epoch": 0.7, "learning_rate": 1.6774402068519713e-07, "logits/generated": -2.4350733757019043, "logits/real": -2.3934569358825684, "logps/generated": -263.73193359375, "logps/real": -164.91358947753906, "loss": 0.1007, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -14.513885498046875, "rewards/margins": 10.701467514038086, "rewards/real": -3.8124184608459473, "step": 1200 }, { "epoch": 0.7, "eval_logits/generated": -2.3948426246643066, "eval_logits/real": -2.425471067428589, "eval_logps/generated": -238.21556091308594, "eval_logps/real": -168.78074645996094, "eval_loss": 0.07581960409879684, "eval_rewards/accuracies": 0.9745222926139832, "eval_rewards/generated": -14.189876556396484, "eval_rewards/margins": 10.778358459472656, "eval_rewards/real": -3.4115185737609863, "eval_runtime": 324.5114, "eval_samples_per_second": 15.408, "eval_steps_per_second": 0.484, "step": 1200 }, { "epoch": 0.7, "learning_rate": 1.6451195862960567e-07, "logits/generated": -2.4927570819854736, "logits/real": -2.4464633464813232, "logps/generated": -231.09664916992188, "logps/real": -159.4072265625, "loss": 0.0937, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -11.880216598510742, "rewards/margins": 8.993806838989258, "rewards/real": -2.886411190032959, "step": 1210 }, { "epoch": 0.71, "learning_rate": 1.6127989657401424e-07, "logits/generated": -2.4784765243530273, "logits/real": -2.47477650642395, "logps/generated": -233.77877807617188, "logps/real": -159.3743438720703, "loss": 0.0946, "rewards/accuracies": 0.887499988079071, "rewards/generated": -11.6648588180542, "rewards/margins": 8.083456039428711, "rewards/real": -3.5814037322998047, "step": 1220 }, { "epoch": 0.72, "learning_rate": 1.5804783451842275e-07, "logits/generated": -2.490647077560425, "logits/real": -2.463700532913208, "logps/generated": -277.70172119140625, "logps/real": -174.46707153320312, "loss": 0.1369, "rewards/accuracies": 0.925000011920929, "rewards/generated": -15.150970458984375, "rewards/margins": 11.056761741638184, "rewards/real": -4.094208717346191, "step": 1230 }, { "epoch": 0.72, "learning_rate": 1.548157724628313e-07, "logits/generated": -2.551166534423828, "logits/real": -2.5086779594421387, "logps/generated": -257.8534240722656, "logps/real": -183.65255737304688, "loss": 0.1252, "rewards/accuracies": 0.925000011920929, "rewards/generated": -13.366009712219238, "rewards/margins": 9.787813186645508, "rewards/real": -3.578195095062256, "step": 1240 }, { "epoch": 0.73, "learning_rate": 1.515837104072398e-07, "logits/generated": -2.5603954792022705, "logits/real": -2.539309501647949, "logps/generated": -229.6404266357422, "logps/real": -172.22093200683594, "loss": 0.1479, "rewards/accuracies": 0.824999988079071, "rewards/generated": -11.011828422546387, "rewards/margins": 7.343997955322266, "rewards/real": -3.6678295135498047, "step": 1250 }, { "epoch": 0.73, "learning_rate": 1.4835164835164835e-07, "logits/generated": -2.575138568878174, "logits/real": -2.495459794998169, "logps/generated": -220.6327362060547, "logps/real": -144.017333984375, "loss": 0.0779, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -10.787625312805176, "rewards/margins": 8.316181182861328, "rewards/real": -2.4714438915252686, "step": 1260 }, { "epoch": 0.74, "learning_rate": 1.451195862960569e-07, "logits/generated": -2.5613296031951904, "logits/real": -2.569815158843994, "logps/generated": -232.4954071044922, "logps/real": -175.23074340820312, "loss": 0.0922, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -11.065740585327148, "rewards/margins": 7.911036491394043, "rewards/real": -3.1547024250030518, "step": 1270 }, { "epoch": 0.74, "learning_rate": 1.418875242404654e-07, "logits/generated": -2.539595365524292, "logits/real": -2.5092930793762207, "logps/generated": -220.0555419921875, "logps/real": -165.4029083251953, "loss": 0.1347, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -10.106549263000488, "rewards/margins": 7.346714019775391, "rewards/real": -2.7598352432250977, "step": 1280 }, { "epoch": 0.75, "learning_rate": 1.3865546218487394e-07, "logits/generated": -2.6019704341888428, "logits/real": -2.5571720600128174, "logps/generated": -237.4385223388672, "logps/real": -167.28253173828125, "loss": 0.0784, "rewards/accuracies": 0.949999988079071, "rewards/generated": -10.303973197937012, "rewards/margins": 8.126482963562012, "rewards/real": -2.1774911880493164, "step": 1290 }, { "epoch": 0.76, "learning_rate": 1.3542340012928246e-07, "logits/generated": -2.5232183933258057, "logits/real": -2.523380756378174, "logps/generated": -240.03897094726562, "logps/real": -154.43798828125, "loss": 0.1306, "rewards/accuracies": 0.8500000238418579, "rewards/generated": -12.480694770812988, "rewards/margins": 9.408671379089355, "rewards/real": -3.072023868560791, "step": 1300 }, { "epoch": 0.76, "eval_logits/generated": -2.5270004272460938, "eval_logits/real": -2.537477493286133, "eval_logps/generated": -207.37860107421875, "eval_logps/real": -158.70806884765625, "eval_loss": 0.07650701701641083, "eval_rewards/accuracies": 0.9753184914588928, "eval_rewards/generated": -11.106179237365723, "eval_rewards/margins": 8.70193099975586, "eval_rewards/real": -2.404249906539917, "eval_runtime": 324.1, "eval_samples_per_second": 15.427, "eval_steps_per_second": 0.484, "step": 1300 }, { "epoch": 0.76, "learning_rate": 1.3219133807369102e-07, "logits/generated": -2.5183393955230713, "logits/real": -2.483584403991699, "logps/generated": -237.892578125, "logps/real": -154.9105682373047, "loss": 0.1098, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -12.671589851379395, "rewards/margins": 10.110515594482422, "rewards/real": -2.561074733734131, "step": 1310 }, { "epoch": 0.77, "learning_rate": 1.2895927601809956e-07, "logits/generated": -2.5193591117858887, "logits/real": -2.546970844268799, "logps/generated": -238.3896484375, "logps/real": -164.31768798828125, "loss": 0.0707, "rewards/accuracies": 0.925000011920929, "rewards/generated": -12.500856399536133, "rewards/margins": 9.552709579467773, "rewards/real": -2.9481449127197266, "step": 1320 }, { "epoch": 0.77, "learning_rate": 1.2572721396250808e-07, "logits/generated": -2.5152533054351807, "logits/real": -2.501941442489624, "logps/generated": -221.7640380859375, "logps/real": -161.3094940185547, "loss": 0.1377, "rewards/accuracies": 0.887499988079071, "rewards/generated": -10.399030685424805, "rewards/margins": 7.858689785003662, "rewards/real": -2.540339946746826, "step": 1330 }, { "epoch": 0.78, "learning_rate": 1.224951519069166e-07, "logits/generated": -2.5427238941192627, "logits/real": -2.5108070373535156, "logps/generated": -258.5174560546875, "logps/real": -167.56686401367188, "loss": 0.1183, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -12.709803581237793, "rewards/margins": 10.17547607421875, "rewards/real": -2.5343270301818848, "step": 1340 }, { "epoch": 0.79, "learning_rate": 1.1926308985132513e-07, "logits/generated": -2.528041124343872, "logits/real": -2.5164339542388916, "logps/generated": -224.2671356201172, "logps/real": -177.60060119628906, "loss": 0.0929, "rewards/accuracies": 0.875, "rewards/generated": -10.380260467529297, "rewards/margins": 7.477629661560059, "rewards/real": -2.9026293754577637, "step": 1350 }, { "epoch": 0.79, "learning_rate": 1.1603102779573367e-07, "logits/generated": -2.518709421157837, "logits/real": -2.4333877563476562, "logps/generated": -222.98892211914062, "logps/real": -153.88491821289062, "loss": 0.0828, "rewards/accuracies": 0.9375, "rewards/generated": -10.736051559448242, "rewards/margins": 7.6436614990234375, "rewards/real": -3.092390537261963, "step": 1360 }, { "epoch": 0.8, "learning_rate": 1.127989657401422e-07, "logits/generated": -2.5053441524505615, "logits/real": -2.4574134349823, "logps/generated": -243.2651824951172, "logps/real": -179.1326904296875, "loss": 0.1207, "rewards/accuracies": 0.875, "rewards/generated": -12.284380912780762, "rewards/margins": 8.803535461425781, "rewards/real": -3.4808456897735596, "step": 1370 }, { "epoch": 0.8, "learning_rate": 1.0956690368455074e-07, "logits/generated": -2.4903757572174072, "logits/real": -2.5189337730407715, "logps/generated": -226.8483428955078, "logps/real": -171.20724487304688, "loss": 0.0713, "rewards/accuracies": 0.887499988079071, "rewards/generated": -11.636832237243652, "rewards/margins": 8.194231033325195, "rewards/real": -3.4426021575927734, "step": 1380 }, { "epoch": 0.81, "learning_rate": 1.0633484162895927e-07, "logits/generated": -2.5010979175567627, "logits/real": -2.4416332244873047, "logps/generated": -225.2292938232422, "logps/real": -172.9666748046875, "loss": 0.0973, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -10.62346363067627, "rewards/margins": 6.646616458892822, "rewards/real": -3.976848602294922, "step": 1390 }, { "epoch": 0.81, "learning_rate": 1.031027795733678e-07, "logits/generated": -2.489281177520752, "logits/real": -2.480539560317993, "logps/generated": -241.0604248046875, "logps/real": -165.22604370117188, "loss": 0.1084, "rewards/accuracies": 0.887499988079071, "rewards/generated": -12.310083389282227, "rewards/margins": 9.358478546142578, "rewards/real": -2.951603651046753, "step": 1400 }, { "epoch": 0.81, "eval_logits/generated": -2.476224422454834, "eval_logits/real": -2.484823226928711, "eval_logps/generated": -220.34217834472656, "eval_logps/real": -162.47093200683594, "eval_loss": 0.07595483213663101, "eval_rewards/accuracies": 0.9745222926139832, "eval_rewards/generated": -12.402539253234863, "eval_rewards/margins": 9.622005462646484, "eval_rewards/real": -2.780533790588379, "eval_runtime": 325.3302, "eval_samples_per_second": 15.369, "eval_steps_per_second": 0.483, "step": 1400 }, { "epoch": 0.82, "learning_rate": 9.987071751777634e-08, "logits/generated": -2.517509937286377, "logits/real": -2.5105528831481934, "logps/generated": -220.71542358398438, "logps/real": -163.03253173828125, "loss": 0.0891, "rewards/accuracies": 0.9375, "rewards/generated": -10.003129959106445, "rewards/margins": 6.5026068687438965, "rewards/real": -3.500523328781128, "step": 1410 }, { "epoch": 0.83, "learning_rate": 9.663865546218488e-08, "logits/generated": -2.494823694229126, "logits/real": -2.4722931385040283, "logps/generated": -218.6509552001953, "logps/real": -150.20521545410156, "loss": 0.0958, "rewards/accuracies": 0.9375, "rewards/generated": -11.17399787902832, "rewards/margins": 8.31296157836914, "rewards/real": -2.861036777496338, "step": 1420 }, { "epoch": 0.83, "learning_rate": 9.340659340659341e-08, "logits/generated": -2.4990344047546387, "logits/real": -2.4319558143615723, "logps/generated": -241.905517578125, "logps/real": -166.25537109375, "loss": 0.1237, "rewards/accuracies": 0.875, "rewards/generated": -12.346821784973145, "rewards/margins": 8.885249137878418, "rewards/real": -3.461573839187622, "step": 1430 }, { "epoch": 0.84, "learning_rate": 9.017453135100193e-08, "logits/generated": -2.4997031688690186, "logits/real": -2.510288715362549, "logps/generated": -258.8919677734375, "logps/real": -195.13092041015625, "loss": 0.0683, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -13.229423522949219, "rewards/margins": 9.894552230834961, "rewards/real": -3.3348708152770996, "step": 1440 }, { "epoch": 0.84, "learning_rate": 8.694246929541046e-08, "logits/generated": -2.4908547401428223, "logits/real": -2.469104051589966, "logps/generated": -258.60723876953125, "logps/real": -183.93992614746094, "loss": 0.1175, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -12.868890762329102, "rewards/margins": 8.966911315917969, "rewards/real": -3.9019787311553955, "step": 1450 }, { "epoch": 0.85, "learning_rate": 8.371040723981899e-08, "logits/generated": -2.4881350994110107, "logits/real": -2.4609992504119873, "logps/generated": -257.45709228515625, "logps/real": -172.5931396484375, "loss": 0.0944, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -13.51276969909668, "rewards/margins": 9.734931945800781, "rewards/real": -3.777839183807373, "step": 1460 }, { "epoch": 0.86, "learning_rate": 8.047834518422754e-08, "logits/generated": -2.5072503089904785, "logits/real": -2.4428117275238037, "logps/generated": -254.014892578125, "logps/real": -170.81788635253906, "loss": 0.0791, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -12.408576011657715, "rewards/margins": 8.652534484863281, "rewards/real": -3.756040573120117, "step": 1470 }, { "epoch": 0.86, "learning_rate": 7.724628312863607e-08, "logits/generated": -2.4876208305358887, "logits/real": -2.463144302368164, "logps/generated": -272.12139892578125, "logps/real": -181.99913024902344, "loss": 0.0929, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -14.419462203979492, "rewards/margins": 10.72993278503418, "rewards/real": -3.68953013420105, "step": 1480 }, { "epoch": 0.87, "learning_rate": 7.40142210730446e-08, "logits/generated": -2.4878337383270264, "logits/real": -2.502197504043579, "logps/generated": -257.6641845703125, "logps/real": -180.44590759277344, "loss": 0.0694, "rewards/accuracies": 0.949999988079071, "rewards/generated": -12.998161315917969, "rewards/margins": 10.025131225585938, "rewards/real": -2.9730300903320312, "step": 1490 }, { "epoch": 0.87, "learning_rate": 7.078215901745313e-08, "logits/generated": -2.4765686988830566, "logits/real": -2.459664821624756, "logps/generated": -239.1787109375, "logps/real": -167.0284423828125, "loss": 0.1494, "rewards/accuracies": 0.887499988079071, "rewards/generated": -11.897302627563477, "rewards/margins": 8.363332748413086, "rewards/real": -3.5339698791503906, "step": 1500 }, { "epoch": 0.87, "eval_logits/generated": -2.465639591217041, "eval_logits/real": -2.4750711917877197, "eval_logps/generated": -226.3308868408203, "eval_logps/real": -164.7202606201172, "eval_loss": 0.0739506259560585, "eval_rewards/accuracies": 0.9713375568389893, "eval_rewards/generated": -13.001410484313965, "eval_rewards/margins": 9.995938301086426, "eval_rewards/real": -3.005469799041748, "eval_runtime": 325.1178, "eval_samples_per_second": 15.379, "eval_steps_per_second": 0.483, "step": 1500 }, { "epoch": 0.88, "learning_rate": 6.755009696186167e-08, "logits/generated": -2.4574570655822754, "logits/real": -2.478517770767212, "logps/generated": -235.34963989257812, "logps/real": -165.80545043945312, "loss": 0.0921, "rewards/accuracies": 0.925000011920929, "rewards/generated": -12.185359001159668, "rewards/margins": 8.462077140808105, "rewards/real": -3.723281145095825, "step": 1510 }, { "epoch": 0.88, "learning_rate": 6.43180349062702e-08, "logits/generated": -2.467890501022339, "logits/real": -2.449784994125366, "logps/generated": -265.53546142578125, "logps/real": -190.40701293945312, "loss": 0.0903, "rewards/accuracies": 0.925000011920929, "rewards/generated": -14.360349655151367, "rewards/margins": 10.199980735778809, "rewards/real": -4.160367965698242, "step": 1520 }, { "epoch": 0.89, "learning_rate": 6.108597285067872e-08, "logits/generated": -2.4615697860717773, "logits/real": -2.3793063163757324, "logps/generated": -239.954833984375, "logps/real": -161.84376525878906, "loss": 0.0975, "rewards/accuracies": 0.8500000238418579, "rewards/generated": -12.927061080932617, "rewards/margins": 8.876288414001465, "rewards/real": -4.050771236419678, "step": 1530 }, { "epoch": 0.9, "learning_rate": 5.785391079508726e-08, "logits/generated": -2.4796242713928223, "logits/real": -2.4357521533966064, "logps/generated": -227.2936248779297, "logps/real": -163.6289825439453, "loss": 0.0818, "rewards/accuracies": 0.9375, "rewards/generated": -11.878579139709473, "rewards/margins": 8.282608985900879, "rewards/real": -3.595970630645752, "step": 1540 }, { "epoch": 0.9, "learning_rate": 5.46218487394958e-08, "logits/generated": -2.4726433753967285, "logits/real": -2.3735179901123047, "logps/generated": -239.4695587158203, "logps/real": -159.3350372314453, "loss": 0.0978, "rewards/accuracies": 0.925000011920929, "rewards/generated": -12.301145553588867, "rewards/margins": 8.831232070922852, "rewards/real": -3.4699130058288574, "step": 1550 }, { "epoch": 0.91, "learning_rate": 5.1389786683904325e-08, "logits/generated": -2.488933801651001, "logits/real": -2.4772191047668457, "logps/generated": -250.51992797851562, "logps/real": -159.865478515625, "loss": 0.0695, "rewards/accuracies": 0.925000011920929, "rewards/generated": -12.80781364440918, "rewards/margins": 9.52310848236084, "rewards/real": -3.2847042083740234, "step": 1560 }, { "epoch": 0.91, "learning_rate": 4.8157724628312865e-08, "logits/generated": -2.5149645805358887, "logits/real": -2.483194589614868, "logps/generated": -259.7389221191406, "logps/real": -188.4167022705078, "loss": 0.0978, "rewards/accuracies": 0.887499988079071, "rewards/generated": -12.594181060791016, "rewards/margins": 8.321154594421387, "rewards/real": -4.273025035858154, "step": 1570 }, { "epoch": 0.92, "learning_rate": 4.492566257272139e-08, "logits/generated": -2.498760461807251, "logits/real": -2.4244179725646973, "logps/generated": -255.7240753173828, "logps/real": -168.46018981933594, "loss": 0.1101, "rewards/accuracies": 0.9375, "rewards/generated": -12.637785911560059, "rewards/margins": 8.729381561279297, "rewards/real": -3.9084041118621826, "step": 1580 }, { "epoch": 0.92, "learning_rate": 4.169360051712993e-08, "logits/generated": -2.4851748943328857, "logits/real": -2.429105043411255, "logps/generated": -242.70553588867188, "logps/real": -165.17166137695312, "loss": 0.0845, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -12.373160362243652, "rewards/margins": 8.976916313171387, "rewards/real": -3.3962435722351074, "step": 1590 }, { "epoch": 0.93, "learning_rate": 3.846153846153846e-08, "logits/generated": -2.468567371368408, "logits/real": -2.375533103942871, "logps/generated": -254.6434326171875, "logps/real": -174.11781311035156, "loss": 0.1099, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -13.183265686035156, "rewards/margins": 8.735334396362305, "rewards/real": -4.447932243347168, "step": 1600 }, { "epoch": 0.93, "eval_logits/generated": -2.425264358520508, "eval_logits/real": -2.4319591522216797, "eval_logps/generated": -233.0531768798828, "eval_logps/real": -169.63656616210938, "eval_loss": 0.07743819802999496, "eval_rewards/accuracies": 0.9729299545288086, "eval_rewards/generated": -13.673635482788086, "eval_rewards/margins": 10.176533699035645, "eval_rewards/real": -3.497100591659546, "eval_runtime": 325.5057, "eval_samples_per_second": 15.361, "eval_steps_per_second": 0.482, "step": 1600 }, { "epoch": 0.94, "learning_rate": 3.5229476405946995e-08, "logits/generated": -2.4387266635894775, "logits/real": -2.408844470977783, "logps/generated": -240.92184448242188, "logps/real": -162.61293029785156, "loss": 0.1445, "rewards/accuracies": 0.875, "rewards/generated": -13.79778003692627, "rewards/margins": 10.461742401123047, "rewards/real": -3.3360390663146973, "step": 1610 }, { "epoch": 0.94, "learning_rate": 3.199741435035552e-08, "logits/generated": -2.44557785987854, "logits/real": -2.4553208351135254, "logps/generated": -238.5950164794922, "logps/real": -176.37416076660156, "loss": 0.1105, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -12.455824851989746, "rewards/margins": 8.037620544433594, "rewards/real": -4.418205261230469, "step": 1620 }, { "epoch": 0.95, "learning_rate": 2.8765352294764057e-08, "logits/generated": -2.457484006881714, "logits/real": -2.424367904663086, "logps/generated": -241.5567626953125, "logps/real": -170.40658569335938, "loss": 0.1305, "rewards/accuracies": 0.8125, "rewards/generated": -12.365758895874023, "rewards/margins": 8.254980087280273, "rewards/real": -4.110778331756592, "step": 1630 }, { "epoch": 0.95, "learning_rate": 2.553329023917259e-08, "logits/generated": -2.4767932891845703, "logits/real": -2.4457132816314697, "logps/generated": -255.2021026611328, "logps/real": -187.205078125, "loss": 0.0809, "rewards/accuracies": 0.875, "rewards/generated": -12.562596321105957, "rewards/margins": 8.74045467376709, "rewards/real": -3.82214093208313, "step": 1640 }, { "epoch": 0.96, "learning_rate": 2.2301228183581126e-08, "logits/generated": -2.4593024253845215, "logits/real": -2.4330244064331055, "logps/generated": -261.88787841796875, "logps/real": -179.29541015625, "loss": 0.1031, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -13.55891227722168, "rewards/margins": 9.299590110778809, "rewards/real": -4.259322166442871, "step": 1650 }, { "epoch": 0.97, "learning_rate": 1.906916612798966e-08, "logits/generated": -2.467500925064087, "logits/real": -2.4541561603546143, "logps/generated": -245.1857452392578, "logps/real": -171.3612060546875, "loss": 0.1162, "rewards/accuracies": 0.887499988079071, "rewards/generated": -12.426248550415039, "rewards/margins": 8.478652000427246, "rewards/real": -3.9475975036621094, "step": 1660 }, { "epoch": 0.97, "learning_rate": 1.5837104072398187e-08, "logits/generated": -2.459001064300537, "logits/real": -2.430206775665283, "logps/generated": -254.04745483398438, "logps/real": -169.44154357910156, "loss": 0.0977, "rewards/accuracies": 0.8500000238418579, "rewards/generated": -13.104756355285645, "rewards/margins": 9.2105073928833, "rewards/real": -3.8942489624023438, "step": 1670 }, { "epoch": 0.98, "learning_rate": 1.2605042016806723e-08, "logits/generated": -2.448197364807129, "logits/real": -2.455432891845703, "logps/generated": -263.8450622558594, "logps/real": -177.14439392089844, "loss": 0.0979, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -13.550898551940918, "rewards/margins": 9.719705581665039, "rewards/real": -3.831193208694458, "step": 1680 }, { "epoch": 0.98, "learning_rate": 9.372979961215254e-09, "logits/generated": -2.4577107429504395, "logits/real": -2.4188618659973145, "logps/generated": -236.26318359375, "logps/real": -172.19528198242188, "loss": 0.0944, "rewards/accuracies": 0.925000011920929, "rewards/generated": -11.246160507202148, "rewards/margins": 7.3128533363342285, "rewards/real": -3.93330717086792, "step": 1690 }, { "epoch": 0.99, "learning_rate": 6.140917905623787e-09, "logits/generated": -2.471553325653076, "logits/real": -2.3995721340179443, "logps/generated": -262.373291015625, "logps/real": -165.32789611816406, "loss": 0.0906, "rewards/accuracies": 0.9375, "rewards/generated": -13.321420669555664, "rewards/margins": 10.259626388549805, "rewards/real": -3.0617949962615967, "step": 1700 }, { "epoch": 0.99, "eval_logits/generated": -2.4198453426361084, "eval_logits/real": -2.423133373260498, "eval_logps/generated": -228.7878875732422, "eval_logps/real": -165.37667846679688, "eval_loss": 0.07379047572612762, "eval_rewards/accuracies": 0.9713375568389893, "eval_rewards/generated": -13.247109413146973, "eval_rewards/margins": 10.17599868774414, "eval_rewards/real": -3.0711097717285156, "eval_runtime": 325.2984, "eval_samples_per_second": 15.371, "eval_steps_per_second": 0.483, "step": 1700 }, { "epoch": 0.99, "learning_rate": 2.9088558500323206e-09, "logits/generated": -2.4227101802825928, "logits/real": -2.464235305786133, "logps/generated": -243.1490020751953, "logps/real": -169.15988159179688, "loss": 0.0914, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -12.564062118530273, "rewards/margins": 8.887134552001953, "rewards/real": -3.676928758621216, "step": 1710 }, { "epoch": 1.0, "step": 1719, "total_flos": 0.0, "train_loss": 0.16738235295130943, "train_runtime": 14752.9454, "train_samples_per_second": 3.728, "train_steps_per_second": 0.117 } ], "logging_steps": 10, "max_steps": 1719, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }