|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 1719, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9069767441860465e-09, |
|
"logits/generated": -3.012260675430298, |
|
"logits/real": -2.981379270553589, |
|
"logps/generated": -121.78553009033203, |
|
"logps/real": -157.20819091796875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9069767441860464e-08, |
|
"logits/generated": -2.961106538772583, |
|
"logits/real": -2.9408955574035645, |
|
"logps/generated": -125.34223175048828, |
|
"logps/real": -137.5188446044922, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/generated": -0.0030116664711385965, |
|
"rewards/margins": 0.01261158287525177, |
|
"rewards/real": 0.009599916636943817, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.813953488372093e-08, |
|
"logits/generated": -2.963073253631592, |
|
"logits/real": -2.9351158142089844, |
|
"logps/generated": -122.87374114990234, |
|
"logps/real": -133.8837127685547, |
|
"loss": 0.6375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/generated": -0.058080412447452545, |
|
"rewards/margins": 0.14583885669708252, |
|
"rewards/real": 0.08775845915079117, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.720930232558139e-08, |
|
"logits/generated": -2.9640278816223145, |
|
"logits/real": -2.9266650676727295, |
|
"logps/generated": -115.86125183105469, |
|
"logps/real": -129.8009796142578, |
|
"loss": 0.5498, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -0.30484524369239807, |
|
"rewards/margins": 0.5773354768753052, |
|
"rewards/real": 0.2724902033805847, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1627906976744186e-07, |
|
"logits/generated": -2.9708516597747803, |
|
"logits/real": -2.8813812732696533, |
|
"logps/generated": -122.1348876953125, |
|
"logps/real": -123.1031723022461, |
|
"loss": 0.5168, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -0.330232709646225, |
|
"rewards/margins": 0.7562737464904785, |
|
"rewards/real": 0.42604103684425354, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4534883720930232e-07, |
|
"logits/generated": -2.961805820465088, |
|
"logits/real": -2.8516037464141846, |
|
"logps/generated": -130.59262084960938, |
|
"logps/real": -131.277099609375, |
|
"loss": 0.4904, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -0.683895468711853, |
|
"rewards/margins": 1.2348084449768066, |
|
"rewards/real": 0.5509130358695984, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7441860465116279e-07, |
|
"logits/generated": -2.8933191299438477, |
|
"logits/real": -2.8156707286834717, |
|
"logps/generated": -131.11270141601562, |
|
"logps/real": -138.29629516601562, |
|
"loss": 0.4853, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/generated": -1.2479474544525146, |
|
"rewards/margins": 1.7186332941055298, |
|
"rewards/real": 0.4706856608390808, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0348837209302325e-07, |
|
"logits/generated": -2.84045672416687, |
|
"logits/real": -2.816912889480591, |
|
"logps/generated": -129.1736297607422, |
|
"logps/real": -138.9403076171875, |
|
"loss": 0.4151, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -1.7937465906143188, |
|
"rewards/margins": 2.168488025665283, |
|
"rewards/real": 0.3747415244579315, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3255813953488372e-07, |
|
"logits/generated": -2.8057825565338135, |
|
"logits/real": -2.7365589141845703, |
|
"logps/generated": -134.1271209716797, |
|
"logps/real": -128.5058135986328, |
|
"loss": 0.4281, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/generated": -2.476748466491699, |
|
"rewards/margins": 2.6212470531463623, |
|
"rewards/real": 0.14449895918369293, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.616279069767442e-07, |
|
"logits/generated": -2.8339638710021973, |
|
"logits/real": -2.7323813438415527, |
|
"logps/generated": -161.02420043945312, |
|
"logps/real": -132.14015197753906, |
|
"loss": 0.3775, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -3.9137425422668457, |
|
"rewards/margins": 3.994602918624878, |
|
"rewards/real": 0.0808596983551979, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9069767441860464e-07, |
|
"logits/generated": -2.744694948196411, |
|
"logits/real": -2.7067599296569824, |
|
"logps/generated": -170.92324829101562, |
|
"logps/real": -138.30003356933594, |
|
"loss": 0.3742, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -4.676175594329834, |
|
"rewards/margins": 4.555473804473877, |
|
"rewards/real": -0.12070190906524658, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_logits/generated": -2.743514060974121, |
|
"eval_logits/real": -2.7147889137268066, |
|
"eval_logps/generated": -163.19664001464844, |
|
"eval_logps/real": -138.36029052734375, |
|
"eval_loss": 0.22440293431282043, |
|
"eval_rewards/accuracies": 0.9657643437385559, |
|
"eval_rewards/generated": -6.68798303604126, |
|
"eval_rewards/margins": 6.318512439727783, |
|
"eval_rewards/real": -0.3694704473018646, |
|
"eval_runtime": 332.6054, |
|
"eval_samples_per_second": 15.033, |
|
"eval_steps_per_second": 0.472, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1976744186046514e-07, |
|
"logits/generated": -2.7230546474456787, |
|
"logits/real": -2.6995229721069336, |
|
"logps/generated": -147.0129852294922, |
|
"logps/real": -138.2716064453125, |
|
"loss": 0.3616, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -3.036238193511963, |
|
"rewards/margins": 2.524616003036499, |
|
"rewards/real": -0.5116221904754639, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4883720930232557e-07, |
|
"logits/generated": -2.7442073822021484, |
|
"logits/real": -2.6913013458251953, |
|
"logps/generated": -181.04037475585938, |
|
"logps/real": -129.39993286132812, |
|
"loss": 0.353, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": -6.460757255554199, |
|
"rewards/margins": 6.2252020835876465, |
|
"rewards/real": -0.2355557233095169, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.77906976744186e-07, |
|
"logits/generated": -2.718548536300659, |
|
"logits/real": -2.6518301963806152, |
|
"logps/generated": -178.575439453125, |
|
"logps/real": -130.4645233154297, |
|
"loss": 0.3546, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/generated": -6.095456123352051, |
|
"rewards/margins": 5.877336502075195, |
|
"rewards/real": -0.2181190699338913, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.069767441860465e-07, |
|
"logits/generated": -2.6532301902770996, |
|
"logits/real": -2.608750820159912, |
|
"logps/generated": -195.4745330810547, |
|
"logps/real": -143.8194580078125, |
|
"loss": 0.3001, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -7.106230735778809, |
|
"rewards/margins": 6.365324974060059, |
|
"rewards/real": -0.7409064173698425, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.3604651162790694e-07, |
|
"logits/generated": -2.59714674949646, |
|
"logits/real": -2.526071310043335, |
|
"logps/generated": -216.6206817626953, |
|
"logps/real": -126.90464782714844, |
|
"loss": 0.3096, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/generated": -10.569665908813477, |
|
"rewards/margins": 9.900853157043457, |
|
"rewards/real": -0.6688116788864136, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6511627906976743e-07, |
|
"logits/generated": -2.5910415649414062, |
|
"logits/real": -2.5469748973846436, |
|
"logps/generated": -186.9461669921875, |
|
"logps/real": -154.80783081054688, |
|
"loss": 0.2858, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -6.714383125305176, |
|
"rewards/margins": 5.151293754577637, |
|
"rewards/real": -1.5630899667739868, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.941860465116279e-07, |
|
"logits/generated": -2.5762853622436523, |
|
"logits/real": -2.506405830383301, |
|
"logps/generated": -200.08053588867188, |
|
"logps/real": -147.4757843017578, |
|
"loss": 0.2498, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -8.602258682250977, |
|
"rewards/margins": 7.1432204246521, |
|
"rewards/real": -1.4590368270874023, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.974143503555268e-07, |
|
"logits/generated": -2.57206392288208, |
|
"logits/real": -2.546504497528076, |
|
"logps/generated": -240.4123077392578, |
|
"logps/real": -146.31198120117188, |
|
"loss": 0.306, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -11.691746711730957, |
|
"rewards/margins": 10.91369915008545, |
|
"rewards/real": -0.778047502040863, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.941822882999354e-07, |
|
"logits/generated": -2.58891224861145, |
|
"logits/real": -2.5609164237976074, |
|
"logps/generated": -281.37774658203125, |
|
"logps/real": -154.63778686523438, |
|
"loss": 0.2973, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/generated": -15.484718322753906, |
|
"rewards/margins": 14.176872253417969, |
|
"rewards/real": -1.307844877243042, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.909502262443438e-07, |
|
"logits/generated": -2.530224561691284, |
|
"logits/real": -2.517199993133545, |
|
"logps/generated": -273.39190673828125, |
|
"logps/real": -140.16693115234375, |
|
"loss": 0.2528, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/generated": -15.023447036743164, |
|
"rewards/margins": 14.073616027832031, |
|
"rewards/real": -0.9498294591903687, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_logits/generated": -2.457301378250122, |
|
"eval_logits/real": -2.4670825004577637, |
|
"eval_logps/generated": -274.8525085449219, |
|
"eval_logps/real": -147.06544494628906, |
|
"eval_loss": 0.13190196454524994, |
|
"eval_rewards/accuracies": 0.9697452187538147, |
|
"eval_rewards/generated": -17.85356903076172, |
|
"eval_rewards/margins": 16.61358070373535, |
|
"eval_rewards/real": -1.2399863004684448, |
|
"eval_runtime": 327.4999, |
|
"eval_samples_per_second": 15.267, |
|
"eval_steps_per_second": 0.479, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.877181641887524e-07, |
|
"logits/generated": -2.4890782833099365, |
|
"logits/real": -2.429434299468994, |
|
"logps/generated": -314.6974792480469, |
|
"logps/real": -135.95115661621094, |
|
"loss": 0.2686, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -19.41824722290039, |
|
"rewards/margins": 18.349300384521484, |
|
"rewards/real": -1.0689440965652466, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.84486102133161e-07, |
|
"logits/generated": -2.4631810188293457, |
|
"logits/real": -2.4491894245147705, |
|
"logps/generated": -289.0598449707031, |
|
"logps/real": -143.9029998779297, |
|
"loss": 0.2482, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": -16.25137710571289, |
|
"rewards/margins": 15.00184440612793, |
|
"rewards/real": -1.2495319843292236, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.812540400775695e-07, |
|
"logits/generated": -2.45487642288208, |
|
"logits/real": -2.4820148944854736, |
|
"logps/generated": -251.5857696533203, |
|
"logps/real": -144.88449096679688, |
|
"loss": 0.2956, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/generated": -13.020822525024414, |
|
"rewards/margins": 11.935991287231445, |
|
"rewards/real": -1.0848290920257568, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.78021978021978e-07, |
|
"logits/generated": -2.500182867050171, |
|
"logits/real": -2.5177927017211914, |
|
"logps/generated": -212.17990112304688, |
|
"logps/real": -164.4898223876953, |
|
"loss": 0.2426, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": -8.700544357299805, |
|
"rewards/margins": 7.066586971282959, |
|
"rewards/real": -1.6339576244354248, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.747899159663865e-07, |
|
"logits/generated": -2.469505548477173, |
|
"logits/real": -2.513817548751831, |
|
"logps/generated": -250.53787231445312, |
|
"logps/real": -166.6981658935547, |
|
"loss": 0.2077, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -13.388379096984863, |
|
"rewards/margins": 11.607447624206543, |
|
"rewards/real": -1.780932068824768, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7155785391079506e-07, |
|
"logits/generated": -2.5186924934387207, |
|
"logits/real": -2.4782614707946777, |
|
"logps/generated": -267.3475646972656, |
|
"logps/real": -157.3760223388672, |
|
"loss": 0.2576, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/generated": -14.050939559936523, |
|
"rewards/margins": 11.882719039916992, |
|
"rewards/real": -2.1682217121124268, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.683257918552036e-07, |
|
"logits/generated": -2.5240137577056885, |
|
"logits/real": -2.484891653060913, |
|
"logps/generated": -282.6662902832031, |
|
"logps/real": -164.34274291992188, |
|
"loss": 0.2201, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/generated": -16.37204360961914, |
|
"rewards/margins": 14.235898971557617, |
|
"rewards/real": -2.1361422538757324, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6509372979961214e-07, |
|
"logits/generated": -2.4790737628936768, |
|
"logits/real": -2.4403738975524902, |
|
"logps/generated": -236.24008178710938, |
|
"logps/real": -155.16851806640625, |
|
"loss": 0.2114, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/generated": -12.50011157989502, |
|
"rewards/margins": 11.145486831665039, |
|
"rewards/real": -1.354625940322876, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.618616677440207e-07, |
|
"logits/generated": -2.430192232131958, |
|
"logits/real": -2.3869004249572754, |
|
"logps/generated": -249.726806640625, |
|
"logps/real": -151.4705352783203, |
|
"loss": 0.2224, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -13.20459270477295, |
|
"rewards/margins": 11.15159797668457, |
|
"rewards/real": -2.0529935359954834, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.5862960568842917e-07, |
|
"logits/generated": -2.480708122253418, |
|
"logits/real": -2.369931697845459, |
|
"logps/generated": -282.884033203125, |
|
"logps/real": -145.2495574951172, |
|
"loss": 0.2066, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -15.511686325073242, |
|
"rewards/margins": 13.962892532348633, |
|
"rewards/real": -1.5487936735153198, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_logits/generated": -2.425682783126831, |
|
"eval_logits/real": -2.3622233867645264, |
|
"eval_logps/generated": -293.674560546875, |
|
"eval_logps/real": -151.37994384765625, |
|
"eval_loss": 0.11716413497924805, |
|
"eval_rewards/accuracies": 0.9617834687232971, |
|
"eval_rewards/generated": -19.735776901245117, |
|
"eval_rewards/margins": 18.064340591430664, |
|
"eval_rewards/real": -1.6714372634887695, |
|
"eval_runtime": 328.1821, |
|
"eval_samples_per_second": 15.235, |
|
"eval_steps_per_second": 0.478, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5539754363283774e-07, |
|
"logits/generated": -2.4415905475616455, |
|
"logits/real": -2.3295607566833496, |
|
"logps/generated": -287.18682861328125, |
|
"logps/real": -155.09823608398438, |
|
"loss": 0.2118, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/generated": -16.675642013549805, |
|
"rewards/margins": 14.781652450561523, |
|
"rewards/real": -1.8939898014068604, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5216548157724625e-07, |
|
"logits/generated": -2.463688850402832, |
|
"logits/real": -2.4042744636535645, |
|
"logps/generated": -241.97543334960938, |
|
"logps/real": -152.55368041992188, |
|
"loss": 0.2114, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -12.119455337524414, |
|
"rewards/margins": 10.819900512695312, |
|
"rewards/real": -1.2995555400848389, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.489334195216548e-07, |
|
"logits/generated": -2.4680073261260986, |
|
"logits/real": -2.427863359451294, |
|
"logps/generated": -251.1258087158203, |
|
"logps/real": -163.27366638183594, |
|
"loss": 0.2059, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -12.832430839538574, |
|
"rewards/margins": 11.404412269592285, |
|
"rewards/real": -1.4280211925506592, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4570135746606334e-07, |
|
"logits/generated": -2.4172284603118896, |
|
"logits/real": -2.3573529720306396, |
|
"logps/generated": -298.1008605957031, |
|
"logps/real": -156.8496856689453, |
|
"loss": 0.1686, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -18.446123123168945, |
|
"rewards/margins": 16.2222900390625, |
|
"rewards/real": -2.2238337993621826, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4246929541047185e-07, |
|
"logits/generated": -2.4144504070281982, |
|
"logits/real": -2.3207590579986572, |
|
"logps/generated": -212.0926055908203, |
|
"logps/real": -166.0170135498047, |
|
"loss": 0.2305, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/generated": -9.594592094421387, |
|
"rewards/margins": 6.877285003662109, |
|
"rewards/real": -2.7173075675964355, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3923723335488036e-07, |
|
"logits/generated": -2.4157674312591553, |
|
"logits/real": -2.4187474250793457, |
|
"logps/generated": -303.06683349609375, |
|
"logps/real": -163.6787872314453, |
|
"loss": 0.1917, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -18.166648864746094, |
|
"rewards/margins": 15.954483032226562, |
|
"rewards/real": -2.2121691703796387, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3600517129928893e-07, |
|
"logits/generated": -2.465456485748291, |
|
"logits/real": -2.445255756378174, |
|
"logps/generated": -245.350341796875, |
|
"logps/real": -177.14071655273438, |
|
"loss": 0.2148, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -10.88718032836914, |
|
"rewards/margins": 8.129331588745117, |
|
"rewards/real": -2.7578492164611816, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.327731092436975e-07, |
|
"logits/generated": -2.4552018642425537, |
|
"logits/real": -2.424823760986328, |
|
"logps/generated": -252.9711456298828, |
|
"logps/real": -154.53909301757812, |
|
"loss": 0.254, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -13.141611099243164, |
|
"rewards/margins": 10.592477798461914, |
|
"rewards/real": -2.5491321086883545, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2954104718810596e-07, |
|
"logits/generated": -2.464360475540161, |
|
"logits/real": -2.444789409637451, |
|
"logps/generated": -296.9568786621094, |
|
"logps/real": -163.8868408203125, |
|
"loss": 0.2066, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -17.908769607543945, |
|
"rewards/margins": 15.718464851379395, |
|
"rewards/real": -2.190305233001709, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2630898513251453e-07, |
|
"logits/generated": -2.506937265396118, |
|
"logits/real": -2.470973253250122, |
|
"logps/generated": -294.0633239746094, |
|
"logps/real": -160.49642944335938, |
|
"loss": 0.2207, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/generated": -17.2155704498291, |
|
"rewards/margins": 14.985618591308594, |
|
"rewards/real": -2.229950189590454, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_logits/generated": -2.4888620376586914, |
|
"eval_logits/real": -2.452549695968628, |
|
"eval_logps/generated": -303.0499572753906, |
|
"eval_logps/real": -154.09176635742188, |
|
"eval_loss": 0.1093968003988266, |
|
"eval_rewards/accuracies": 0.9729299545288086, |
|
"eval_rewards/generated": -20.673315048217773, |
|
"eval_rewards/margins": 18.73069953918457, |
|
"eval_rewards/real": -1.942617416381836, |
|
"eval_runtime": 327.2801, |
|
"eval_samples_per_second": 15.277, |
|
"eval_steps_per_second": 0.48, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2307692307692304e-07, |
|
"logits/generated": -2.543400287628174, |
|
"logits/real": -2.417271614074707, |
|
"logps/generated": -281.9232177734375, |
|
"logps/real": -161.21182250976562, |
|
"loss": 0.1673, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -14.634592056274414, |
|
"rewards/margins": 12.80103588104248, |
|
"rewards/real": -1.8335540294647217, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.198448610213316e-07, |
|
"logits/generated": -2.477651357650757, |
|
"logits/real": -2.4087789058685303, |
|
"logps/generated": -285.4056701660156, |
|
"logps/real": -157.8638916015625, |
|
"loss": 0.201, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -16.256460189819336, |
|
"rewards/margins": 13.421223640441895, |
|
"rewards/real": -2.835240602493286, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.166127989657401e-07, |
|
"logits/generated": -2.4897210597991943, |
|
"logits/real": -2.4585866928100586, |
|
"logps/generated": -301.2572937011719, |
|
"logps/real": -160.141845703125, |
|
"loss": 0.1924, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -17.867128372192383, |
|
"rewards/margins": 15.476908683776855, |
|
"rewards/real": -2.3902173042297363, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1338073691014864e-07, |
|
"logits/generated": -2.5035512447357178, |
|
"logits/real": -2.3802120685577393, |
|
"logps/generated": -332.97222900390625, |
|
"logps/real": -154.29495239257812, |
|
"loss": 0.175, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -21.23163414001465, |
|
"rewards/margins": 18.370525360107422, |
|
"rewards/real": -2.861109972000122, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1014867485455715e-07, |
|
"logits/generated": -2.418759822845459, |
|
"logits/real": -2.3379578590393066, |
|
"logps/generated": -336.86175537109375, |
|
"logps/real": -156.2376251220703, |
|
"loss": 0.1849, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/generated": -22.108346939086914, |
|
"rewards/margins": 19.20734977722168, |
|
"rewards/real": -2.900996685028076, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.069166127989657e-07, |
|
"logits/generated": -2.4547057151794434, |
|
"logits/real": -2.286864995956421, |
|
"logps/generated": -276.150390625, |
|
"logps/real": -144.59564208984375, |
|
"loss": 0.1647, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -16.615272521972656, |
|
"rewards/margins": 14.403627395629883, |
|
"rewards/real": -2.211641788482666, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.036845507433743e-07, |
|
"logits/generated": -2.4923219680786133, |
|
"logits/real": -2.362297296524048, |
|
"logps/generated": -312.7582702636719, |
|
"logps/real": -159.96646118164062, |
|
"loss": 0.1664, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -18.795948028564453, |
|
"rewards/margins": 16.829730987548828, |
|
"rewards/real": -1.9662189483642578, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.004524886877828e-07, |
|
"logits/generated": -2.490182399749756, |
|
"logits/real": -2.3277366161346436, |
|
"logps/generated": -391.92047119140625, |
|
"logps/real": -153.3131866455078, |
|
"loss": 0.1659, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -28.46506118774414, |
|
"rewards/margins": 26.065074920654297, |
|
"rewards/real": -2.39998459815979, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.972204266321913e-07, |
|
"logits/generated": -2.507981777191162, |
|
"logits/real": -2.405111789703369, |
|
"logps/generated": -244.44027709960938, |
|
"logps/real": -147.1367950439453, |
|
"loss": 0.1879, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -13.030759811401367, |
|
"rewards/margins": 11.476921081542969, |
|
"rewards/real": -1.5538378953933716, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9398836457659983e-07, |
|
"logits/generated": -2.5344769954681396, |
|
"logits/real": -2.3722431659698486, |
|
"logps/generated": -196.79986572265625, |
|
"logps/real": -149.31695556640625, |
|
"loss": 0.4379, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -6.881800174713135, |
|
"rewards/margins": 5.857499599456787, |
|
"rewards/real": -1.0242998600006104, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/generated": -2.386992931365967, |
|
"eval_logits/real": -2.344109296798706, |
|
"eval_logps/generated": -179.73768615722656, |
|
"eval_logps/real": -144.66737365722656, |
|
"eval_loss": 0.11515343934297562, |
|
"eval_rewards/accuracies": 0.9665604829788208, |
|
"eval_rewards/generated": -8.342087745666504, |
|
"eval_rewards/margins": 7.341910362243652, |
|
"eval_rewards/real": -1.000178575515747, |
|
"eval_runtime": 326.2978, |
|
"eval_samples_per_second": 15.323, |
|
"eval_steps_per_second": 0.481, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.907563025210084e-07, |
|
"logits/generated": -2.3947510719299316, |
|
"logits/real": -2.409266471862793, |
|
"logps/generated": -194.82321166992188, |
|
"logps/real": -157.6947784423828, |
|
"loss": 0.1571, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -7.0470075607299805, |
|
"rewards/margins": 5.375405311584473, |
|
"rewards/real": -1.6716020107269287, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.875242404654169e-07, |
|
"logits/generated": -2.335791826248169, |
|
"logits/real": -2.3169474601745605, |
|
"logps/generated": -207.33126831054688, |
|
"logps/real": -156.51400756835938, |
|
"loss": 0.1419, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -9.35986042022705, |
|
"rewards/margins": 6.684755802154541, |
|
"rewards/real": -2.675104856491089, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.842921784098255e-07, |
|
"logits/generated": -2.3748762607574463, |
|
"logits/real": -2.400601625442505, |
|
"logps/generated": -222.042724609375, |
|
"logps/real": -170.61839294433594, |
|
"loss": 0.1629, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -9.549965858459473, |
|
"rewards/margins": 7.031239986419678, |
|
"rewards/real": -2.518725633621216, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8106011635423394e-07, |
|
"logits/generated": -2.3414711952209473, |
|
"logits/real": -2.2793216705322266, |
|
"logps/generated": -219.4429168701172, |
|
"logps/real": -160.81027221679688, |
|
"loss": 0.1361, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -10.753046989440918, |
|
"rewards/margins": 8.526832580566406, |
|
"rewards/real": -2.22621488571167, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.778280542986425e-07, |
|
"logits/generated": -2.356356143951416, |
|
"logits/real": -2.2714390754699707, |
|
"logps/generated": -216.5394287109375, |
|
"logps/real": -145.5234832763672, |
|
"loss": 0.1829, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -10.430225372314453, |
|
"rewards/margins": 8.845321655273438, |
|
"rewards/real": -1.5849040746688843, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.745959922430511e-07, |
|
"logits/generated": -2.3817245960235596, |
|
"logits/real": -2.2892487049102783, |
|
"logps/generated": -200.70140075683594, |
|
"logps/real": -155.33592224121094, |
|
"loss": 0.1745, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": -8.53106689453125, |
|
"rewards/margins": 6.320080280303955, |
|
"rewards/real": -2.210986614227295, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.713639301874596e-07, |
|
"logits/generated": -2.365756034851074, |
|
"logits/real": -2.330933094024658, |
|
"logps/generated": -228.14828491210938, |
|
"logps/real": -170.01014709472656, |
|
"loss": 0.1399, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -10.85645866394043, |
|
"rewards/margins": 8.518800735473633, |
|
"rewards/real": -2.3376574516296387, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6813186813186816e-07, |
|
"logits/generated": -2.2974698543548584, |
|
"logits/real": -2.2853493690490723, |
|
"logps/generated": -231.7186737060547, |
|
"logps/real": -154.68409729003906, |
|
"loss": 0.144, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -11.895675659179688, |
|
"rewards/margins": 9.608539581298828, |
|
"rewards/real": -2.2871367931365967, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.648998060762766e-07, |
|
"logits/generated": -2.323545217514038, |
|
"logits/real": -2.2863316535949707, |
|
"logps/generated": -216.95333862304688, |
|
"logps/real": -152.7818145751953, |
|
"loss": 0.1667, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": -9.12224006652832, |
|
"rewards/margins": 7.0738372802734375, |
|
"rewards/real": -2.0484039783477783, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.616677440206852e-07, |
|
"logits/generated": -2.39691162109375, |
|
"logits/real": -2.270139217376709, |
|
"logps/generated": -199.4903106689453, |
|
"logps/real": -154.24615478515625, |
|
"loss": 0.1517, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -6.92580509185791, |
|
"rewards/margins": 5.0083699226379395, |
|
"rewards/real": -1.9174346923828125, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_logits/generated": -2.2691421508789062, |
|
"eval_logits/real": -2.274235725402832, |
|
"eval_logps/generated": -225.55331420898438, |
|
"eval_logps/real": -151.24253845214844, |
|
"eval_loss": 0.09844768047332764, |
|
"eval_rewards/accuracies": 0.9745222926139832, |
|
"eval_rewards/generated": -12.923652648925781, |
|
"eval_rewards/margins": 11.265958786010742, |
|
"eval_rewards/real": -1.6576942205429077, |
|
"eval_runtime": 324.4499, |
|
"eval_samples_per_second": 15.411, |
|
"eval_steps_per_second": 0.484, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.584356819650937e-07, |
|
"logits/generated": -2.3561298847198486, |
|
"logits/real": -2.2546021938323975, |
|
"logps/generated": -272.76910400390625, |
|
"logps/real": -166.93630981445312, |
|
"loss": 0.1387, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -13.198400497436523, |
|
"rewards/margins": 10.639951705932617, |
|
"rewards/real": -2.5584499835968018, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5520361990950227e-07, |
|
"logits/generated": -2.3207361698150635, |
|
"logits/real": -2.2687323093414307, |
|
"logps/generated": -254.7984619140625, |
|
"logps/real": -156.127197265625, |
|
"loss": 0.1328, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -13.316617965698242, |
|
"rewards/margins": 11.088491439819336, |
|
"rewards/real": -2.228126049041748, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.519715578539108e-07, |
|
"logits/generated": -2.2628586292266846, |
|
"logits/real": -2.2417876720428467, |
|
"logps/generated": -217.26290893554688, |
|
"logps/real": -147.60806274414062, |
|
"loss": 0.1532, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -10.866621017456055, |
|
"rewards/margins": 8.915276527404785, |
|
"rewards/real": -1.95134699344635, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.487394957983193e-07, |
|
"logits/generated": -2.2968461513519287, |
|
"logits/real": -2.362973928451538, |
|
"logps/generated": -230.63998413085938, |
|
"logps/real": -171.29905700683594, |
|
"loss": 0.1631, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -10.994275093078613, |
|
"rewards/margins": 8.367854118347168, |
|
"rewards/real": -2.626420497894287, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4550743374272786e-07, |
|
"logits/generated": -2.4199037551879883, |
|
"logits/real": -2.3053011894226074, |
|
"logps/generated": -240.74765014648438, |
|
"logps/real": -161.87173461914062, |
|
"loss": 0.1482, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -11.601387023925781, |
|
"rewards/margins": 8.92815113067627, |
|
"rewards/real": -2.6732351779937744, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.422753716871364e-07, |
|
"logits/generated": -2.2776732444763184, |
|
"logits/real": -2.212689161300659, |
|
"logps/generated": -256.89129638671875, |
|
"logps/real": -144.78775024414062, |
|
"loss": 0.1372, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/generated": -14.179117202758789, |
|
"rewards/margins": 11.40053653717041, |
|
"rewards/real": -2.778578996658325, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3904330963154494e-07, |
|
"logits/generated": -2.2712647914886475, |
|
"logits/real": -2.31594181060791, |
|
"logps/generated": -285.45355224609375, |
|
"logps/real": -159.91629028320312, |
|
"loss": 0.1318, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -15.960909843444824, |
|
"rewards/margins": 13.720555305480957, |
|
"rewards/real": -2.2403564453125, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.358112475759534e-07, |
|
"logits/generated": -2.2503881454467773, |
|
"logits/real": -2.2118101119995117, |
|
"logps/generated": -269.7525939941406, |
|
"logps/real": -148.83535766601562, |
|
"loss": 0.1308, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -14.820501327514648, |
|
"rewards/margins": 12.614280700683594, |
|
"rewards/real": -2.206221580505371, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3257918552036197e-07, |
|
"logits/generated": -2.292738676071167, |
|
"logits/real": -2.2858123779296875, |
|
"logps/generated": -216.58059692382812, |
|
"logps/real": -155.36279296875, |
|
"loss": 0.1334, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -10.16190242767334, |
|
"rewards/margins": 8.57901668548584, |
|
"rewards/real": -1.5828853845596313, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.293471234647705e-07, |
|
"logits/generated": -2.191201686859131, |
|
"logits/real": -2.1718242168426514, |
|
"logps/generated": -248.41610717773438, |
|
"logps/real": -161.17881774902344, |
|
"loss": 0.1708, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -14.112585067749023, |
|
"rewards/margins": 11.10939884185791, |
|
"rewards/real": -3.0031871795654297, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_logits/generated": -2.234299659729004, |
|
"eval_logits/real": -2.2124428749084473, |
|
"eval_logps/generated": -238.25738525390625, |
|
"eval_logps/real": -154.1605224609375, |
|
"eval_loss": 0.0865996927022934, |
|
"eval_rewards/accuracies": 0.9745222926139832, |
|
"eval_rewards/generated": -14.19405746459961, |
|
"eval_rewards/margins": 12.244565963745117, |
|
"eval_rewards/real": -1.949493408203125, |
|
"eval_runtime": 326.243, |
|
"eval_samples_per_second": 15.326, |
|
"eval_steps_per_second": 0.481, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2611506140917905e-07, |
|
"logits/generated": -2.3678855895996094, |
|
"logits/real": -2.254812717437744, |
|
"logps/generated": -200.08316040039062, |
|
"logps/real": -157.531494140625, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -8.445894241333008, |
|
"rewards/margins": 6.2266740798950195, |
|
"rewards/real": -2.2192206382751465, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2288299935358757e-07, |
|
"logits/generated": -2.294254779815674, |
|
"logits/real": -2.110302448272705, |
|
"logps/generated": -250.7073516845703, |
|
"logps/real": -144.10202026367188, |
|
"loss": 0.1251, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -13.250715255737305, |
|
"rewards/margins": 10.665332794189453, |
|
"rewards/real": -2.585383415222168, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.196509372979961e-07, |
|
"logits/generated": -2.277039051055908, |
|
"logits/real": -2.163405656814575, |
|
"logps/generated": -249.3871612548828, |
|
"logps/real": -158.72262573242188, |
|
"loss": 0.1312, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -12.719002723693848, |
|
"rewards/margins": 10.320619583129883, |
|
"rewards/real": -2.398383617401123, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1641887524240465e-07, |
|
"logits/generated": -2.1525888442993164, |
|
"logits/real": -2.1593430042266846, |
|
"logps/generated": -276.4872741699219, |
|
"logps/real": -169.02743530273438, |
|
"loss": 0.0971, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -15.876765251159668, |
|
"rewards/margins": 13.597285270690918, |
|
"rewards/real": -2.279479503631592, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1318681318681316e-07, |
|
"logits/generated": -2.177074432373047, |
|
"logits/real": -2.260298013687134, |
|
"logps/generated": -236.02206420898438, |
|
"logps/real": -170.76596069335938, |
|
"loss": 0.1008, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -11.86265754699707, |
|
"rewards/margins": 9.2439546585083, |
|
"rewards/real": -2.6187024116516113, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.0995475113122173e-07, |
|
"logits/generated": -2.1248366832733154, |
|
"logits/real": -2.187145471572876, |
|
"logps/generated": -271.3382263183594, |
|
"logps/real": -168.44320678710938, |
|
"loss": 0.1217, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -15.90544319152832, |
|
"rewards/margins": 12.456186294555664, |
|
"rewards/real": -3.4492554664611816, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0672268907563024e-07, |
|
"logits/generated": -2.2197773456573486, |
|
"logits/real": -2.114558219909668, |
|
"logps/generated": -241.92642211914062, |
|
"logps/real": -143.5634307861328, |
|
"loss": 0.1154, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": -13.74907398223877, |
|
"rewards/margins": 11.252575874328613, |
|
"rewards/real": -2.4964985847473145, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0349062702003876e-07, |
|
"logits/generated": -2.202606678009033, |
|
"logits/real": -2.1499722003936768, |
|
"logps/generated": -271.87738037109375, |
|
"logps/real": -161.31857299804688, |
|
"loss": 0.1251, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -16.116554260253906, |
|
"rewards/margins": 13.589006423950195, |
|
"rewards/real": -2.5275490283966064, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0025856496444727e-07, |
|
"logits/generated": -2.345546007156372, |
|
"logits/real": -2.2405107021331787, |
|
"logps/generated": -220.90701293945312, |
|
"logps/real": -160.68800354003906, |
|
"loss": 0.1607, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -10.754340171813965, |
|
"rewards/margins": 8.159425735473633, |
|
"rewards/real": -2.5949156284332275, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9702650290885584e-07, |
|
"logits/generated": -2.2388525009155273, |
|
"logits/real": -2.1353354454040527, |
|
"logps/generated": -277.51861572265625, |
|
"logps/real": -154.5572509765625, |
|
"loss": 0.1135, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -16.760658264160156, |
|
"rewards/margins": 13.79723072052002, |
|
"rewards/real": -2.963425874710083, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/generated": -2.1789329051971436, |
|
"eval_logits/real": -2.198742628097534, |
|
"eval_logps/generated": -260.81390380859375, |
|
"eval_logps/real": -164.8361358642578, |
|
"eval_loss": 0.08095261454582214, |
|
"eval_rewards/accuracies": 0.9785031676292419, |
|
"eval_rewards/generated": -16.449708938598633, |
|
"eval_rewards/margins": 13.432653427124023, |
|
"eval_rewards/real": -3.017056465148926, |
|
"eval_runtime": 325.1652, |
|
"eval_samples_per_second": 15.377, |
|
"eval_steps_per_second": 0.483, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9379444085326436e-07, |
|
"logits/generated": -2.269972085952759, |
|
"logits/real": -2.200730085372925, |
|
"logps/generated": -241.37060546875, |
|
"logps/real": -160.4932403564453, |
|
"loss": 0.1393, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -12.075285911560059, |
|
"rewards/margins": 8.877761840820312, |
|
"rewards/real": -3.1975245475769043, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.905623787976729e-07, |
|
"logits/generated": -2.1529345512390137, |
|
"logits/real": -2.130723237991333, |
|
"logps/generated": -278.5174560546875, |
|
"logps/real": -172.23374938964844, |
|
"loss": 0.1035, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -16.6492977142334, |
|
"rewards/margins": 13.676687240600586, |
|
"rewards/real": -2.9726104736328125, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8733031674208144e-07, |
|
"logits/generated": -2.264868974685669, |
|
"logits/real": -2.2218079566955566, |
|
"logps/generated": -233.82815551757812, |
|
"logps/real": -155.95506286621094, |
|
"loss": 0.1595, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -11.269330024719238, |
|
"rewards/margins": 8.389703750610352, |
|
"rewards/real": -2.879626750946045, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8409825468648995e-07, |
|
"logits/generated": -2.287550926208496, |
|
"logits/real": -2.292382001876831, |
|
"logps/generated": -261.179443359375, |
|
"logps/real": -170.32998657226562, |
|
"loss": 0.1342, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -14.10278034210205, |
|
"rewards/margins": 11.701745986938477, |
|
"rewards/real": -2.4010345935821533, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.808661926308985e-07, |
|
"logits/generated": -2.33073091506958, |
|
"logits/real": -2.274296283721924, |
|
"logps/generated": -241.5367431640625, |
|
"logps/real": -149.47222900390625, |
|
"loss": 0.115, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -12.70106315612793, |
|
"rewards/margins": 10.1942777633667, |
|
"rewards/real": -2.5067856311798096, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7763413057530703e-07, |
|
"logits/generated": -2.38000226020813, |
|
"logits/real": -2.3325276374816895, |
|
"logps/generated": -247.8423614501953, |
|
"logps/real": -173.57339477539062, |
|
"loss": 0.131, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -11.641576766967773, |
|
"rewards/margins": 9.302278518676758, |
|
"rewards/real": -2.339297294616699, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.744020685197156e-07, |
|
"logits/generated": -2.4010801315307617, |
|
"logits/real": -2.257711887359619, |
|
"logps/generated": -246.85153198242188, |
|
"logps/real": -166.25039672851562, |
|
"loss": 0.1502, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -11.700878143310547, |
|
"rewards/margins": 9.326895713806152, |
|
"rewards/real": -2.3739829063415527, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7117000646412406e-07, |
|
"logits/generated": -2.350351572036743, |
|
"logits/real": -2.226243734359741, |
|
"logps/generated": -263.51776123046875, |
|
"logps/real": -150.32102966308594, |
|
"loss": 0.1257, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -14.319729804992676, |
|
"rewards/margins": 12.047926902770996, |
|
"rewards/real": -2.2718007564544678, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6793794440853263e-07, |
|
"logits/generated": -2.4051427841186523, |
|
"logits/real": -2.3996026515960693, |
|
"logps/generated": -235.8258819580078, |
|
"logps/real": -158.03170776367188, |
|
"loss": 0.1842, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -12.911969184875488, |
|
"rewards/margins": 11.036763191223145, |
|
"rewards/real": -1.8752062320709229, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6470588235294114e-07, |
|
"logits/generated": -2.4504921436309814, |
|
"logits/real": -2.312851905822754, |
|
"logps/generated": -253.6083526611328, |
|
"logps/real": -168.83355712890625, |
|
"loss": 0.1364, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -12.168034553527832, |
|
"rewards/margins": 8.825535774230957, |
|
"rewards/real": -3.3424973487854004, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/generated": -2.3294870853424072, |
|
"eval_logits/real": -2.3367509841918945, |
|
"eval_logps/generated": -244.4078369140625, |
|
"eval_logps/real": -160.21507263183594, |
|
"eval_loss": 0.08480827510356903, |
|
"eval_rewards/accuracies": 0.9729299545288086, |
|
"eval_rewards/generated": -14.809103965759277, |
|
"eval_rewards/margins": 12.254154205322266, |
|
"eval_rewards/real": -2.5549488067626953, |
|
"eval_runtime": 325.558, |
|
"eval_samples_per_second": 15.358, |
|
"eval_steps_per_second": 0.482, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.614738202973497e-07, |
|
"logits/generated": -2.37715482711792, |
|
"logits/real": -2.3596484661102295, |
|
"logps/generated": -225.02578735351562, |
|
"logps/real": -159.61839294433594, |
|
"loss": 0.1056, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -11.049263000488281, |
|
"rewards/margins": 7.907676696777344, |
|
"rewards/real": -3.1415863037109375, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.582417582417583e-07, |
|
"logits/generated": -2.3448710441589355, |
|
"logits/real": -2.323169231414795, |
|
"logps/generated": -246.141357421875, |
|
"logps/real": -171.561767578125, |
|
"loss": 0.0895, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -12.90569019317627, |
|
"rewards/margins": 9.77393913269043, |
|
"rewards/real": -3.1317505836486816, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5500969618616674e-07, |
|
"logits/generated": -2.3373780250549316, |
|
"logits/real": -2.2862701416015625, |
|
"logps/generated": -267.49029541015625, |
|
"logps/real": -170.78024291992188, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -14.549161911010742, |
|
"rewards/margins": 11.566902160644531, |
|
"rewards/real": -2.9822611808776855, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.517776341305753e-07, |
|
"logits/generated": -2.359157085418701, |
|
"logits/real": -2.3121438026428223, |
|
"logps/generated": -207.0237274169922, |
|
"logps/real": -151.88720703125, |
|
"loss": 0.1634, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -10.258100509643555, |
|
"rewards/margins": 7.330819606781006, |
|
"rewards/real": -2.927279472351074, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.485455720749838e-07, |
|
"logits/generated": -2.397444009780884, |
|
"logits/real": -2.3451006412506104, |
|
"logps/generated": -255.43264770507812, |
|
"logps/real": -172.95346069335938, |
|
"loss": 0.0939, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -12.080055236816406, |
|
"rewards/margins": 9.478727340698242, |
|
"rewards/real": -2.601327419281006, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4531351001939233e-07, |
|
"logits/generated": -2.354841947555542, |
|
"logits/real": -2.379093885421753, |
|
"logps/generated": -275.9471435546875, |
|
"logps/real": -189.16122436523438, |
|
"loss": 0.1414, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/generated": -14.650967597961426, |
|
"rewards/margins": 11.281542778015137, |
|
"rewards/real": -3.369422435760498, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.420814479638009e-07, |
|
"logits/generated": -2.3819050788879395, |
|
"logits/real": -2.2698609828948975, |
|
"logps/generated": -278.7743835449219, |
|
"logps/real": -171.54296875, |
|
"loss": 0.1414, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/generated": -14.173556327819824, |
|
"rewards/margins": 10.707517623901367, |
|
"rewards/real": -3.4660377502441406, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.388493859082094e-07, |
|
"logits/generated": -2.349855899810791, |
|
"logits/real": -2.2393364906311035, |
|
"logps/generated": -276.61981201171875, |
|
"logps/real": -174.84996032714844, |
|
"loss": 0.1173, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -15.051767349243164, |
|
"rewards/margins": 11.607970237731934, |
|
"rewards/real": -3.443795680999756, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3561732385261796e-07, |
|
"logits/generated": -2.334862232208252, |
|
"logits/real": -2.3226168155670166, |
|
"logps/generated": -275.31512451171875, |
|
"logps/real": -156.58956909179688, |
|
"loss": 0.1284, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -14.1647310256958, |
|
"rewards/margins": 12.026006698608398, |
|
"rewards/real": -2.1387248039245605, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.323852617970265e-07, |
|
"logits/generated": -2.498349189758301, |
|
"logits/real": -2.4359524250030518, |
|
"logps/generated": -256.3857727050781, |
|
"logps/real": -175.72055053710938, |
|
"loss": 0.1142, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -11.867788314819336, |
|
"rewards/margins": 8.631060600280762, |
|
"rewards/real": -3.2367255687713623, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/generated": -2.4644362926483154, |
|
"eval_logits/real": -2.478717565536499, |
|
"eval_logps/generated": -202.75526428222656, |
|
"eval_logps/real": -161.3638458251953, |
|
"eval_loss": 0.09024719893932343, |
|
"eval_rewards/accuracies": 0.9713375568389893, |
|
"eval_rewards/generated": -10.643847465515137, |
|
"eval_rewards/margins": 7.974020957946777, |
|
"eval_rewards/real": -2.669825553894043, |
|
"eval_runtime": 325.0517, |
|
"eval_samples_per_second": 15.382, |
|
"eval_steps_per_second": 0.483, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.29153199741435e-07, |
|
"logits/generated": -2.505728244781494, |
|
"logits/real": -2.505375385284424, |
|
"logps/generated": -230.0421600341797, |
|
"logps/real": -168.85704040527344, |
|
"loss": 0.1156, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -10.663579940795898, |
|
"rewards/margins": 7.189150333404541, |
|
"rewards/real": -3.474430799484253, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2592113768584355e-07, |
|
"logits/generated": -2.5055289268493652, |
|
"logits/real": -2.5188517570495605, |
|
"logps/generated": -253.8979034423828, |
|
"logps/real": -199.04542541503906, |
|
"loss": 0.1064, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -11.993169784545898, |
|
"rewards/margins": 7.974666595458984, |
|
"rewards/real": -4.018503665924072, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.226890756302521e-07, |
|
"logits/generated": -2.465848207473755, |
|
"logits/real": -2.450221061706543, |
|
"logps/generated": -267.3594665527344, |
|
"logps/real": -190.57058715820312, |
|
"loss": 0.0993, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/generated": -13.37690544128418, |
|
"rewards/margins": 9.04112720489502, |
|
"rewards/real": -4.33577823638916, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1945701357466063e-07, |
|
"logits/generated": -2.4809277057647705, |
|
"logits/real": -2.448288917541504, |
|
"logps/generated": -244.9989776611328, |
|
"logps/real": -176.53506469726562, |
|
"loss": 0.138, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -12.209803581237793, |
|
"rewards/margins": 7.888075828552246, |
|
"rewards/real": -4.321727752685547, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1622495151906917e-07, |
|
"logits/generated": -2.5043458938598633, |
|
"logits/real": -2.4832687377929688, |
|
"logps/generated": -245.507568359375, |
|
"logps/real": -173.8964385986328, |
|
"loss": 0.0956, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -11.928030014038086, |
|
"rewards/margins": 8.440313339233398, |
|
"rewards/real": -3.4877171516418457, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.129928894634777e-07, |
|
"logits/generated": -2.5427870750427246, |
|
"logits/real": -2.499741315841675, |
|
"logps/generated": -237.24887084960938, |
|
"logps/real": -168.909423828125, |
|
"loss": 0.1476, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -10.177728652954102, |
|
"rewards/margins": 7.252813816070557, |
|
"rewards/real": -2.9249141216278076, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.0976082740788623e-07, |
|
"logits/generated": -2.4873366355895996, |
|
"logits/real": -2.4385483264923096, |
|
"logps/generated": -206.4569549560547, |
|
"logps/real": -153.2084503173828, |
|
"loss": 0.1223, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": -10.100156784057617, |
|
"rewards/margins": 7.314939975738525, |
|
"rewards/real": -2.785216808319092, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0652876535229474e-07, |
|
"logits/generated": -2.5049309730529785, |
|
"logits/real": -2.456437587738037, |
|
"logps/generated": -214.497314453125, |
|
"logps/real": -164.29551696777344, |
|
"loss": 0.0969, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -9.749935150146484, |
|
"rewards/margins": 6.255521774291992, |
|
"rewards/real": -3.4944145679473877, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0329670329670329e-07, |
|
"logits/generated": -2.5299432277679443, |
|
"logits/real": -2.425293445587158, |
|
"logps/generated": -239.8467254638672, |
|
"logps/real": -163.84437561035156, |
|
"loss": 0.0989, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -10.874212265014648, |
|
"rewards/margins": 8.04706859588623, |
|
"rewards/real": -2.8271448612213135, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0006464124111183e-07, |
|
"logits/generated": -2.4800021648406982, |
|
"logits/real": -2.451796293258667, |
|
"logps/generated": -237.79653930664062, |
|
"logps/real": -167.1535186767578, |
|
"loss": 0.1332, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -12.058743476867676, |
|
"rewards/margins": 8.454904556274414, |
|
"rewards/real": -3.6038384437561035, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/generated": -2.4417178630828857, |
|
"eval_logits/real": -2.462984800338745, |
|
"eval_logps/generated": -215.05523681640625, |
|
"eval_logps/real": -162.10159301757812, |
|
"eval_loss": 0.07708299905061722, |
|
"eval_rewards/accuracies": 0.9785031676292419, |
|
"eval_rewards/generated": -11.873842239379883, |
|
"eval_rewards/margins": 9.130241394042969, |
|
"eval_rewards/real": -2.743600368499756, |
|
"eval_runtime": 326.4428, |
|
"eval_samples_per_second": 15.317, |
|
"eval_steps_per_second": 0.481, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9683257918552034e-07, |
|
"logits/generated": -2.4482009410858154, |
|
"logits/real": -2.4847419261932373, |
|
"logps/generated": -222.0105438232422, |
|
"logps/real": -158.9528350830078, |
|
"loss": 0.1071, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -11.227673530578613, |
|
"rewards/margins": 8.365415573120117, |
|
"rewards/real": -2.862257480621338, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9360051712992888e-07, |
|
"logits/generated": -2.4634220600128174, |
|
"logits/real": -2.4727044105529785, |
|
"logps/generated": -239.70742797851562, |
|
"logps/real": -174.02890014648438, |
|
"loss": 0.0992, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -12.579872131347656, |
|
"rewards/margins": 8.563131332397461, |
|
"rewards/real": -4.016742706298828, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.903684550743374e-07, |
|
"logits/generated": -2.430037260055542, |
|
"logits/real": -2.390148639678955, |
|
"logps/generated": -231.12417602539062, |
|
"logps/real": -151.89051818847656, |
|
"loss": 0.1022, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -12.785438537597656, |
|
"rewards/margins": 9.053568840026855, |
|
"rewards/real": -3.731870174407959, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8713639301874596e-07, |
|
"logits/generated": -2.4577813148498535, |
|
"logits/real": -2.450479030609131, |
|
"logps/generated": -251.19302368164062, |
|
"logps/real": -171.656982421875, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -13.755993843078613, |
|
"rewards/margins": 9.537080764770508, |
|
"rewards/real": -4.2189130783081055, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.839043309631545e-07, |
|
"logits/generated": -2.449868679046631, |
|
"logits/real": -2.485016107559204, |
|
"logps/generated": -256.3826904296875, |
|
"logps/real": -172.46572875976562, |
|
"loss": 0.1185, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -13.964202880859375, |
|
"rewards/margins": 9.694369316101074, |
|
"rewards/real": -4.269833564758301, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8067226890756302e-07, |
|
"logits/generated": -2.500181198120117, |
|
"logits/real": -2.4853765964508057, |
|
"logps/generated": -255.6665802001953, |
|
"logps/real": -180.01492309570312, |
|
"loss": 0.0614, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -13.007906913757324, |
|
"rewards/margins": 9.470663070678711, |
|
"rewards/real": -3.537243604660034, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7744020685197156e-07, |
|
"logits/generated": -2.451597213745117, |
|
"logits/real": -2.4578232765197754, |
|
"logps/generated": -269.48602294921875, |
|
"logps/real": -180.25762939453125, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -14.529942512512207, |
|
"rewards/margins": 11.10853385925293, |
|
"rewards/real": -3.421407699584961, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7420814479638007e-07, |
|
"logits/generated": -2.4766621589660645, |
|
"logits/real": -2.428915023803711, |
|
"logps/generated": -268.610107421875, |
|
"logps/real": -183.23977661132812, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -13.613744735717773, |
|
"rewards/margins": 8.734598159790039, |
|
"rewards/real": -4.879148006439209, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7097608274078861e-07, |
|
"logits/generated": -2.4673948287963867, |
|
"logits/real": -2.426107883453369, |
|
"logps/generated": -268.60968017578125, |
|
"logps/real": -185.16867065429688, |
|
"loss": 0.1291, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -13.530069351196289, |
|
"rewards/margins": 9.933688163757324, |
|
"rewards/real": -3.5963797569274902, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6774402068519713e-07, |
|
"logits/generated": -2.4350733757019043, |
|
"logits/real": -2.3934569358825684, |
|
"logps/generated": -263.73193359375, |
|
"logps/real": -164.91358947753906, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -14.513885498046875, |
|
"rewards/margins": 10.701467514038086, |
|
"rewards/real": -3.8124184608459473, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_logits/generated": -2.3948426246643066, |
|
"eval_logits/real": -2.425471067428589, |
|
"eval_logps/generated": -238.21556091308594, |
|
"eval_logps/real": -168.78074645996094, |
|
"eval_loss": 0.07581960409879684, |
|
"eval_rewards/accuracies": 0.9745222926139832, |
|
"eval_rewards/generated": -14.189876556396484, |
|
"eval_rewards/margins": 10.778358459472656, |
|
"eval_rewards/real": -3.4115185737609863, |
|
"eval_runtime": 324.5114, |
|
"eval_samples_per_second": 15.408, |
|
"eval_steps_per_second": 0.484, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6451195862960567e-07, |
|
"logits/generated": -2.4927570819854736, |
|
"logits/real": -2.4464633464813232, |
|
"logps/generated": -231.09664916992188, |
|
"logps/real": -159.4072265625, |
|
"loss": 0.0937, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -11.880216598510742, |
|
"rewards/margins": 8.993806838989258, |
|
"rewards/real": -2.886411190032959, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6127989657401424e-07, |
|
"logits/generated": -2.4784765243530273, |
|
"logits/real": -2.47477650642395, |
|
"logps/generated": -233.77877807617188, |
|
"logps/real": -159.3743438720703, |
|
"loss": 0.0946, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -11.6648588180542, |
|
"rewards/margins": 8.083456039428711, |
|
"rewards/real": -3.5814037322998047, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5804783451842275e-07, |
|
"logits/generated": -2.490647077560425, |
|
"logits/real": -2.463700532913208, |
|
"logps/generated": -277.70172119140625, |
|
"logps/real": -174.46707153320312, |
|
"loss": 0.1369, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -15.150970458984375, |
|
"rewards/margins": 11.056761741638184, |
|
"rewards/real": -4.094208717346191, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.548157724628313e-07, |
|
"logits/generated": -2.551166534423828, |
|
"logits/real": -2.5086779594421387, |
|
"logps/generated": -257.8534240722656, |
|
"logps/real": -183.65255737304688, |
|
"loss": 0.1252, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -13.366009712219238, |
|
"rewards/margins": 9.787813186645508, |
|
"rewards/real": -3.578195095062256, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.515837104072398e-07, |
|
"logits/generated": -2.5603954792022705, |
|
"logits/real": -2.539309501647949, |
|
"logps/generated": -229.6404266357422, |
|
"logps/real": -172.22093200683594, |
|
"loss": 0.1479, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/generated": -11.011828422546387, |
|
"rewards/margins": 7.343997955322266, |
|
"rewards/real": -3.6678295135498047, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4835164835164835e-07, |
|
"logits/generated": -2.575138568878174, |
|
"logits/real": -2.495459794998169, |
|
"logps/generated": -220.6327362060547, |
|
"logps/real": -144.017333984375, |
|
"loss": 0.0779, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -10.787625312805176, |
|
"rewards/margins": 8.316181182861328, |
|
"rewards/real": -2.4714438915252686, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.451195862960569e-07, |
|
"logits/generated": -2.5613296031951904, |
|
"logits/real": -2.569815158843994, |
|
"logps/generated": -232.4954071044922, |
|
"logps/real": -175.23074340820312, |
|
"loss": 0.0922, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -11.065740585327148, |
|
"rewards/margins": 7.911036491394043, |
|
"rewards/real": -3.1547024250030518, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.418875242404654e-07, |
|
"logits/generated": -2.539595365524292, |
|
"logits/real": -2.5092930793762207, |
|
"logps/generated": -220.0555419921875, |
|
"logps/real": -165.4029083251953, |
|
"loss": 0.1347, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -10.106549263000488, |
|
"rewards/margins": 7.346714019775391, |
|
"rewards/real": -2.7598352432250977, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3865546218487394e-07, |
|
"logits/generated": -2.6019704341888428, |
|
"logits/real": -2.5571720600128174, |
|
"logps/generated": -237.4385223388672, |
|
"logps/real": -167.28253173828125, |
|
"loss": 0.0784, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -10.303973197937012, |
|
"rewards/margins": 8.126482963562012, |
|
"rewards/real": -2.1774911880493164, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3542340012928246e-07, |
|
"logits/generated": -2.5232183933258057, |
|
"logits/real": -2.523380756378174, |
|
"logps/generated": -240.03897094726562, |
|
"logps/real": -154.43798828125, |
|
"loss": 0.1306, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/generated": -12.480694770812988, |
|
"rewards/margins": 9.408671379089355, |
|
"rewards/real": -3.072023868560791, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_logits/generated": -2.5270004272460938, |
|
"eval_logits/real": -2.537477493286133, |
|
"eval_logps/generated": -207.37860107421875, |
|
"eval_logps/real": -158.70806884765625, |
|
"eval_loss": 0.07650701701641083, |
|
"eval_rewards/accuracies": 0.9753184914588928, |
|
"eval_rewards/generated": -11.106179237365723, |
|
"eval_rewards/margins": 8.70193099975586, |
|
"eval_rewards/real": -2.404249906539917, |
|
"eval_runtime": 324.1, |
|
"eval_samples_per_second": 15.427, |
|
"eval_steps_per_second": 0.484, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3219133807369102e-07, |
|
"logits/generated": -2.5183393955230713, |
|
"logits/real": -2.483584403991699, |
|
"logps/generated": -237.892578125, |
|
"logps/real": -154.9105682373047, |
|
"loss": 0.1098, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -12.671589851379395, |
|
"rewards/margins": 10.110515594482422, |
|
"rewards/real": -2.561074733734131, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2895927601809956e-07, |
|
"logits/generated": -2.5193591117858887, |
|
"logits/real": -2.546970844268799, |
|
"logps/generated": -238.3896484375, |
|
"logps/real": -164.31768798828125, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -12.500856399536133, |
|
"rewards/margins": 9.552709579467773, |
|
"rewards/real": -2.9481449127197266, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2572721396250808e-07, |
|
"logits/generated": -2.5152533054351807, |
|
"logits/real": -2.501941442489624, |
|
"logps/generated": -221.7640380859375, |
|
"logps/real": -161.3094940185547, |
|
"loss": 0.1377, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -10.399030685424805, |
|
"rewards/margins": 7.858689785003662, |
|
"rewards/real": -2.540339946746826, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.224951519069166e-07, |
|
"logits/generated": -2.5427238941192627, |
|
"logits/real": -2.5108070373535156, |
|
"logps/generated": -258.5174560546875, |
|
"logps/real": -167.56686401367188, |
|
"loss": 0.1183, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -12.709803581237793, |
|
"rewards/margins": 10.17547607421875, |
|
"rewards/real": -2.5343270301818848, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1926308985132513e-07, |
|
"logits/generated": -2.528041124343872, |
|
"logits/real": -2.5164339542388916, |
|
"logps/generated": -224.2671356201172, |
|
"logps/real": -177.60060119628906, |
|
"loss": 0.0929, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -10.380260467529297, |
|
"rewards/margins": 7.477629661560059, |
|
"rewards/real": -2.9026293754577637, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1603102779573367e-07, |
|
"logits/generated": -2.518709421157837, |
|
"logits/real": -2.4333877563476562, |
|
"logps/generated": -222.98892211914062, |
|
"logps/real": -153.88491821289062, |
|
"loss": 0.0828, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -10.736051559448242, |
|
"rewards/margins": 7.6436614990234375, |
|
"rewards/real": -3.092390537261963, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.127989657401422e-07, |
|
"logits/generated": -2.5053441524505615, |
|
"logits/real": -2.4574134349823, |
|
"logps/generated": -243.2651824951172, |
|
"logps/real": -179.1326904296875, |
|
"loss": 0.1207, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -12.284380912780762, |
|
"rewards/margins": 8.803535461425781, |
|
"rewards/real": -3.4808456897735596, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0956690368455074e-07, |
|
"logits/generated": -2.4903757572174072, |
|
"logits/real": -2.5189337730407715, |
|
"logps/generated": -226.8483428955078, |
|
"logps/real": -171.20724487304688, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -11.636832237243652, |
|
"rewards/margins": 8.194231033325195, |
|
"rewards/real": -3.4426021575927734, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0633484162895927e-07, |
|
"logits/generated": -2.5010979175567627, |
|
"logits/real": -2.4416332244873047, |
|
"logps/generated": -225.2292938232422, |
|
"logps/real": -172.9666748046875, |
|
"loss": 0.0973, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -10.62346363067627, |
|
"rewards/margins": 6.646616458892822, |
|
"rewards/real": -3.976848602294922, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.031027795733678e-07, |
|
"logits/generated": -2.489281177520752, |
|
"logits/real": -2.480539560317993, |
|
"logps/generated": -241.0604248046875, |
|
"logps/real": -165.22604370117188, |
|
"loss": 0.1084, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -12.310083389282227, |
|
"rewards/margins": 9.358478546142578, |
|
"rewards/real": -2.951603651046753, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_logits/generated": -2.476224422454834, |
|
"eval_logits/real": -2.484823226928711, |
|
"eval_logps/generated": -220.34217834472656, |
|
"eval_logps/real": -162.47093200683594, |
|
"eval_loss": 0.07595483213663101, |
|
"eval_rewards/accuracies": 0.9745222926139832, |
|
"eval_rewards/generated": -12.402539253234863, |
|
"eval_rewards/margins": 9.622005462646484, |
|
"eval_rewards/real": -2.780533790588379, |
|
"eval_runtime": 325.3302, |
|
"eval_samples_per_second": 15.369, |
|
"eval_steps_per_second": 0.483, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.987071751777634e-08, |
|
"logits/generated": -2.517509937286377, |
|
"logits/real": -2.5105528831481934, |
|
"logps/generated": -220.71542358398438, |
|
"logps/real": -163.03253173828125, |
|
"loss": 0.0891, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -10.003129959106445, |
|
"rewards/margins": 6.5026068687438965, |
|
"rewards/real": -3.500523328781128, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.663865546218488e-08, |
|
"logits/generated": -2.494823694229126, |
|
"logits/real": -2.4722931385040283, |
|
"logps/generated": -218.6509552001953, |
|
"logps/real": -150.20521545410156, |
|
"loss": 0.0958, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -11.17399787902832, |
|
"rewards/margins": 8.31296157836914, |
|
"rewards/real": -2.861036777496338, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.340659340659341e-08, |
|
"logits/generated": -2.4990344047546387, |
|
"logits/real": -2.4319558143615723, |
|
"logps/generated": -241.905517578125, |
|
"logps/real": -166.25537109375, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -12.346821784973145, |
|
"rewards/margins": 8.885249137878418, |
|
"rewards/real": -3.461573839187622, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.017453135100193e-08, |
|
"logits/generated": -2.4997031688690186, |
|
"logits/real": -2.510288715362549, |
|
"logps/generated": -258.8919677734375, |
|
"logps/real": -195.13092041015625, |
|
"loss": 0.0683, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -13.229423522949219, |
|
"rewards/margins": 9.894552230834961, |
|
"rewards/real": -3.3348708152770996, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.694246929541046e-08, |
|
"logits/generated": -2.4908547401428223, |
|
"logits/real": -2.469104051589966, |
|
"logps/generated": -258.60723876953125, |
|
"logps/real": -183.93992614746094, |
|
"loss": 0.1175, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -12.868890762329102, |
|
"rewards/margins": 8.966911315917969, |
|
"rewards/real": -3.9019787311553955, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.371040723981899e-08, |
|
"logits/generated": -2.4881350994110107, |
|
"logits/real": -2.4609992504119873, |
|
"logps/generated": -257.45709228515625, |
|
"logps/real": -172.5931396484375, |
|
"loss": 0.0944, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -13.51276969909668, |
|
"rewards/margins": 9.734931945800781, |
|
"rewards/real": -3.777839183807373, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 8.047834518422754e-08, |
|
"logits/generated": -2.5072503089904785, |
|
"logits/real": -2.4428117275238037, |
|
"logps/generated": -254.014892578125, |
|
"logps/real": -170.81788635253906, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -12.408576011657715, |
|
"rewards/margins": 8.652534484863281, |
|
"rewards/real": -3.756040573120117, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.724628312863607e-08, |
|
"logits/generated": -2.4876208305358887, |
|
"logits/real": -2.463144302368164, |
|
"logps/generated": -272.12139892578125, |
|
"logps/real": -181.99913024902344, |
|
"loss": 0.0929, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -14.419462203979492, |
|
"rewards/margins": 10.72993278503418, |
|
"rewards/real": -3.68953013420105, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.40142210730446e-08, |
|
"logits/generated": -2.4878337383270264, |
|
"logits/real": -2.502197504043579, |
|
"logps/generated": -257.6641845703125, |
|
"logps/real": -180.44590759277344, |
|
"loss": 0.0694, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -12.998161315917969, |
|
"rewards/margins": 10.025131225585938, |
|
"rewards/real": -2.9730300903320312, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.078215901745313e-08, |
|
"logits/generated": -2.4765686988830566, |
|
"logits/real": -2.459664821624756, |
|
"logps/generated": -239.1787109375, |
|
"logps/real": -167.0284423828125, |
|
"loss": 0.1494, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -11.897302627563477, |
|
"rewards/margins": 8.363332748413086, |
|
"rewards/real": -3.5339698791503906, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_logits/generated": -2.465639591217041, |
|
"eval_logits/real": -2.4750711917877197, |
|
"eval_logps/generated": -226.3308868408203, |
|
"eval_logps/real": -164.7202606201172, |
|
"eval_loss": 0.0739506259560585, |
|
"eval_rewards/accuracies": 0.9713375568389893, |
|
"eval_rewards/generated": -13.001410484313965, |
|
"eval_rewards/margins": 9.995938301086426, |
|
"eval_rewards/real": -3.005469799041748, |
|
"eval_runtime": 325.1178, |
|
"eval_samples_per_second": 15.379, |
|
"eval_steps_per_second": 0.483, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.755009696186167e-08, |
|
"logits/generated": -2.4574570655822754, |
|
"logits/real": -2.478517770767212, |
|
"logps/generated": -235.34963989257812, |
|
"logps/real": -165.80545043945312, |
|
"loss": 0.0921, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -12.185359001159668, |
|
"rewards/margins": 8.462077140808105, |
|
"rewards/real": -3.723281145095825, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.43180349062702e-08, |
|
"logits/generated": -2.467890501022339, |
|
"logits/real": -2.449784994125366, |
|
"logps/generated": -265.53546142578125, |
|
"logps/real": -190.40701293945312, |
|
"loss": 0.0903, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -14.360349655151367, |
|
"rewards/margins": 10.199980735778809, |
|
"rewards/real": -4.160367965698242, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.108597285067872e-08, |
|
"logits/generated": -2.4615697860717773, |
|
"logits/real": -2.3793063163757324, |
|
"logps/generated": -239.954833984375, |
|
"logps/real": -161.84376525878906, |
|
"loss": 0.0975, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/generated": -12.927061080932617, |
|
"rewards/margins": 8.876288414001465, |
|
"rewards/real": -4.050771236419678, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.785391079508726e-08, |
|
"logits/generated": -2.4796242713928223, |
|
"logits/real": -2.4357521533966064, |
|
"logps/generated": -227.2936248779297, |
|
"logps/real": -163.6289825439453, |
|
"loss": 0.0818, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -11.878579139709473, |
|
"rewards/margins": 8.282608985900879, |
|
"rewards/real": -3.595970630645752, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.46218487394958e-08, |
|
"logits/generated": -2.4726433753967285, |
|
"logits/real": -2.3735179901123047, |
|
"logps/generated": -239.4695587158203, |
|
"logps/real": -159.3350372314453, |
|
"loss": 0.0978, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -12.301145553588867, |
|
"rewards/margins": 8.831232070922852, |
|
"rewards/real": -3.4699130058288574, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.1389786683904325e-08, |
|
"logits/generated": -2.488933801651001, |
|
"logits/real": -2.4772191047668457, |
|
"logps/generated": -250.51992797851562, |
|
"logps/real": -159.865478515625, |
|
"loss": 0.0695, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -12.80781364440918, |
|
"rewards/margins": 9.52310848236084, |
|
"rewards/real": -3.2847042083740234, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.8157724628312865e-08, |
|
"logits/generated": -2.5149645805358887, |
|
"logits/real": -2.483194589614868, |
|
"logps/generated": -259.7389221191406, |
|
"logps/real": -188.4167022705078, |
|
"loss": 0.0978, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -12.594181060791016, |
|
"rewards/margins": 8.321154594421387, |
|
"rewards/real": -4.273025035858154, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.492566257272139e-08, |
|
"logits/generated": -2.498760461807251, |
|
"logits/real": -2.4244179725646973, |
|
"logps/generated": -255.7240753173828, |
|
"logps/real": -168.46018981933594, |
|
"loss": 0.1101, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -12.637785911560059, |
|
"rewards/margins": 8.729381561279297, |
|
"rewards/real": -3.9084041118621826, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.169360051712993e-08, |
|
"logits/generated": -2.4851748943328857, |
|
"logits/real": -2.429105043411255, |
|
"logps/generated": -242.70553588867188, |
|
"logps/real": -165.17166137695312, |
|
"loss": 0.0845, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -12.373160362243652, |
|
"rewards/margins": 8.976916313171387, |
|
"rewards/real": -3.3962435722351074, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.846153846153846e-08, |
|
"logits/generated": -2.468567371368408, |
|
"logits/real": -2.375533103942871, |
|
"logps/generated": -254.6434326171875, |
|
"logps/real": -174.11781311035156, |
|
"loss": 0.1099, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -13.183265686035156, |
|
"rewards/margins": 8.735334396362305, |
|
"rewards/real": -4.447932243347168, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_logits/generated": -2.425264358520508, |
|
"eval_logits/real": -2.4319591522216797, |
|
"eval_logps/generated": -233.0531768798828, |
|
"eval_logps/real": -169.63656616210938, |
|
"eval_loss": 0.07743819802999496, |
|
"eval_rewards/accuracies": 0.9729299545288086, |
|
"eval_rewards/generated": -13.673635482788086, |
|
"eval_rewards/margins": 10.176533699035645, |
|
"eval_rewards/real": -3.497100591659546, |
|
"eval_runtime": 325.5057, |
|
"eval_samples_per_second": 15.361, |
|
"eval_steps_per_second": 0.482, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.5229476405946995e-08, |
|
"logits/generated": -2.4387266635894775, |
|
"logits/real": -2.408844470977783, |
|
"logps/generated": -240.92184448242188, |
|
"logps/real": -162.61293029785156, |
|
"loss": 0.1445, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -13.79778003692627, |
|
"rewards/margins": 10.461742401123047, |
|
"rewards/real": -3.3360390663146973, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.199741435035552e-08, |
|
"logits/generated": -2.44557785987854, |
|
"logits/real": -2.4553208351135254, |
|
"logps/generated": -238.5950164794922, |
|
"logps/real": -176.37416076660156, |
|
"loss": 0.1105, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -12.455824851989746, |
|
"rewards/margins": 8.037620544433594, |
|
"rewards/real": -4.418205261230469, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.8765352294764057e-08, |
|
"logits/generated": -2.457484006881714, |
|
"logits/real": -2.424367904663086, |
|
"logps/generated": -241.5567626953125, |
|
"logps/real": -170.40658569335938, |
|
"loss": 0.1305, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -12.365758895874023, |
|
"rewards/margins": 8.254980087280273, |
|
"rewards/real": -4.110778331756592, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.553329023917259e-08, |
|
"logits/generated": -2.4767932891845703, |
|
"logits/real": -2.4457132816314697, |
|
"logps/generated": -255.2021026611328, |
|
"logps/real": -187.205078125, |
|
"loss": 0.0809, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -12.562596321105957, |
|
"rewards/margins": 8.74045467376709, |
|
"rewards/real": -3.82214093208313, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.2301228183581126e-08, |
|
"logits/generated": -2.4593024253845215, |
|
"logits/real": -2.4330244064331055, |
|
"logps/generated": -261.88787841796875, |
|
"logps/real": -179.29541015625, |
|
"loss": 0.1031, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -13.55891227722168, |
|
"rewards/margins": 9.299590110778809, |
|
"rewards/real": -4.259322166442871, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.906916612798966e-08, |
|
"logits/generated": -2.467500925064087, |
|
"logits/real": -2.4541561603546143, |
|
"logps/generated": -245.1857452392578, |
|
"logps/real": -171.3612060546875, |
|
"loss": 0.1162, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -12.426248550415039, |
|
"rewards/margins": 8.478652000427246, |
|
"rewards/real": -3.9475975036621094, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5837104072398187e-08, |
|
"logits/generated": -2.459001064300537, |
|
"logits/real": -2.430206775665283, |
|
"logps/generated": -254.04745483398438, |
|
"logps/real": -169.44154357910156, |
|
"loss": 0.0977, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/generated": -13.104756355285645, |
|
"rewards/margins": 9.2105073928833, |
|
"rewards/real": -3.8942489624023438, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2605042016806723e-08, |
|
"logits/generated": -2.448197364807129, |
|
"logits/real": -2.455432891845703, |
|
"logps/generated": -263.8450622558594, |
|
"logps/real": -177.14439392089844, |
|
"loss": 0.0979, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -13.550898551940918, |
|
"rewards/margins": 9.719705581665039, |
|
"rewards/real": -3.831193208694458, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.372979961215254e-09, |
|
"logits/generated": -2.4577107429504395, |
|
"logits/real": -2.4188618659973145, |
|
"logps/generated": -236.26318359375, |
|
"logps/real": -172.19528198242188, |
|
"loss": 0.0944, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -11.246160507202148, |
|
"rewards/margins": 7.3128533363342285, |
|
"rewards/real": -3.93330717086792, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.140917905623787e-09, |
|
"logits/generated": -2.471553325653076, |
|
"logits/real": -2.3995721340179443, |
|
"logps/generated": -262.373291015625, |
|
"logps/real": -165.32789611816406, |
|
"loss": 0.0906, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -13.321420669555664, |
|
"rewards/margins": 10.259626388549805, |
|
"rewards/real": -3.0617949962615967, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/generated": -2.4198453426361084, |
|
"eval_logits/real": -2.423133373260498, |
|
"eval_logps/generated": -228.7878875732422, |
|
"eval_logps/real": -165.37667846679688, |
|
"eval_loss": 0.07379047572612762, |
|
"eval_rewards/accuracies": 0.9713375568389893, |
|
"eval_rewards/generated": -13.247109413146973, |
|
"eval_rewards/margins": 10.17599868774414, |
|
"eval_rewards/real": -3.0711097717285156, |
|
"eval_runtime": 325.2984, |
|
"eval_samples_per_second": 15.371, |
|
"eval_steps_per_second": 0.483, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.9088558500323206e-09, |
|
"logits/generated": -2.4227101802825928, |
|
"logits/real": -2.464235305786133, |
|
"logps/generated": -243.1490020751953, |
|
"logps/real": -169.15988159179688, |
|
"loss": 0.0914, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -12.564062118530273, |
|
"rewards/margins": 8.887134552001953, |
|
"rewards/real": -3.676928758621216, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1719, |
|
"total_flos": 0.0, |
|
"train_loss": 0.16738235295130943, |
|
"train_runtime": 14752.9454, |
|
"train_samples_per_second": 3.728, |
|
"train_steps_per_second": 0.117 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1719, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|