|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9983539094650205, |
|
"eval_steps": 25, |
|
"global_step": 1214, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.098360655737705e-09, |
|
"logits/generated": -2.6401095390319824, |
|
"logits/real": -2.652092456817627, |
|
"logps/generated": -522.5341796875, |
|
"logps/real": -420.15106201171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.0983606557377046e-08, |
|
"logits/generated": -2.7537119388580322, |
|
"logits/real": -2.749621868133545, |
|
"logps/generated": -494.7628479003906, |
|
"logps/real": -410.4559326171875, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.4513888955116272, |
|
"rewards/generated": -0.010888932272791862, |
|
"rewards/margins": -0.00011028432345483452, |
|
"rewards/real": -0.010999216698110104, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.196721311475409e-08, |
|
"logits/generated": -2.72717022895813, |
|
"logits/real": -2.736893892288208, |
|
"logps/generated": -487.6122131347656, |
|
"logps/real": -399.52362060546875, |
|
"loss": 0.591, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/generated": -0.3106920123100281, |
|
"rewards/margins": 0.2161286622285843, |
|
"rewards/real": -0.09456336498260498, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_logits/generated": -2.698356866836548, |
|
"eval_logits/real": -2.709562301635742, |
|
"eval_logps/generated": -465.1226806640625, |
|
"eval_logps/real": -419.8426208496094, |
|
"eval_loss": 0.4210089147090912, |
|
"eval_rewards/accuracies": 0.8500000238418579, |
|
"eval_rewards/generated": -1.0787537097930908, |
|
"eval_rewards/margins": 0.8287025094032288, |
|
"eval_rewards/real": -0.25005120038986206, |
|
"eval_runtime": 549.2945, |
|
"eval_samples_per_second": 7.861, |
|
"eval_steps_per_second": 0.246, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.2295081967213113e-07, |
|
"logits/generated": -2.7093679904937744, |
|
"logits/real": -2.718613624572754, |
|
"logps/generated": -482.2759704589844, |
|
"logps/real": -419.0752868652344, |
|
"loss": 0.4538, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -0.9867717623710632, |
|
"rewards/margins": 0.7356894016265869, |
|
"rewards/real": -0.2510823607444763, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.6393442622950818e-07, |
|
"logits/generated": -2.679270029067993, |
|
"logits/real": -2.691554307937622, |
|
"logps/generated": -500.12786865234375, |
|
"logps/real": -385.42303466796875, |
|
"loss": 0.3118, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -1.5584895610809326, |
|
"rewards/margins": 1.3963291645050049, |
|
"rewards/real": -0.16216044127941132, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.0491803278688524e-07, |
|
"logits/generated": -2.6619114875793457, |
|
"logits/real": -2.6760687828063965, |
|
"logps/generated": -513.6600952148438, |
|
"logps/real": -445.169677734375, |
|
"loss": 0.2223, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -2.419264316558838, |
|
"rewards/margins": 2.2213003635406494, |
|
"rewards/real": -0.19796383380889893, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/generated": -2.630603551864624, |
|
"eval_logits/real": -2.6445798873901367, |
|
"eval_logps/generated": -485.2113342285156, |
|
"eval_logps/real": -423.0011291503906, |
|
"eval_loss": 0.2172713428735733, |
|
"eval_rewards/accuracies": 0.9175925850868225, |
|
"eval_rewards/generated": -3.0876214504241943, |
|
"eval_rewards/margins": 2.5217204093933105, |
|
"eval_rewards/real": -0.5659011602401733, |
|
"eval_runtime": 547.8486, |
|
"eval_samples_per_second": 7.882, |
|
"eval_steps_per_second": 0.246, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.4590163934426226e-07, |
|
"logits/generated": -2.6456358432769775, |
|
"logits/real": -2.656313419342041, |
|
"logps/generated": -483.8861389160156, |
|
"logps/real": -392.04681396484375, |
|
"loss": 0.1852, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -3.4535624980926514, |
|
"rewards/margins": 2.996718168258667, |
|
"rewards/real": -0.45684438943862915, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.868852459016393e-07, |
|
"logits/generated": -2.593245029449463, |
|
"logits/real": -2.633634328842163, |
|
"logps/generated": -518.401611328125, |
|
"logps/real": -430.7620544433594, |
|
"loss": 0.168, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -3.725245237350464, |
|
"rewards/margins": 3.4510974884033203, |
|
"rewards/real": -0.27414828538894653, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_logits/generated": -2.583158254623413, |
|
"eval_logits/real": -2.600541353225708, |
|
"eval_logps/generated": -499.10601806640625, |
|
"eval_logps/real": -424.4022216796875, |
|
"eval_loss": 0.15319335460662842, |
|
"eval_rewards/accuracies": 0.9435185194015503, |
|
"eval_rewards/generated": -4.4770894050598145, |
|
"eval_rewards/margins": 3.7710745334625244, |
|
"eval_rewards/real": -0.7060146927833557, |
|
"eval_runtime": 550.1138, |
|
"eval_samples_per_second": 7.849, |
|
"eval_steps_per_second": 0.245, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.2786885245901637e-07, |
|
"logits/generated": -2.5965609550476074, |
|
"logits/real": -2.6199886798858643, |
|
"logps/generated": -503.3633728027344, |
|
"logps/real": -432.483642578125, |
|
"loss": 0.1364, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -4.042363166809082, |
|
"rewards/margins": 3.511378765106201, |
|
"rewards/real": -0.5309839248657227, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.6885245901639347e-07, |
|
"logits/generated": -2.568969249725342, |
|
"logits/real": -2.5813772678375244, |
|
"logps/generated": -529.686767578125, |
|
"logps/real": -427.0670471191406, |
|
"loss": 0.1207, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -5.874642848968506, |
|
"rewards/margins": 5.00606632232666, |
|
"rewards/real": -0.8685771822929382, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.0983606557377047e-07, |
|
"logits/generated": -2.590125560760498, |
|
"logits/real": -2.594589948654175, |
|
"logps/generated": -532.7440185546875, |
|
"logps/real": -431.35028076171875, |
|
"loss": 0.1126, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/generated": -5.600076198577881, |
|
"rewards/margins": 4.819561958312988, |
|
"rewards/real": -0.7805139422416687, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/generated": -2.596092939376831, |
|
"eval_logits/real": -2.611849784851074, |
|
"eval_logps/generated": -517.4968872070312, |
|
"eval_logps/real": -430.088623046875, |
|
"eval_loss": 0.12175341695547104, |
|
"eval_rewards/accuracies": 0.9509259462356567, |
|
"eval_rewards/generated": -6.31616735458374, |
|
"eval_rewards/margins": 5.041518211364746, |
|
"eval_rewards/real": -1.2746495008468628, |
|
"eval_runtime": 548.1161, |
|
"eval_samples_per_second": 7.878, |
|
"eval_steps_per_second": 0.246, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.508196721311475e-07, |
|
"logits/generated": -2.637228012084961, |
|
"logits/real": -2.649094343185425, |
|
"logps/generated": -518.5374145507812, |
|
"logps/real": -405.7285461425781, |
|
"loss": 0.1257, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -6.386349678039551, |
|
"rewards/margins": 5.354151725769043, |
|
"rewards/real": -1.0321977138519287, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.918032786885245e-07, |
|
"logits/generated": -2.613947868347168, |
|
"logits/real": -2.6607353687286377, |
|
"logps/generated": -521.7745361328125, |
|
"logps/real": -417.6166076660156, |
|
"loss": 0.0854, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -7.714804649353027, |
|
"rewards/margins": 6.425919532775879, |
|
"rewards/real": -1.2888853549957275, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/generated": -2.553359270095825, |
|
"eval_logits/real": -2.585937023162842, |
|
"eval_logps/generated": -544.7130126953125, |
|
"eval_logps/real": -435.28656005859375, |
|
"eval_loss": 0.09206286817789078, |
|
"eval_rewards/accuracies": 0.9611111283302307, |
|
"eval_rewards/generated": -9.037795066833496, |
|
"eval_rewards/margins": 7.243347644805908, |
|
"eval_rewards/real": -1.7944461107254028, |
|
"eval_runtime": 549.7281, |
|
"eval_samples_per_second": 7.855, |
|
"eval_steps_per_second": 0.246, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.963369963369964e-07, |
|
"logits/generated": -2.5732762813568115, |
|
"logits/real": -2.6250641345977783, |
|
"logps/generated": -594.252197265625, |
|
"logps/real": -480.178955078125, |
|
"loss": 0.1156, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -8.990375518798828, |
|
"rewards/margins": 7.42321252822876, |
|
"rewards/real": -1.5671632289886475, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.917582417582417e-07, |
|
"logits/generated": -2.588108777999878, |
|
"logits/real": -2.616245746612549, |
|
"logps/generated": -536.1387939453125, |
|
"logps/real": -419.4244079589844, |
|
"loss": 0.0895, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -8.070068359375, |
|
"rewards/margins": 6.980319976806641, |
|
"rewards/real": -1.089747667312622, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.871794871794871e-07, |
|
"logits/generated": -2.6047449111938477, |
|
"logits/real": -2.6315391063690186, |
|
"logps/generated": -543.27978515625, |
|
"logps/real": -417.0159606933594, |
|
"loss": 0.0609, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -8.380901336669922, |
|
"rewards/margins": 7.630522727966309, |
|
"rewards/real": -0.7503789067268372, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_logits/generated": -2.587456703186035, |
|
"eval_logits/real": -2.6238744258880615, |
|
"eval_logps/generated": -546.2610473632812, |
|
"eval_logps/real": -434.2024841308594, |
|
"eval_loss": 0.07384903728961945, |
|
"eval_rewards/accuracies": 0.9638888835906982, |
|
"eval_rewards/generated": -9.192586898803711, |
|
"eval_rewards/margins": 7.506547451019287, |
|
"eval_rewards/real": -1.686038851737976, |
|
"eval_runtime": 548.206, |
|
"eval_samples_per_second": 7.877, |
|
"eval_steps_per_second": 0.246, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.826007326007326e-07, |
|
"logits/generated": -2.591831684112549, |
|
"logits/real": -2.607217788696289, |
|
"logps/generated": -571.234619140625, |
|
"logps/real": -392.61004638671875, |
|
"loss": 0.0855, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -9.883105278015137, |
|
"rewards/margins": 7.8926191329956055, |
|
"rewards/real": -1.9904861450195312, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.78021978021978e-07, |
|
"logits/generated": -2.554612874984741, |
|
"logits/real": -2.5981972217559814, |
|
"logps/generated": -613.7628173828125, |
|
"logps/real": -422.0660095214844, |
|
"loss": 0.0654, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -11.727653503417969, |
|
"rewards/margins": 9.922538757324219, |
|
"rewards/real": -1.8051135540008545, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/generated": -2.5252153873443604, |
|
"eval_logits/real": -2.5698459148406982, |
|
"eval_logps/generated": -552.5237426757812, |
|
"eval_logps/real": -437.7025451660156, |
|
"eval_loss": 0.07325886934995651, |
|
"eval_rewards/accuracies": 0.9648148417472839, |
|
"eval_rewards/generated": -9.818856239318848, |
|
"eval_rewards/margins": 7.782812595367432, |
|
"eval_rewards/real": -2.036044120788574, |
|
"eval_runtime": 538.2331, |
|
"eval_samples_per_second": 8.023, |
|
"eval_steps_per_second": 0.251, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.734432234432234e-07, |
|
"logits/generated": -2.5366225242614746, |
|
"logits/real": -2.5802321434020996, |
|
"logps/generated": -562.9288940429688, |
|
"logps/real": -426.1321716308594, |
|
"loss": 0.065, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -10.882379531860352, |
|
"rewards/margins": 8.783550262451172, |
|
"rewards/real": -2.098829984664917, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.6886446886446884e-07, |
|
"logits/generated": -2.464695930480957, |
|
"logits/real": -2.514756917953491, |
|
"logps/generated": -585.3269653320312, |
|
"logps/real": -439.30535888671875, |
|
"loss": 0.0803, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/generated": -13.198904037475586, |
|
"rewards/margins": 10.291043281555176, |
|
"rewards/real": -2.9078612327575684, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.6428571428571427e-07, |
|
"logits/generated": -2.4625725746154785, |
|
"logits/real": -2.5219738483428955, |
|
"logps/generated": -573.2178955078125, |
|
"logps/real": -413.1720275878906, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -12.102069854736328, |
|
"rewards/margins": 9.350552558898926, |
|
"rewards/real": -2.751516342163086, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_logits/generated": -2.4633901119232178, |
|
"eval_logits/real": -2.5260355472564697, |
|
"eval_logps/generated": -556.628662109375, |
|
"eval_logps/real": -440.68316650390625, |
|
"eval_loss": 0.07137465476989746, |
|
"eval_rewards/accuracies": 0.9629629850387573, |
|
"eval_rewards/generated": -10.229352951049805, |
|
"eval_rewards/margins": 7.8952484130859375, |
|
"eval_rewards/real": -2.334104299545288, |
|
"eval_runtime": 541.6446, |
|
"eval_samples_per_second": 7.972, |
|
"eval_steps_per_second": 0.249, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.5970695970695965e-07, |
|
"logits/generated": -2.5092012882232666, |
|
"logits/real": -2.543381929397583, |
|
"logps/generated": -568.660888671875, |
|
"logps/real": -418.6875, |
|
"loss": 0.0591, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -10.36689281463623, |
|
"rewards/margins": 8.45336627960205, |
|
"rewards/real": -1.913527488708496, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.551282051282051e-07, |
|
"logits/generated": -2.365678310394287, |
|
"logits/real": -2.4535679817199707, |
|
"logps/generated": -604.965087890625, |
|
"logps/real": -432.04974365234375, |
|
"loss": 0.0356, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -14.320533752441406, |
|
"rewards/margins": 10.759347915649414, |
|
"rewards/real": -3.5611863136291504, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/generated": -2.4310951232910156, |
|
"eval_logits/real": -2.5141642093658447, |
|
"eval_logps/generated": -568.4989624023438, |
|
"eval_logps/real": -444.03936767578125, |
|
"eval_loss": 0.06977172195911407, |
|
"eval_rewards/accuracies": 0.9666666388511658, |
|
"eval_rewards/generated": -11.4163818359375, |
|
"eval_rewards/margins": 8.746658325195312, |
|
"eval_rewards/real": -2.6697258949279785, |
|
"eval_runtime": 540.5003, |
|
"eval_samples_per_second": 7.989, |
|
"eval_steps_per_second": 0.25, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.5054945054945056e-07, |
|
"logits/generated": -2.40238618850708, |
|
"logits/real": -2.490166425704956, |
|
"logps/generated": -583.9486083984375, |
|
"logps/real": -433.9161682128906, |
|
"loss": 0.0966, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -11.47558879852295, |
|
"rewards/margins": 8.76927661895752, |
|
"rewards/real": -2.7063136100769043, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.45970695970696e-07, |
|
"logits/generated": -2.3658084869384766, |
|
"logits/real": -2.4398703575134277, |
|
"logps/generated": -579.8946533203125, |
|
"logps/real": -403.6769104003906, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -12.590639114379883, |
|
"rewards/margins": 9.855241775512695, |
|
"rewards/real": -2.735395908355713, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.4139194139194137e-07, |
|
"logits/generated": -2.3249025344848633, |
|
"logits/real": -2.421391010284424, |
|
"logps/generated": -627.2833251953125, |
|
"logps/real": -436.53399658203125, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -13.549036979675293, |
|
"rewards/margins": 11.038546562194824, |
|
"rewards/real": -2.510490894317627, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_logits/generated": -2.310610055923462, |
|
"eval_logits/real": -2.4202382564544678, |
|
"eval_logps/generated": -577.3876953125, |
|
"eval_logps/real": -441.26837158203125, |
|
"eval_loss": 0.0585518442094326, |
|
"eval_rewards/accuracies": 0.9694444537162781, |
|
"eval_rewards/generated": -12.305254936218262, |
|
"eval_rewards/margins": 9.912630081176758, |
|
"eval_rewards/real": -2.3926241397857666, |
|
"eval_runtime": 544.2791, |
|
"eval_samples_per_second": 7.933, |
|
"eval_steps_per_second": 0.248, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.368131868131868e-07, |
|
"logits/generated": -2.352004289627075, |
|
"logits/real": -2.446444034576416, |
|
"logps/generated": -596.7254638671875, |
|
"logps/real": -417.0332946777344, |
|
"loss": 0.0495, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -12.558148384094238, |
|
"rewards/margins": 10.505435943603516, |
|
"rewards/real": -2.0527119636535645, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.3223443223443223e-07, |
|
"logits/generated": -2.3381831645965576, |
|
"logits/real": -2.430725574493408, |
|
"logps/generated": -658.2124633789062, |
|
"logps/real": -449.29461669921875, |
|
"loss": 0.0442, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -17.05294418334961, |
|
"rewards/margins": 13.15046215057373, |
|
"rewards/real": -3.9024810791015625, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_logits/generated": -2.3879544734954834, |
|
"eval_logits/real": -2.4772706031799316, |
|
"eval_logps/generated": -573.7975463867188, |
|
"eval_logps/real": -442.5116882324219, |
|
"eval_loss": 0.06716620177030563, |
|
"eval_rewards/accuracies": 0.9675925970077515, |
|
"eval_rewards/generated": -11.946240425109863, |
|
"eval_rewards/margins": 9.429278373718262, |
|
"eval_rewards/real": -2.516960382461548, |
|
"eval_runtime": 543.8707, |
|
"eval_samples_per_second": 7.939, |
|
"eval_steps_per_second": 0.248, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.276556776556776e-07, |
|
"logits/generated": -2.2920713424682617, |
|
"logits/real": -2.4120707511901855, |
|
"logps/generated": -625.0443115234375, |
|
"logps/real": -443.11773681640625, |
|
"loss": 0.0455, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -15.292150497436523, |
|
"rewards/margins": 12.260358810424805, |
|
"rewards/real": -3.0317909717559814, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.2307692307692304e-07, |
|
"logits/generated": -2.2416317462921143, |
|
"logits/real": -2.3849129676818848, |
|
"logps/generated": -620.5736083984375, |
|
"logps/real": -438.10302734375, |
|
"loss": 0.0638, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/generated": -15.206059455871582, |
|
"rewards/margins": 11.891336441040039, |
|
"rewards/real": -3.314722776412964, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1849816849816847e-07, |
|
"logits/generated": -2.2657132148742676, |
|
"logits/real": -2.416287660598755, |
|
"logps/generated": -596.555908203125, |
|
"logps/real": -443.7845764160156, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/generated": -13.721325874328613, |
|
"rewards/margins": 11.418547630310059, |
|
"rewards/real": -2.30277681350708, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_logits/generated": -2.256381034851074, |
|
"eval_logits/real": -2.391341209411621, |
|
"eval_logps/generated": -605.804443359375, |
|
"eval_logps/real": -455.82989501953125, |
|
"eval_loss": 0.05395643413066864, |
|
"eval_rewards/accuracies": 0.9666666388511658, |
|
"eval_rewards/generated": -15.146928787231445, |
|
"eval_rewards/margins": 11.298150062561035, |
|
"eval_rewards/real": -3.8487789630889893, |
|
"eval_runtime": 543.6439, |
|
"eval_samples_per_second": 7.943, |
|
"eval_steps_per_second": 0.248, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1391941391941385e-07, |
|
"logits/generated": -2.2252917289733887, |
|
"logits/real": -2.3379065990448, |
|
"logps/generated": -627.6759033203125, |
|
"logps/real": -425.13201904296875, |
|
"loss": 0.0306, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -16.1324462890625, |
|
"rewards/margins": 12.730301856994629, |
|
"rewards/real": -3.40214467048645, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.0934065934065933e-07, |
|
"logits/generated": -2.301344633102417, |
|
"logits/real": -2.4362711906433105, |
|
"logps/generated": -602.3247680664062, |
|
"logps/real": -444.23974609375, |
|
"loss": 0.0683, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -13.596293449401855, |
|
"rewards/margins": 10.262487411499023, |
|
"rewards/real": -3.333805799484253, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_logits/generated": -2.1402149200439453, |
|
"eval_logits/real": -2.322171926498413, |
|
"eval_logps/generated": -636.7122802734375, |
|
"eval_logps/real": -470.3190002441406, |
|
"eval_loss": 0.057357266545295715, |
|
"eval_rewards/accuracies": 0.9666666388511658, |
|
"eval_rewards/generated": -18.237712860107422, |
|
"eval_rewards/margins": 12.940022468566895, |
|
"eval_rewards/real": -5.297689437866211, |
|
"eval_runtime": 537.4705, |
|
"eval_samples_per_second": 8.034, |
|
"eval_steps_per_second": 0.251, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.0476190476190476e-07, |
|
"logits/generated": -2.1620073318481445, |
|
"logits/real": -2.3063290119171143, |
|
"logps/generated": -652.4406127929688, |
|
"logps/real": -435.88494873046875, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.206768035888672, |
|
"rewards/margins": 13.691610336303711, |
|
"rewards/real": -4.51515531539917, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.001831501831502e-07, |
|
"logits/generated": -2.236128330230713, |
|
"logits/real": -2.3870835304260254, |
|
"logps/generated": -652.0904541015625, |
|
"logps/real": -450.24859619140625, |
|
"loss": 0.0553, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.906084060668945, |
|
"rewards/margins": 14.117843627929688, |
|
"rewards/real": -3.7882392406463623, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.9560439560439557e-07, |
|
"logits/generated": -2.176945209503174, |
|
"logits/real": -2.368508815765381, |
|
"logps/generated": -651.4620361328125, |
|
"logps/real": -432.74658203125, |
|
"loss": 0.0339, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.662532806396484, |
|
"rewards/margins": 14.847686767578125, |
|
"rewards/real": -3.8148434162139893, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/generated": -2.1701033115386963, |
|
"eval_logits/real": -2.3731424808502197, |
|
"eval_logps/generated": -627.2608032226562, |
|
"eval_logps/real": -454.8285827636719, |
|
"eval_loss": 0.04949037358164787, |
|
"eval_rewards/accuracies": 0.9731481671333313, |
|
"eval_rewards/generated": -17.292573928833008, |
|
"eval_rewards/margins": 13.543929100036621, |
|
"eval_rewards/real": -3.748645305633545, |
|
"eval_runtime": 537.9029, |
|
"eval_samples_per_second": 8.027, |
|
"eval_steps_per_second": 0.251, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.91025641025641e-07, |
|
"logits/generated": -2.2481982707977295, |
|
"logits/real": -2.3865692615509033, |
|
"logps/generated": -664.8062744140625, |
|
"logps/real": -430.34857177734375, |
|
"loss": 0.0464, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.22327423095703, |
|
"rewards/margins": 13.856378555297852, |
|
"rewards/real": -3.366894245147705, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.8644688644688643e-07, |
|
"logits/generated": -2.1518349647521973, |
|
"logits/real": -2.3465476036071777, |
|
"logps/generated": -613.2617797851562, |
|
"logps/real": -446.73834228515625, |
|
"loss": 0.0648, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -18.795753479003906, |
|
"rewards/margins": 14.179173469543457, |
|
"rewards/real": -4.616580009460449, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/generated": -2.316718339920044, |
|
"eval_logits/real": -2.478287935256958, |
|
"eval_logps/generated": -586.93896484375, |
|
"eval_logps/real": -441.6444091796875, |
|
"eval_loss": 0.053722720593214035, |
|
"eval_rewards/accuracies": 0.9722222089767456, |
|
"eval_rewards/generated": -13.260376930236816, |
|
"eval_rewards/margins": 10.830145835876465, |
|
"eval_rewards/real": -2.4302310943603516, |
|
"eval_runtime": 539.3726, |
|
"eval_samples_per_second": 8.006, |
|
"eval_steps_per_second": 0.25, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.818681318681318e-07, |
|
"logits/generated": -2.279561758041382, |
|
"logits/real": -2.478255033493042, |
|
"logps/generated": -601.7288208007812, |
|
"logps/real": -449.01702880859375, |
|
"loss": 0.0427, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -14.500157356262207, |
|
"rewards/margins": 11.545099258422852, |
|
"rewards/real": -2.9550578594207764, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.7728937728937724e-07, |
|
"logits/generated": -2.285148859024048, |
|
"logits/real": -2.452268123626709, |
|
"logps/generated": -673.3703002929688, |
|
"logps/real": -449.6754455566406, |
|
"loss": 0.0272, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -17.92129898071289, |
|
"rewards/margins": 14.363734245300293, |
|
"rewards/real": -3.5575671195983887, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.727106227106227e-07, |
|
"logits/generated": -2.158463954925537, |
|
"logits/real": -2.3927464485168457, |
|
"logps/generated": -665.3966064453125, |
|
"logps/real": -455.4427185058594, |
|
"loss": 0.0358, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -20.033100128173828, |
|
"rewards/margins": 16.113994598388672, |
|
"rewards/real": -3.9191043376922607, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_logits/generated": -2.1734633445739746, |
|
"eval_logits/real": -2.3874428272247314, |
|
"eval_logps/generated": -627.7240600585938, |
|
"eval_logps/real": -455.8509216308594, |
|
"eval_loss": 0.04597338289022446, |
|
"eval_rewards/accuracies": 0.9740740656852722, |
|
"eval_rewards/generated": -17.338891983032227, |
|
"eval_rewards/margins": 13.488015174865723, |
|
"eval_rewards/real": -3.8508799076080322, |
|
"eval_runtime": 545.3223, |
|
"eval_samples_per_second": 7.918, |
|
"eval_steps_per_second": 0.248, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.6813186813186816e-07, |
|
"logits/generated": -2.1853232383728027, |
|
"logits/real": -2.3945891857147217, |
|
"logps/generated": -642.46044921875, |
|
"logps/real": -399.55694580078125, |
|
"loss": 0.0221, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -18.870121002197266, |
|
"rewards/margins": 16.129247665405273, |
|
"rewards/real": -2.7408719062805176, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.6355311355311353e-07, |
|
"logits/generated": -2.1754229068756104, |
|
"logits/real": -2.381279468536377, |
|
"logps/generated": -595.3236083984375, |
|
"logps/real": -405.1600341796875, |
|
"loss": 0.0532, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -14.932568550109863, |
|
"rewards/margins": 12.319165229797363, |
|
"rewards/real": -2.6134040355682373, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_logits/generated": -2.1550426483154297, |
|
"eval_logits/real": -2.3750507831573486, |
|
"eval_logps/generated": -636.3655395507812, |
|
"eval_logps/real": -460.6029052734375, |
|
"eval_loss": 0.048347219824790955, |
|
"eval_rewards/accuracies": 0.9740740656852722, |
|
"eval_rewards/generated": -18.20302963256836, |
|
"eval_rewards/margins": 13.876949310302734, |
|
"eval_rewards/real": -4.32607889175415, |
|
"eval_runtime": 546.9938, |
|
"eval_samples_per_second": 7.894, |
|
"eval_steps_per_second": 0.247, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.5897435897435896e-07, |
|
"logits/generated": -2.183345317840576, |
|
"logits/real": -2.4247512817382812, |
|
"logps/generated": -591.2523193359375, |
|
"logps/real": -400.4705810546875, |
|
"loss": 0.043, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -16.82411766052246, |
|
"rewards/margins": 14.277923583984375, |
|
"rewards/real": -2.5461928844451904, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.543956043956044e-07, |
|
"logits/generated": -2.2292613983154297, |
|
"logits/real": -2.4110050201416016, |
|
"logps/generated": -626.2135620117188, |
|
"logps/real": -403.21368408203125, |
|
"loss": 0.0332, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.94637680053711, |
|
"rewards/margins": 15.042144775390625, |
|
"rewards/real": -2.9042327404022217, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.4981684981684977e-07, |
|
"logits/generated": -2.2957308292388916, |
|
"logits/real": -2.483898639678955, |
|
"logps/generated": -624.1409301757812, |
|
"logps/real": -416.9951171875, |
|
"loss": 0.0408, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -17.344409942626953, |
|
"rewards/margins": 14.197443962097168, |
|
"rewards/real": -3.1469688415527344, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_logits/generated": -2.29823899269104, |
|
"eval_logits/real": -2.481050968170166, |
|
"eval_logps/generated": -651.6072998046875, |
|
"eval_logps/real": -466.22760009765625, |
|
"eval_loss": 0.05667389929294586, |
|
"eval_rewards/accuracies": 0.9740740656852722, |
|
"eval_rewards/generated": -19.727210998535156, |
|
"eval_rewards/margins": 14.838663101196289, |
|
"eval_rewards/real": -4.888548851013184, |
|
"eval_runtime": 546.6533, |
|
"eval_samples_per_second": 7.899, |
|
"eval_steps_per_second": 0.247, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.452380952380952e-07, |
|
"logits/generated": -2.3011131286621094, |
|
"logits/real": -2.4612629413604736, |
|
"logps/generated": -647.5547485351562, |
|
"logps/real": -426.67205810546875, |
|
"loss": 0.0449, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/generated": -17.85089111328125, |
|
"rewards/margins": 14.307988166809082, |
|
"rewards/real": -3.5429024696350098, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.4065934065934063e-07, |
|
"logits/generated": -2.1878461837768555, |
|
"logits/real": -2.4027976989746094, |
|
"logps/generated": -651.8814697265625, |
|
"logps/real": -432.30828857421875, |
|
"loss": 0.0434, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -18.791553497314453, |
|
"rewards/margins": 14.97038459777832, |
|
"rewards/real": -3.8211684226989746, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/generated": -2.1936686038970947, |
|
"eval_logits/real": -2.424220323562622, |
|
"eval_logps/generated": -615.4547729492188, |
|
"eval_logps/real": -446.01873779296875, |
|
"eval_loss": 0.04669804871082306, |
|
"eval_rewards/accuracies": 0.9731481671333313, |
|
"eval_rewards/generated": -16.111957550048828, |
|
"eval_rewards/margins": 13.244292259216309, |
|
"eval_rewards/real": -2.8676631450653076, |
|
"eval_runtime": 548.2901, |
|
"eval_samples_per_second": 7.875, |
|
"eval_steps_per_second": 0.246, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.360805860805861e-07, |
|
"logits/generated": -2.170032501220703, |
|
"logits/real": -2.3914527893066406, |
|
"logps/generated": -638.609619140625, |
|
"logps/real": -419.04473876953125, |
|
"loss": 0.0184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.431259155273438, |
|
"rewards/margins": 14.863021850585938, |
|
"rewards/real": -2.568234920501709, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.315018315018315e-07, |
|
"logits/generated": -2.125486135482788, |
|
"logits/real": -2.35538911819458, |
|
"logps/generated": -608.1831665039062, |
|
"logps/real": -426.45733642578125, |
|
"loss": 0.0437, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/generated": -16.634971618652344, |
|
"rewards/margins": 13.581262588500977, |
|
"rewards/real": -3.053709030151367, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.269230769230769e-07, |
|
"logits/generated": -2.0204501152038574, |
|
"logits/real": -2.329942226409912, |
|
"logps/generated": -684.5260620117188, |
|
"logps/real": -425.2499084472656, |
|
"loss": 0.0194, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -20.701831817626953, |
|
"rewards/margins": 17.69593620300293, |
|
"rewards/real": -3.0058956146240234, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_logits/generated": -2.0107169151306152, |
|
"eval_logits/real": -2.3290646076202393, |
|
"eval_logps/generated": -639.0421752929688, |
|
"eval_logps/real": -449.8150939941406, |
|
"eval_loss": 0.04547751694917679, |
|
"eval_rewards/accuracies": 0.9768518805503845, |
|
"eval_rewards/generated": -18.4707088470459, |
|
"eval_rewards/margins": 15.223410606384277, |
|
"eval_rewards/real": -3.2472991943359375, |
|
"eval_runtime": 547.436, |
|
"eval_samples_per_second": 7.888, |
|
"eval_steps_per_second": 0.247, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.2234432234432236e-07, |
|
"logits/generated": -2.1318891048431396, |
|
"logits/real": -2.373379707336426, |
|
"logps/generated": -664.7022705078125, |
|
"logps/real": -426.9652404785156, |
|
"loss": 0.0487, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/generated": -20.184797286987305, |
|
"rewards/margins": 16.790945053100586, |
|
"rewards/real": -3.393852949142456, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.1776556776556773e-07, |
|
"logits/generated": -2.1151123046875, |
|
"logits/real": -2.3613884449005127, |
|
"logps/generated": -723.1424560546875, |
|
"logps/real": -472.94879150390625, |
|
"loss": 0.0227, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.221763610839844, |
|
"rewards/margins": 17.593347549438477, |
|
"rewards/real": -3.6284186840057373, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/generated": -2.214646100997925, |
|
"eval_logits/real": -2.409980297088623, |
|
"eval_logps/generated": -655.4663696289062, |
|
"eval_logps/real": -463.1470947265625, |
|
"eval_loss": 0.05429470166563988, |
|
"eval_rewards/accuracies": 0.9750000238418579, |
|
"eval_rewards/generated": -20.113121032714844, |
|
"eval_rewards/margins": 15.532620429992676, |
|
"eval_rewards/real": -4.580500602722168, |
|
"eval_runtime": 546.7755, |
|
"eval_samples_per_second": 7.897, |
|
"eval_steps_per_second": 0.247, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.1318681318681316e-07, |
|
"logits/generated": -2.1360554695129395, |
|
"logits/real": -2.357234001159668, |
|
"logps/generated": -686.1627197265625, |
|
"logps/real": -463.78594970703125, |
|
"loss": 0.1081, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.75899887084961, |
|
"rewards/margins": 17.28326416015625, |
|
"rewards/real": -4.475732803344727, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.086080586080586e-07, |
|
"logits/generated": -2.198076009750366, |
|
"logits/real": -2.3724374771118164, |
|
"logps/generated": -632.1812133789062, |
|
"logps/real": -408.17999267578125, |
|
"loss": 0.0384, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.84307098388672, |
|
"rewards/margins": 14.931114196777344, |
|
"rewards/real": -2.911957263946533, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.0402930402930397e-07, |
|
"logits/generated": -2.0505592823028564, |
|
"logits/real": -2.2820277214050293, |
|
"logps/generated": -675.8702392578125, |
|
"logps/real": -435.8621520996094, |
|
"loss": 0.0299, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -21.609846115112305, |
|
"rewards/margins": 17.412473678588867, |
|
"rewards/real": -4.197373390197754, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_logits/generated": -2.055241823196411, |
|
"eval_logits/real": -2.3300585746765137, |
|
"eval_logps/generated": -658.2036743164062, |
|
"eval_logps/real": -460.3627014160156, |
|
"eval_loss": 0.04806026816368103, |
|
"eval_rewards/accuracies": 0.9731481671333313, |
|
"eval_rewards/generated": -20.386852264404297, |
|
"eval_rewards/margins": 16.084793090820312, |
|
"eval_rewards/real": -4.302060604095459, |
|
"eval_runtime": 545.5178, |
|
"eval_samples_per_second": 7.915, |
|
"eval_steps_per_second": 0.247, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.994505494505494e-07, |
|
"logits/generated": -2.05330491065979, |
|
"logits/real": -2.319866418838501, |
|
"logps/generated": -671.1466674804688, |
|
"logps/real": -431.24298095703125, |
|
"loss": 0.0253, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.11385154724121, |
|
"rewards/margins": 16.92177963256836, |
|
"rewards/real": -4.1920695304870605, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.948717948717949e-07, |
|
"logits/generated": -1.8845161199569702, |
|
"logits/real": -2.2347493171691895, |
|
"logps/generated": -694.9982299804688, |
|
"logps/real": -455.690185546875, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -24.400421142578125, |
|
"rewards/margins": 19.314403533935547, |
|
"rewards/real": -5.086018085479736, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_logits/generated": -1.9225108623504639, |
|
"eval_logits/real": -2.2634739875793457, |
|
"eval_logps/generated": -657.9219970703125, |
|
"eval_logps/real": -461.96160888671875, |
|
"eval_loss": 0.04638593643903732, |
|
"eval_rewards/accuracies": 0.9712963104248047, |
|
"eval_rewards/generated": -20.358694076538086, |
|
"eval_rewards/margins": 15.896740913391113, |
|
"eval_rewards/real": -4.461949348449707, |
|
"eval_runtime": 549.2313, |
|
"eval_samples_per_second": 7.862, |
|
"eval_steps_per_second": 0.246, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.902930402930403e-07, |
|
"logits/generated": -1.9606196880340576, |
|
"logits/real": -2.286708354949951, |
|
"logps/generated": -673.0288696289062, |
|
"logps/real": -418.47332763671875, |
|
"loss": 0.0254, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.76241683959961, |
|
"rewards/margins": 18.28946304321289, |
|
"rewards/real": -3.472952365875244, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.857142857142857e-07, |
|
"logits/generated": -1.8293163776397705, |
|
"logits/real": -2.22106671333313, |
|
"logps/generated": -645.2640380859375, |
|
"logps/real": -431.3738708496094, |
|
"loss": 0.0245, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -22.095638275146484, |
|
"rewards/margins": 17.28582000732422, |
|
"rewards/real": -4.8098225593566895, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.811355311355311e-07, |
|
"logits/generated": -1.9109601974487305, |
|
"logits/real": -2.295834541320801, |
|
"logps/generated": -623.3287963867188, |
|
"logps/real": -419.62939453125, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/generated": -19.795055389404297, |
|
"rewards/margins": 15.8367280960083, |
|
"rewards/real": -3.9583258628845215, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/generated": -1.9517701864242554, |
|
"eval_logits/real": -2.2964491844177246, |
|
"eval_logps/generated": -664.1465454101562, |
|
"eval_logps/real": -470.55169677734375, |
|
"eval_loss": 0.04514331370592117, |
|
"eval_rewards/accuracies": 0.9722222089767456, |
|
"eval_rewards/generated": -20.981136322021484, |
|
"eval_rewards/margins": 15.660176277160645, |
|
"eval_rewards/real": -5.320960521697998, |
|
"eval_runtime": 544.3521, |
|
"eval_samples_per_second": 7.932, |
|
"eval_steps_per_second": 0.248, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.7655677655677655e-07, |
|
"logits/generated": -1.8589156866073608, |
|
"logits/real": -2.271904468536377, |
|
"logps/generated": -683.95654296875, |
|
"logps/real": -455.6640625, |
|
"loss": 0.0219, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -22.11398696899414, |
|
"rewards/margins": 17.55306053161621, |
|
"rewards/real": -4.560924530029297, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.7197802197802193e-07, |
|
"logits/generated": -1.9177764654159546, |
|
"logits/real": -2.2215471267700195, |
|
"logps/generated": -666.0201416015625, |
|
"logps/real": -429.0519104003906, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -18.098499298095703, |
|
"rewards/margins": 15.7960205078125, |
|
"rewards/real": -2.302478790283203, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_logits/generated": -1.7574790716171265, |
|
"eval_logits/real": -2.170833110809326, |
|
"eval_logps/generated": -647.0514526367188, |
|
"eval_logps/real": -460.7373962402344, |
|
"eval_loss": 0.042933978140354156, |
|
"eval_rewards/accuracies": 0.9750000238418579, |
|
"eval_rewards/generated": -19.27163314819336, |
|
"eval_rewards/margins": 14.932106971740723, |
|
"eval_rewards/real": -4.3395256996154785, |
|
"eval_runtime": 542.4468, |
|
"eval_samples_per_second": 7.96, |
|
"eval_steps_per_second": 0.249, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.6739926739926736e-07, |
|
"logits/generated": -1.8621714115142822, |
|
"logits/real": -2.1959869861602783, |
|
"logps/generated": -680.0298461914062, |
|
"logps/real": -468.26580810546875, |
|
"loss": 0.0276, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.896984100341797, |
|
"rewards/margins": 16.92976951599121, |
|
"rewards/real": -2.9672141075134277, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.628205128205128e-07, |
|
"logits/generated": -2.157670736312866, |
|
"logits/real": -2.3523807525634766, |
|
"logps/generated": -636.9368286132812, |
|
"logps/real": -439.37841796875, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.98005199432373, |
|
"rewards/margins": 13.84521198272705, |
|
"rewards/real": -2.1348421573638916, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.582417582417583e-07, |
|
"logits/generated": -1.9907798767089844, |
|
"logits/real": -2.246988534927368, |
|
"logps/generated": -686.9197387695312, |
|
"logps/real": -434.936767578125, |
|
"loss": 0.0173, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.37822914123535, |
|
"rewards/margins": 17.642370223999023, |
|
"rewards/real": -2.7358591556549072, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_logits/generated": -1.8154959678649902, |
|
"eval_logits/real": -2.1757171154022217, |
|
"eval_logps/generated": -645.0802001953125, |
|
"eval_logps/real": -458.6592712402344, |
|
"eval_loss": 0.049214281141757965, |
|
"eval_rewards/accuracies": 0.970370352268219, |
|
"eval_rewards/generated": -19.07451057434082, |
|
"eval_rewards/margins": 14.942792892456055, |
|
"eval_rewards/real": -4.131715297698975, |
|
"eval_runtime": 543.2173, |
|
"eval_samples_per_second": 7.949, |
|
"eval_steps_per_second": 0.249, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.5366300366300365e-07, |
|
"logits/generated": -1.7823787927627563, |
|
"logits/real": -2.136547565460205, |
|
"logps/generated": -721.7634887695312, |
|
"logps/real": -412.015625, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -24.206735610961914, |
|
"rewards/margins": 20.0247745513916, |
|
"rewards/real": -4.181961536407471, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.490842490842491e-07, |
|
"logits/generated": -1.7356059551239014, |
|
"logits/real": -2.120824098587036, |
|
"logps/generated": -722.6532592773438, |
|
"logps/real": -429.0586853027344, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -25.548004150390625, |
|
"rewards/margins": 20.72186851501465, |
|
"rewards/real": -4.826132774353027, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_logits/generated": -1.6843816041946411, |
|
"eval_logits/real": -2.1123154163360596, |
|
"eval_logps/generated": -685.91259765625, |
|
"eval_logps/real": -474.67840576171875, |
|
"eval_loss": 0.04492348060011864, |
|
"eval_rewards/accuracies": 0.9712963104248047, |
|
"eval_rewards/generated": -23.157739639282227, |
|
"eval_rewards/margins": 17.424118041992188, |
|
"eval_rewards/real": -5.733626365661621, |
|
"eval_runtime": 538.5314, |
|
"eval_samples_per_second": 8.018, |
|
"eval_steps_per_second": 0.251, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.4450549450549446e-07, |
|
"logits/generated": -1.6796834468841553, |
|
"logits/real": -2.1368143558502197, |
|
"logps/generated": -713.38916015625, |
|
"logps/real": -448.4339904785156, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -26.294750213623047, |
|
"rewards/margins": 21.05908966064453, |
|
"rewards/real": -5.235660552978516, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.3992673992673995e-07, |
|
"logits/generated": -1.6848552227020264, |
|
"logits/real": -2.062554121017456, |
|
"logps/generated": -727.1676025390625, |
|
"logps/real": -474.197998046875, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.072521209716797, |
|
"rewards/margins": 20.835004806518555, |
|
"rewards/real": -5.23751974105835, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.3534798534798532e-07, |
|
"logits/generated": -1.83333420753479, |
|
"logits/real": -2.158902406692505, |
|
"logps/generated": -751.34423828125, |
|
"logps/real": -447.7481994628906, |
|
"loss": 0.0149, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -26.157222747802734, |
|
"rewards/margins": 21.213214874267578, |
|
"rewards/real": -4.944005966186523, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_logits/generated": -2.014181613922119, |
|
"eval_logits/real": -2.2748398780822754, |
|
"eval_logps/generated": -716.32373046875, |
|
"eval_logps/real": -488.8265686035156, |
|
"eval_loss": 0.060765769332647324, |
|
"eval_rewards/accuracies": 0.9712963104248047, |
|
"eval_rewards/generated": -26.19886016845703, |
|
"eval_rewards/margins": 19.0504150390625, |
|
"eval_rewards/real": -7.1484456062316895, |
|
"eval_runtime": 546.2873, |
|
"eval_samples_per_second": 7.904, |
|
"eval_steps_per_second": 0.247, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.3076923076923078e-07, |
|
"logits/generated": -2.009290933609009, |
|
"logits/real": -2.248413562774658, |
|
"logps/generated": -723.00830078125, |
|
"logps/real": -485.6675720214844, |
|
"loss": 0.0156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.66790771484375, |
|
"rewards/margins": 20.04497718811035, |
|
"rewards/real": -6.622932434082031, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.2619047619047619e-07, |
|
"logits/generated": -2.0311272144317627, |
|
"logits/real": -2.2884433269500732, |
|
"logps/generated": -705.41259765625, |
|
"logps/real": -439.14654541015625, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -25.88492202758789, |
|
"rewards/margins": 21.01565170288086, |
|
"rewards/real": -4.869270324707031, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_logits/generated": -2.1674070358276367, |
|
"eval_logits/real": -2.3962087631225586, |
|
"eval_logps/generated": -656.84765625, |
|
"eval_logps/real": -462.290283203125, |
|
"eval_loss": 0.04788418486714363, |
|
"eval_rewards/accuracies": 0.9722222089767456, |
|
"eval_rewards/generated": -20.25126075744629, |
|
"eval_rewards/margins": 15.756444931030273, |
|
"eval_rewards/real": -4.494814872741699, |
|
"eval_runtime": 549.4973, |
|
"eval_samples_per_second": 7.858, |
|
"eval_steps_per_second": 0.246, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.216117216117216e-07, |
|
"logits/generated": -2.1520206928253174, |
|
"logits/real": -2.384286403656006, |
|
"logps/generated": -693.1768188476562, |
|
"logps/real": -427.69384765625, |
|
"loss": 0.0129, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -21.8217830657959, |
|
"rewards/margins": 18.449357986450195, |
|
"rewards/real": -3.3724265098571777, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.1703296703296702e-07, |
|
"logits/generated": -2.164816379547119, |
|
"logits/real": -2.4095542430877686, |
|
"logps/generated": -731.2252807617188, |
|
"logps/real": -431.8036193847656, |
|
"loss": 0.0117, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.88985252380371, |
|
"rewards/margins": 19.35944938659668, |
|
"rewards/real": -3.5304055213928223, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.1245421245421245e-07, |
|
"logits/generated": -2.3042044639587402, |
|
"logits/real": -2.4798831939697266, |
|
"logps/generated": -710.3934326171875, |
|
"logps/real": -447.3128967285156, |
|
"loss": 0.032, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -22.082233428955078, |
|
"rewards/margins": 17.938434600830078, |
|
"rewards/real": -4.143797874450684, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_logits/generated": -2.2425506114959717, |
|
"eval_logits/real": -2.4413764476776123, |
|
"eval_logps/generated": -667.56494140625, |
|
"eval_logps/real": -468.2917175292969, |
|
"eval_loss": 0.05120517686009407, |
|
"eval_rewards/accuracies": 0.9685184955596924, |
|
"eval_rewards/generated": -21.322982788085938, |
|
"eval_rewards/margins": 16.22801971435547, |
|
"eval_rewards/real": -5.094962120056152, |
|
"eval_runtime": 548.9256, |
|
"eval_samples_per_second": 7.866, |
|
"eval_steps_per_second": 0.246, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.0787545787545788e-07, |
|
"logits/generated": -2.3006973266601562, |
|
"logits/real": -2.4579455852508545, |
|
"logps/generated": -678.5315551757812, |
|
"logps/real": -444.89593505859375, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.485198974609375, |
|
"rewards/margins": 17.16245460510254, |
|
"rewards/real": -3.322744846343994, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.0329670329670329e-07, |
|
"logits/generated": -2.276136875152588, |
|
"logits/real": -2.4627890586853027, |
|
"logps/generated": -626.4572143554688, |
|
"logps/real": -460.61090087890625, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -18.86962890625, |
|
"rewards/margins": 15.991884231567383, |
|
"rewards/real": -2.8777451515197754, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_logits/generated": -2.215578317642212, |
|
"eval_logits/real": -2.4378671646118164, |
|
"eval_logps/generated": -646.9547729492188, |
|
"eval_logps/real": -457.63812255859375, |
|
"eval_loss": 0.04618338495492935, |
|
"eval_rewards/accuracies": 0.970370352268219, |
|
"eval_rewards/generated": -19.261966705322266, |
|
"eval_rewards/margins": 15.232365608215332, |
|
"eval_rewards/real": -4.029602527618408, |
|
"eval_runtime": 545.4173, |
|
"eval_samples_per_second": 7.917, |
|
"eval_steps_per_second": 0.248, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.987179487179487e-07, |
|
"logits/generated": -2.1666736602783203, |
|
"logits/real": -2.430644989013672, |
|
"logps/generated": -617.3143310546875, |
|
"logps/real": -455.05450439453125, |
|
"loss": 0.0181, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -19.36526870727539, |
|
"rewards/margins": 16.152751922607422, |
|
"rewards/real": -3.2125182151794434, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.9413919413919415e-07, |
|
"logits/generated": -2.1149790287017822, |
|
"logits/real": -2.3885388374328613, |
|
"logps/generated": -665.8757934570312, |
|
"logps/real": -415.7660217285156, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -21.40970230102539, |
|
"rewards/margins": 19.079601287841797, |
|
"rewards/real": -2.3301024436950684, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.8956043956043955e-07, |
|
"logits/generated": -2.172045946121216, |
|
"logits/real": -2.39658784866333, |
|
"logps/generated": -646.7432861328125, |
|
"logps/real": -423.2108459472656, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.290569305419922, |
|
"rewards/margins": 17.622339248657227, |
|
"rewards/real": -2.6682305335998535, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_logits/generated": -2.132990598678589, |
|
"eval_logits/real": -2.384284019470215, |
|
"eval_logps/generated": -652.7453002929688, |
|
"eval_logps/real": -457.6903381347656, |
|
"eval_loss": 0.04753004014492035, |
|
"eval_rewards/accuracies": 0.9731481671333313, |
|
"eval_rewards/generated": -19.841014862060547, |
|
"eval_rewards/margins": 15.806192398071289, |
|
"eval_rewards/real": -4.034823894500732, |
|
"eval_runtime": 542.7194, |
|
"eval_samples_per_second": 7.956, |
|
"eval_steps_per_second": 0.249, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.8498168498168498e-07, |
|
"logits/generated": -2.2161784172058105, |
|
"logits/real": -2.3784358501434326, |
|
"logps/generated": -677.4197998046875, |
|
"logps/real": -415.10162353515625, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.94455909729004, |
|
"rewards/margins": 18.23735809326172, |
|
"rewards/real": -2.707200527191162, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.8040293040293039e-07, |
|
"logits/generated": -2.162409782409668, |
|
"logits/real": -2.4266014099121094, |
|
"logps/generated": -634.3106689453125, |
|
"logps/real": -426.293212890625, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.84092140197754, |
|
"rewards/margins": 17.243751525878906, |
|
"rewards/real": -2.5971689224243164, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_logits/generated": -2.1121842861175537, |
|
"eval_logits/real": -2.371814012527466, |
|
"eval_logps/generated": -662.919189453125, |
|
"eval_logps/real": -462.037841796875, |
|
"eval_loss": 0.042767442762851715, |
|
"eval_rewards/accuracies": 0.9722222089767456, |
|
"eval_rewards/generated": -20.858409881591797, |
|
"eval_rewards/margins": 16.388830184936523, |
|
"eval_rewards/real": -4.469577789306641, |
|
"eval_runtime": 539.1341, |
|
"eval_samples_per_second": 8.009, |
|
"eval_steps_per_second": 0.25, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.7582417582417584e-07, |
|
"logits/generated": -2.063286304473877, |
|
"logits/real": -2.3742778301239014, |
|
"logps/generated": -688.5067138671875, |
|
"logps/real": -435.5904846191406, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.731130599975586, |
|
"rewards/margins": 19.344463348388672, |
|
"rewards/real": -3.3866665363311768, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.7124542124542125e-07, |
|
"logits/generated": -1.8474409580230713, |
|
"logits/real": -2.223342180252075, |
|
"logps/generated": -727.342041015625, |
|
"logps/real": -428.4285583496094, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.19036293029785, |
|
"rewards/margins": 22.461837768554688, |
|
"rewards/real": -4.728522300720215, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"logits/generated": -1.7657016515731812, |
|
"logits/real": -2.246410369873047, |
|
"logps/generated": -764.67236328125, |
|
"logps/real": -466.6670837402344, |
|
"loss": 0.004, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -28.994009017944336, |
|
"rewards/margins": 23.707733154296875, |
|
"rewards/real": -5.286276817321777, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_logits/generated": -1.7239781618118286, |
|
"eval_logits/real": -2.1708648204803467, |
|
"eval_logps/generated": -710.6077880859375, |
|
"eval_logps/real": -480.1641845703125, |
|
"eval_loss": 0.04679808393120766, |
|
"eval_rewards/accuracies": 0.9750000238418579, |
|
"eval_rewards/generated": -25.62726593017578, |
|
"eval_rewards/margins": 19.34505844116211, |
|
"eval_rewards/real": -6.282209396362305, |
|
"eval_runtime": 539.6647, |
|
"eval_samples_per_second": 8.001, |
|
"eval_steps_per_second": 0.25, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.6208791208791208e-07, |
|
"logits/generated": -1.5984615087509155, |
|
"logits/real": -2.1419482231140137, |
|
"logps/generated": -743.21337890625, |
|
"logps/real": -483.67608642578125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.930688858032227, |
|
"rewards/margins": 22.560997009277344, |
|
"rewards/real": -5.369691371917725, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.5750915750915748e-07, |
|
"logits/generated": -1.7401374578475952, |
|
"logits/real": -2.2165982723236084, |
|
"logps/generated": -744.0372314453125, |
|
"logps/real": -467.1297912597656, |
|
"loss": 0.0222, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -26.236160278320312, |
|
"rewards/margins": 20.77927017211914, |
|
"rewards/real": -5.456892013549805, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_logits/generated": -1.6543631553649902, |
|
"eval_logits/real": -2.1242499351501465, |
|
"eval_logps/generated": -685.1132202148438, |
|
"eval_logps/real": -477.74078369140625, |
|
"eval_loss": 0.05835163965821266, |
|
"eval_rewards/accuracies": 0.9759259223937988, |
|
"eval_rewards/generated": -23.077802658081055, |
|
"eval_rewards/margins": 17.037935256958008, |
|
"eval_rewards/real": -6.039866924285889, |
|
"eval_runtime": 542.9056, |
|
"eval_samples_per_second": 7.954, |
|
"eval_steps_per_second": 0.249, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.5293040293040294e-07, |
|
"logits/generated": -1.6089227199554443, |
|
"logits/real": -2.1636829376220703, |
|
"logps/generated": -724.6553344726562, |
|
"logps/real": -462.1668395996094, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.3486385345459, |
|
"rewards/margins": 20.25618553161621, |
|
"rewards/real": -5.0924506187438965, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.4835164835164835e-07, |
|
"logits/generated": -1.762058973312378, |
|
"logits/real": -2.180694818496704, |
|
"logps/generated": -734.1724853515625, |
|
"logps/real": -474.927978515625, |
|
"loss": 0.0228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.476972579956055, |
|
"rewards/margins": 20.52760124206543, |
|
"rewards/real": -3.949371814727783, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.4377289377289375e-07, |
|
"logits/generated": -1.8223994970321655, |
|
"logits/real": -2.230008602142334, |
|
"logps/generated": -674.3191528320312, |
|
"logps/real": -437.16448974609375, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -22.358070373535156, |
|
"rewards/margins": 19.133329391479492, |
|
"rewards/real": -3.2247397899627686, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_logits/generated": -1.7696380615234375, |
|
"eval_logits/real": -2.202569007873535, |
|
"eval_logps/generated": -652.3550415039062, |
|
"eval_logps/real": -456.0634765625, |
|
"eval_loss": 0.04898802191019058, |
|
"eval_rewards/accuracies": 0.9722222089767456, |
|
"eval_rewards/generated": -19.801984786987305, |
|
"eval_rewards/margins": 15.92984676361084, |
|
"eval_rewards/real": -3.8721377849578857, |
|
"eval_runtime": 541.3411, |
|
"eval_samples_per_second": 7.976, |
|
"eval_steps_per_second": 0.249, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.3919413919413918e-07, |
|
"logits/generated": -1.7800897359848022, |
|
"logits/real": -2.215033531188965, |
|
"logps/generated": -680.4356689453125, |
|
"logps/real": -436.82257080078125, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.27292251586914, |
|
"rewards/margins": 17.82526969909668, |
|
"rewards/real": -3.4476523399353027, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.346153846153846e-07, |
|
"logits/generated": -1.6774091720581055, |
|
"logits/real": -2.172245740890503, |
|
"logps/generated": -713.2435302734375, |
|
"logps/real": -457.10125732421875, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.64259147644043, |
|
"rewards/margins": 19.829133987426758, |
|
"rewards/real": -3.813458204269409, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_logits/generated": -1.6461421251296997, |
|
"eval_logits/real": -2.1238648891448975, |
|
"eval_logps/generated": -691.8392333984375, |
|
"eval_logps/real": -470.1639404296875, |
|
"eval_loss": 0.04775296524167061, |
|
"eval_rewards/accuracies": 0.9750000238418579, |
|
"eval_rewards/generated": -23.750408172607422, |
|
"eval_rewards/margins": 18.46822738647461, |
|
"eval_rewards/real": -5.282179355621338, |
|
"eval_runtime": 545.1425, |
|
"eval_samples_per_second": 7.921, |
|
"eval_steps_per_second": 0.248, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.3003663003663004e-07, |
|
"logits/generated": -1.691014051437378, |
|
"logits/real": -2.090836763381958, |
|
"logps/generated": -717.5584716796875, |
|
"logps/real": -426.21075439453125, |
|
"loss": 0.0146, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -24.57354736328125, |
|
"rewards/margins": 20.423908233642578, |
|
"rewards/real": -4.1496381759643555, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.2545787545787545e-07, |
|
"logits/generated": -1.7866837978363037, |
|
"logits/real": -2.1096789836883545, |
|
"logps/generated": -695.7778930664062, |
|
"logps/real": -430.5741271972656, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -23.648204803466797, |
|
"rewards/margins": 19.338354110717773, |
|
"rewards/real": -4.30985164642334, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.2087912087912088e-07, |
|
"logits/generated": -1.6880855560302734, |
|
"logits/real": -2.137526512145996, |
|
"logps/generated": -677.9566040039062, |
|
"logps/real": -424.09771728515625, |
|
"loss": 0.0169, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -23.116125106811523, |
|
"rewards/margins": 19.75508689880371, |
|
"rewards/real": -3.3610382080078125, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_logits/generated": -1.6890491247177124, |
|
"eval_logits/real": -2.1447362899780273, |
|
"eval_logps/generated": -683.7665405273438, |
|
"eval_logps/real": -466.7169189453125, |
|
"eval_loss": 0.045488789677619934, |
|
"eval_rewards/accuracies": 0.9731481671333313, |
|
"eval_rewards/generated": -22.943147659301758, |
|
"eval_rewards/margins": 18.005666732788086, |
|
"eval_rewards/real": -4.937481880187988, |
|
"eval_runtime": 541.292, |
|
"eval_samples_per_second": 7.977, |
|
"eval_steps_per_second": 0.249, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.163003663003663e-07, |
|
"logits/generated": -1.7114064693450928, |
|
"logits/real": -2.16520094871521, |
|
"logps/generated": -719.1756591796875, |
|
"logps/real": -456.89697265625, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.02173614501953, |
|
"rewards/margins": 20.79627227783203, |
|
"rewards/real": -4.225462913513184, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.1172161172161172e-07, |
|
"logits/generated": -1.6347287893295288, |
|
"logits/real": -2.1204426288604736, |
|
"logps/generated": -683.4697875976562, |
|
"logps/real": -454.54461669921875, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -24.318696975708008, |
|
"rewards/margins": 19.61166763305664, |
|
"rewards/real": -4.707026958465576, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_logits/generated": -1.5889698266983032, |
|
"eval_logits/real": -2.077850818634033, |
|
"eval_logps/generated": -704.8994140625, |
|
"eval_logps/real": -477.1242370605469, |
|
"eval_loss": 0.04485413804650307, |
|
"eval_rewards/accuracies": 0.9740740656852722, |
|
"eval_rewards/generated": -25.056419372558594, |
|
"eval_rewards/margins": 19.078208923339844, |
|
"eval_rewards/real": -5.978213787078857, |
|
"eval_runtime": 545.2881, |
|
"eval_samples_per_second": 7.919, |
|
"eval_steps_per_second": 0.248, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.0714285714285713e-07, |
|
"logits/generated": -1.6600666046142578, |
|
"logits/real": -2.114758014678955, |
|
"logps/generated": -709.62890625, |
|
"logps/real": -427.54022216796875, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/generated": -24.828792572021484, |
|
"rewards/margins": 20.330127716064453, |
|
"rewards/real": -4.498665809631348, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.0256410256410256e-07, |
|
"logits/generated": -1.5314631462097168, |
|
"logits/real": -2.0638275146484375, |
|
"logps/generated": -710.69482421875, |
|
"logps/real": -447.7142028808594, |
|
"loss": 0.0176, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -25.278757095336914, |
|
"rewards/margins": 20.97730255126953, |
|
"rewards/real": -4.301451683044434, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.798534798534798e-08, |
|
"logits/generated": -1.659684181213379, |
|
"logits/real": -2.0716404914855957, |
|
"logps/generated": -747.033203125, |
|
"logps/real": -457.4671936035156, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -26.58378028869629, |
|
"rewards/margins": 22.331199645996094, |
|
"rewards/real": -4.2525811195373535, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_logits/generated": -1.626175880432129, |
|
"eval_logits/real": -2.085879325866699, |
|
"eval_logps/generated": -683.6390991210938, |
|
"eval_logps/real": -469.9638671875, |
|
"eval_loss": 0.04278276115655899, |
|
"eval_rewards/accuracies": 0.9731481671333313, |
|
"eval_rewards/generated": -22.93039894104004, |
|
"eval_rewards/margins": 17.668224334716797, |
|
"eval_rewards/real": -5.262173175811768, |
|
"eval_runtime": 544.8826, |
|
"eval_samples_per_second": 7.925, |
|
"eval_steps_per_second": 0.248, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.340659340659341e-08, |
|
"logits/generated": -1.5913441181182861, |
|
"logits/real": -2.1010706424713135, |
|
"logps/generated": -683.9640502929688, |
|
"logps/real": -461.71038818359375, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.161916732788086, |
|
"rewards/margins": 19.142833709716797, |
|
"rewards/real": -4.0190839767456055, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.882783882783882e-08, |
|
"logits/generated": -1.6814014911651611, |
|
"logits/real": -2.1148276329040527, |
|
"logps/generated": -701.2926025390625, |
|
"logps/real": -449.57421875, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.634958267211914, |
|
"rewards/margins": 20.201641082763672, |
|
"rewards/real": -4.433315753936768, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_logits/generated": -1.607029914855957, |
|
"eval_logits/real": -2.093400478363037, |
|
"eval_logps/generated": -695.1799926757812, |
|
"eval_logps/real": -472.4885559082031, |
|
"eval_loss": 0.04106166213750839, |
|
"eval_rewards/accuracies": 0.9759259223937988, |
|
"eval_rewards/generated": -24.0844783782959, |
|
"eval_rewards/margins": 18.56983757019043, |
|
"eval_rewards/real": -5.514641284942627, |
|
"eval_runtime": 541.5376, |
|
"eval_samples_per_second": 7.974, |
|
"eval_steps_per_second": 0.249, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.424908424908425e-08, |
|
"logits/generated": -1.6199986934661865, |
|
"logits/real": -2.0736172199249268, |
|
"logps/generated": -728.6807861328125, |
|
"logps/real": -431.51824951171875, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -25.239320755004883, |
|
"rewards/margins": 21.229106903076172, |
|
"rewards/real": -4.0102128982543945, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 7.967032967032966e-08, |
|
"logits/generated": -1.6336625814437866, |
|
"logits/real": -2.1006054878234863, |
|
"logps/generated": -734.5277709960938, |
|
"logps/real": -478.76446533203125, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.6556339263916, |
|
"rewards/margins": 21.211820602416992, |
|
"rewards/real": -5.443814277648926, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 7.509157509157509e-08, |
|
"logits/generated": -1.649921178817749, |
|
"logits/real": -2.090855598449707, |
|
"logps/generated": -750.3670043945312, |
|
"logps/real": -452.450927734375, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.29367446899414, |
|
"rewards/margins": 22.180587768554688, |
|
"rewards/real": -4.113083362579346, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_logits/generated": -1.6779299974441528, |
|
"eval_logits/real": -2.1277213096618652, |
|
"eval_logps/generated": -691.9456787109375, |
|
"eval_logps/real": -471.51629638671875, |
|
"eval_loss": 0.04077158868312836, |
|
"eval_rewards/accuracies": 0.9750000238418579, |
|
"eval_rewards/generated": -23.761049270629883, |
|
"eval_rewards/margins": 18.3436336517334, |
|
"eval_rewards/real": -5.417417049407959, |
|
"eval_runtime": 539.3269, |
|
"eval_samples_per_second": 8.006, |
|
"eval_steps_per_second": 0.25, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.051282051282051e-08, |
|
"logits/generated": -1.722020149230957, |
|
"logits/real": -2.128837823867798, |
|
"logps/generated": -728.57861328125, |
|
"logps/real": -456.64312744140625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.133262634277344, |
|
"rewards/margins": 21.077022552490234, |
|
"rewards/real": -4.0562424659729, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 6.593406593406594e-08, |
|
"logits/generated": -1.723589539527893, |
|
"logits/real": -2.129660129547119, |
|
"logps/generated": -737.1346435546875, |
|
"logps/real": -446.6085510253906, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.5841064453125, |
|
"rewards/margins": 21.89805030822754, |
|
"rewards/real": -4.686056137084961, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_logits/generated": -1.704836130142212, |
|
"eval_logits/real": -2.1411664485931396, |
|
"eval_logps/generated": -699.8467407226562, |
|
"eval_logps/real": -474.1795654296875, |
|
"eval_loss": 0.04111822694540024, |
|
"eval_rewards/accuracies": 0.9750000238418579, |
|
"eval_rewards/generated": -24.55116081237793, |
|
"eval_rewards/margins": 18.867414474487305, |
|
"eval_rewards/real": -5.683747291564941, |
|
"eval_runtime": 540.5677, |
|
"eval_samples_per_second": 7.988, |
|
"eval_steps_per_second": 0.25, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.135531135531135e-08, |
|
"logits/generated": -1.6880667209625244, |
|
"logits/real": -2.135887861251831, |
|
"logps/generated": -704.2137451171875, |
|
"logps/real": -462.7354431152344, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.7542724609375, |
|
"rewards/margins": 20.865833282470703, |
|
"rewards/real": -4.88844108581543, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.677655677655677e-08, |
|
"logits/generated": -1.630059003829956, |
|
"logits/real": -2.092768430709839, |
|
"logps/generated": -720.8707275390625, |
|
"logps/real": -457.188720703125, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.587078094482422, |
|
"rewards/margins": 21.475454330444336, |
|
"rewards/real": -5.1116228103637695, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5.2197802197802196e-08, |
|
"logits/generated": -1.6180572509765625, |
|
"logits/real": -2.073620080947876, |
|
"logps/generated": -702.1611328125, |
|
"logps/real": -436.5894470214844, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -26.184375762939453, |
|
"rewards/margins": 21.657176971435547, |
|
"rewards/real": -4.52719783782959, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_logits/generated": -1.6256543397903442, |
|
"eval_logits/real": -2.091676950454712, |
|
"eval_logps/generated": -707.8128662109375, |
|
"eval_logps/real": -476.0543212890625, |
|
"eval_loss": 0.04039894789457321, |
|
"eval_rewards/accuracies": 0.9759259223937988, |
|
"eval_rewards/generated": -25.347776412963867, |
|
"eval_rewards/margins": 19.476551055908203, |
|
"eval_rewards/real": -5.87122106552124, |
|
"eval_runtime": 541.5935, |
|
"eval_samples_per_second": 7.973, |
|
"eval_steps_per_second": 0.249, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.7619047619047613e-08, |
|
"logits/generated": -1.6621770858764648, |
|
"logits/real": -2.0922763347625732, |
|
"logps/generated": -746.7325439453125, |
|
"logps/real": -442.34857177734375, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -28.543167114257812, |
|
"rewards/margins": 23.869972229003906, |
|
"rewards/real": -4.6731953620910645, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.304029304029304e-08, |
|
"logits/generated": -1.6281875371932983, |
|
"logits/real": -2.095040798187256, |
|
"logps/generated": -751.2948608398438, |
|
"logps/real": -464.0716857910156, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.601943969726562, |
|
"rewards/margins": 21.863361358642578, |
|
"rewards/real": -3.738584041595459, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_logits/generated": -1.6509149074554443, |
|
"eval_logits/real": -2.1029398441314697, |
|
"eval_logps/generated": -686.7852783203125, |
|
"eval_logps/real": -468.099853515625, |
|
"eval_loss": 0.038483668118715286, |
|
"eval_rewards/accuracies": 0.9740740656852722, |
|
"eval_rewards/generated": -23.245014190673828, |
|
"eval_rewards/margins": 18.169240951538086, |
|
"eval_rewards/real": -5.075774192810059, |
|
"eval_runtime": 542.4541, |
|
"eval_samples_per_second": 7.96, |
|
"eval_steps_per_second": 0.249, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.846153846153846e-08, |
|
"logits/generated": -1.7731685638427734, |
|
"logits/real": -2.1262717247009277, |
|
"logps/generated": -718.3228149414062, |
|
"logps/real": -450.9098205566406, |
|
"loss": 0.018, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -23.984769821166992, |
|
"rewards/margins": 19.602436065673828, |
|
"rewards/real": -4.382335662841797, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.388278388278388e-08, |
|
"logits/generated": -1.7456114292144775, |
|
"logits/real": -2.1026391983032227, |
|
"logps/generated": -726.411376953125, |
|
"logps/real": -401.18524169921875, |
|
"loss": 0.0146, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -25.529359817504883, |
|
"rewards/margins": 21.635169982910156, |
|
"rewards/real": -3.8941879272460938, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.9304029304029303e-08, |
|
"logits/generated": -1.6194369792938232, |
|
"logits/real": -2.1418557167053223, |
|
"logps/generated": -741.9666748046875, |
|
"logps/real": -464.30682373046875, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -27.52840232849121, |
|
"rewards/margins": 22.9106388092041, |
|
"rewards/real": -4.617762565612793, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_logits/generated": -1.6736239194869995, |
|
"eval_logits/real": -2.1248714923858643, |
|
"eval_logps/generated": -689.571533203125, |
|
"eval_logps/real": -469.41937255859375, |
|
"eval_loss": 0.03756081312894821, |
|
"eval_rewards/accuracies": 0.9759259223937988, |
|
"eval_rewards/generated": -23.52364158630371, |
|
"eval_rewards/margins": 18.315916061401367, |
|
"eval_rewards/real": -5.20772647857666, |
|
"eval_runtime": 546.839, |
|
"eval_samples_per_second": 7.896, |
|
"eval_steps_per_second": 0.247, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.4725274725274723e-08, |
|
"logits/generated": -1.757741928100586, |
|
"logits/real": -2.1521947383880615, |
|
"logps/generated": -727.6856689453125, |
|
"logps/real": -433.1048889160156, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -25.25569725036621, |
|
"rewards/margins": 21.249250411987305, |
|
"rewards/real": -4.0064473152160645, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.0146520146520147e-08, |
|
"logits/generated": -1.6687755584716797, |
|
"logits/real": -2.1527109146118164, |
|
"logps/generated": -697.9734497070312, |
|
"logps/real": -447.568359375, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.455238342285156, |
|
"rewards/margins": 20.623992919921875, |
|
"rewards/real": -3.831244945526123, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_logits/generated": -1.6968693733215332, |
|
"eval_logits/real": -2.138258218765259, |
|
"eval_logps/generated": -687.8193359375, |
|
"eval_logps/real": -468.58880615234375, |
|
"eval_loss": 0.03790770843625069, |
|
"eval_rewards/accuracies": 0.9750000238418579, |
|
"eval_rewards/generated": -23.348421096801758, |
|
"eval_rewards/margins": 18.223752975463867, |
|
"eval_rewards/real": -5.124669075012207, |
|
"eval_runtime": 546.4967, |
|
"eval_samples_per_second": 7.901, |
|
"eval_steps_per_second": 0.247, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.5567765567765568e-08, |
|
"logits/generated": -1.6982448101043701, |
|
"logits/real": -2.162806987762451, |
|
"logps/generated": -685.9969482421875, |
|
"logps/real": -421.25787353515625, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -25.75900650024414, |
|
"rewards/margins": 22.124160766601562, |
|
"rewards/real": -3.63484525680542, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.098901098901099e-08, |
|
"logits/generated": -1.7311254739761353, |
|
"logits/real": -2.1759705543518066, |
|
"logps/generated": -742.8027954101562, |
|
"logps/real": -477.0604553222656, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.570262908935547, |
|
"rewards/margins": 21.3287410736084, |
|
"rewards/real": -4.2415266036987305, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.41025641025641e-09, |
|
"logits/generated": -1.7832437753677368, |
|
"logits/real": -2.145749568939209, |
|
"logps/generated": -705.14697265625, |
|
"logps/real": -451.7295837402344, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.158727645874023, |
|
"rewards/margins": 20.19052505493164, |
|
"rewards/real": -3.968203067779541, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_logits/generated": -1.6814604997634888, |
|
"eval_logits/real": -2.1279709339141846, |
|
"eval_logps/generated": -690.4514770507812, |
|
"eval_logps/real": -469.20892333984375, |
|
"eval_loss": 0.03795896843075752, |
|
"eval_rewards/accuracies": 0.9777777791023254, |
|
"eval_rewards/generated": -23.611637115478516, |
|
"eval_rewards/margins": 18.424955368041992, |
|
"eval_rewards/real": -5.186681747436523, |
|
"eval_runtime": 540.8892, |
|
"eval_samples_per_second": 7.983, |
|
"eval_steps_per_second": 0.25, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8315018315018314e-09, |
|
"logits/generated": -1.7156912088394165, |
|
"logits/real": -2.155310869216919, |
|
"logps/generated": -734.7672119140625, |
|
"logps/real": -468.51580810546875, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/generated": -27.58634376525879, |
|
"rewards/margins": 23.325031280517578, |
|
"rewards/real": -4.261313438415527, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1214, |
|
"total_flos": 0.0, |
|
"train_loss": 0.05120065249445122, |
|
"train_runtime": 45973.3661, |
|
"train_samples_per_second": 1.69, |
|
"train_steps_per_second": 0.026 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1214, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|