{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 5860, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.1363636363636363e-08, "logits/chosen": -0.2343997359275818, "logits/rejected": -0.24077923595905304, "logps/chosen": -182.698486328125, "logps/rejected": -169.3557586669922, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.2727272727272725e-08, "logits/chosen": -0.28852421045303345, "logits/rejected": -0.30025073885917664, "logps/chosen": -138.6444091796875, "logps/rejected": -146.31468200683594, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.0, "learning_rate": 3.4090909090909086e-08, "logits/chosen": -0.29201382398605347, "logits/rejected": -0.29456159472465515, "logps/chosen": -207.91583251953125, "logps/rejected": -151.10482788085938, "loss": 0.6937, "rewards/accuracies": 0.5, "rewards/chosen": 0.004730558954179287, "rewards/margins": -0.0006856522522866726, "rewards/rejected": 0.005416211672127247, "step": 3 }, { "epoch": 0.0, "learning_rate": 4.545454545454545e-08, "logits/chosen": -0.23803120851516724, "logits/rejected": -0.24260807037353516, "logps/chosen": -150.48223876953125, "logps/rejected": -155.54730224609375, "loss": 0.6923, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0032737725414335728, "rewards/margins": 0.0024062152951955795, "rewards/rejected": 0.0008675574790686369, "step": 4 }, { "epoch": 0.0, "learning_rate": 5.6818181818181815e-08, "logits/chosen": -0.3024767339229584, "logits/rejected": -0.3029945194721222, "logps/chosen": -122.08723449707031, "logps/rejected": -117.28968811035156, "loss": 0.6933, "rewards/accuracies": 0.5, "rewards/chosen": -0.008752036839723587, "rewards/margins": -0.01148636732250452, "rewards/rejected": 0.0027343304827809334, "step": 5 }, { "epoch": 0.0, "learning_rate": 6.818181818181817e-08, "logits/chosen": -0.30008330941200256, "logits/rejected": -0.29907041788101196, "logps/chosen": -166.4525146484375, "logps/rejected": -151.05093383789062, "loss": 0.6943, "rewards/accuracies": 0.5, "rewards/chosen": 0.01180595438927412, "rewards/margins": 0.014556355774402618, "rewards/rejected": -0.00275040278211236, "step": 6 }, { "epoch": 0.0, "learning_rate": 7.954545454545454e-08, "logits/chosen": -0.22585681080818176, "logits/rejected": -0.23094530403614044, "logps/chosen": -228.94667053222656, "logps/rejected": -194.66624450683594, "loss": 0.7006, "rewards/accuracies": 0.5, "rewards/chosen": -0.0013882624916732311, "rewards/margins": -0.007930541411042213, "rewards/rejected": 0.006542278453707695, "step": 7 }, { "epoch": 0.0, "learning_rate": 9.09090909090909e-08, "logits/chosen": -0.23698364198207855, "logits/rejected": -0.24435542523860931, "logps/chosen": -132.68223571777344, "logps/rejected": -164.8730010986328, "loss": 0.6932, "rewards/accuracies": 0.8125, "rewards/chosen": 0.012421870604157448, "rewards/margins": 0.02614724263548851, "rewards/rejected": -0.013725373893976212, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.0227272727272728e-07, "logits/chosen": -0.3020854592323303, "logits/rejected": -0.3010753095149994, "logps/chosen": -173.6214141845703, "logps/rejected": -157.474365234375, "loss": 0.6911, "rewards/accuracies": 0.5, "rewards/chosen": -0.007175935432314873, "rewards/margins": -0.009474508464336395, "rewards/rejected": 0.0022985723335295916, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.1363636363636363e-07, "logits/chosen": -0.30274057388305664, "logits/rejected": -0.31764745712280273, "logps/chosen": -174.84112548828125, "logps/rejected": -207.20021057128906, "loss": 0.6941, "rewards/accuracies": 0.5, "rewards/chosen": 0.014578714966773987, "rewards/margins": -0.0037426024209707975, "rewards/rejected": 0.018321316689252853, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.25e-07, "logits/chosen": -0.23441556096076965, "logits/rejected": -0.23721936345100403, "logps/chosen": -256.8955383300781, "logps/rejected": -183.5340576171875, "loss": 0.6921, "rewards/accuracies": 0.625, "rewards/chosen": 0.007555932272225618, "rewards/margins": 0.0127621591091156, "rewards/rejected": -0.0052062273025512695, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.3636363636363635e-07, "logits/chosen": -0.2632579505443573, "logits/rejected": -0.2740212380886078, "logps/chosen": -139.9535369873047, "logps/rejected": -158.4424591064453, "loss": 0.6912, "rewards/accuracies": 0.4375, "rewards/chosen": 0.006837489083409309, "rewards/margins": -0.0036489591002464294, "rewards/rejected": 0.010486448183655739, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.4772727272727272e-07, "logits/chosen": -0.2923070788383484, "logits/rejected": -0.29506534337997437, "logps/chosen": -170.7211151123047, "logps/rejected": -170.1407012939453, "loss": 0.6948, "rewards/accuracies": 0.4375, "rewards/chosen": 0.005404271651059389, "rewards/margins": 0.004588575568050146, "rewards/rejected": 0.0008156949770636857, "step": 13 }, { "epoch": 0.0, "learning_rate": 1.5909090909090907e-07, "logits/chosen": -0.2513083219528198, "logits/rejected": -0.24310655891895294, "logps/chosen": -254.6034698486328, "logps/rejected": -192.11648559570312, "loss": 0.6868, "rewards/accuracies": 0.6875, "rewards/chosen": 0.016631020233035088, "rewards/margins": 0.013860584236681461, "rewards/rejected": 0.002770435530692339, "step": 14 }, { "epoch": 0.01, "learning_rate": 1.7045454545454543e-07, "logits/chosen": -0.29956790804862976, "logits/rejected": -0.30617430806159973, "logps/chosen": -202.54800415039062, "logps/rejected": -193.4821014404297, "loss": 0.6921, "rewards/accuracies": 0.625, "rewards/chosen": 0.002555498154833913, "rewards/margins": 0.005489732138812542, "rewards/rejected": -0.0029342356137931347, "step": 15 }, { "epoch": 0.01, "learning_rate": 1.818181818181818e-07, "logits/chosen": -0.26666685938835144, "logits/rejected": -0.2638859748840332, "logps/chosen": -224.64999389648438, "logps/rejected": -189.45355224609375, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": -0.0049195485189557076, "rewards/margins": -0.0020590913482010365, "rewards/rejected": -0.00286045647226274, "step": 16 }, { "epoch": 0.01, "learning_rate": 1.9318181818181818e-07, "logits/chosen": -0.2248888462781906, "logits/rejected": -0.23183348774909973, "logps/chosen": -182.7044219970703, "logps/rejected": -139.090576171875, "loss": 0.692, "rewards/accuracies": 0.375, "rewards/chosen": -0.0020969328470528126, "rewards/margins": -0.005062336102128029, "rewards/rejected": 0.002965402789413929, "step": 17 }, { "epoch": 0.01, "learning_rate": 2.0454545454545456e-07, "logits/chosen": -0.26185184717178345, "logits/rejected": -0.2601316571235657, "logps/chosen": -185.71243286132812, "logps/rejected": -132.8929901123047, "loss": 0.6935, "rewards/accuracies": 0.625, "rewards/chosen": 0.01149480789899826, "rewards/margins": 0.010879718698561192, "rewards/rejected": 0.0006150883855298162, "step": 18 }, { "epoch": 0.01, "learning_rate": 2.159090909090909e-07, "logits/chosen": -0.29672956466674805, "logits/rejected": -0.30872124433517456, "logps/chosen": -225.4219970703125, "logps/rejected": -200.66668701171875, "loss": 0.6883, "rewards/accuracies": 0.625, "rewards/chosen": 0.01417109090834856, "rewards/margins": 0.0074797505512833595, "rewards/rejected": 0.006691341754049063, "step": 19 }, { "epoch": 0.01, "learning_rate": 2.2727272727272726e-07, "logits/chosen": -0.2807599604129791, "logits/rejected": -0.280718058347702, "logps/chosen": -197.77236938476562, "logps/rejected": -160.47621154785156, "loss": 0.6858, "rewards/accuracies": 0.625, "rewards/chosen": 0.0029895976185798645, "rewards/margins": 0.01468503475189209, "rewards/rejected": -0.011695438995957375, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.3863636363636364e-07, "logits/chosen": -0.3460007905960083, "logits/rejected": -0.34995123744010925, "logps/chosen": -196.3187713623047, "logps/rejected": -180.4865264892578, "loss": 0.6889, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0009605968371033669, "rewards/margins": 0.0028972653672099113, "rewards/rejected": -0.00385786360129714, "step": 21 }, { "epoch": 0.01, "learning_rate": 2.5e-07, "logits/chosen": -0.20942620933055878, "logits/rejected": -0.23549027740955353, "logps/chosen": -207.4935302734375, "logps/rejected": -213.09332275390625, "loss": 0.6913, "rewards/accuracies": 0.5, "rewards/chosen": 0.00033000181429088116, "rewards/margins": -0.002262298483401537, "rewards/rejected": 0.0025923000648617744, "step": 22 }, { "epoch": 0.01, "learning_rate": 2.6136363636363634e-07, "logits/chosen": -0.1959536224603653, "logits/rejected": -0.1860441416501999, "logps/chosen": -108.5407943725586, "logps/rejected": -90.0385513305664, "loss": 0.6841, "rewards/accuracies": 0.625, "rewards/chosen": 0.003614549059420824, "rewards/margins": 0.01736350543797016, "rewards/rejected": -0.01374895591288805, "step": 23 }, { "epoch": 0.01, "learning_rate": 2.727272727272727e-07, "logits/chosen": -0.28010228276252747, "logits/rejected": -0.2896505296230316, "logps/chosen": -133.8522491455078, "logps/rejected": -136.17059326171875, "loss": 0.6916, "rewards/accuracies": 0.5, "rewards/chosen": 0.006137552671134472, "rewards/margins": 0.01313910074532032, "rewards/rejected": -0.007001549005508423, "step": 24 }, { "epoch": 0.01, "learning_rate": 2.840909090909091e-07, "logits/chosen": -0.2622797191143036, "logits/rejected": -0.27190378308296204, "logps/chosen": -205.2733917236328, "logps/rejected": -179.23460388183594, "loss": 0.6903, "rewards/accuracies": 0.5625, "rewards/chosen": -0.003634807886555791, "rewards/margins": -9.357708040624857e-05, "rewards/rejected": -0.003541230922564864, "step": 25 }, { "epoch": 0.01, "learning_rate": 2.9545454545454545e-07, "logits/chosen": -0.2434108555316925, "logits/rejected": -0.2525210380554199, "logps/chosen": -196.36239624023438, "logps/rejected": -187.47906494140625, "loss": 0.6874, "rewards/accuracies": 0.625, "rewards/chosen": -0.013590311631560326, "rewards/margins": 0.0018868573242798448, "rewards/rejected": -0.015477169305086136, "step": 26 }, { "epoch": 0.01, "learning_rate": 3.068181818181818e-07, "logits/chosen": -0.21510136127471924, "logits/rejected": -0.22136376798152924, "logps/chosen": -171.70278930664062, "logps/rejected": -152.1783905029297, "loss": 0.6886, "rewards/accuracies": 0.4375, "rewards/chosen": -0.014150906354188919, "rewards/margins": -0.013630504719913006, "rewards/rejected": -0.0005204020999372005, "step": 27 }, { "epoch": 0.01, "learning_rate": 3.1818181818181815e-07, "logits/chosen": -0.2792726755142212, "logits/rejected": -0.29076412320137024, "logps/chosen": -155.5727081298828, "logps/rejected": -150.3722686767578, "loss": 0.6857, "rewards/accuracies": 0.6875, "rewards/chosen": 0.0044217826798558235, "rewards/margins": 0.014023029245436192, "rewards/rejected": -0.009601246565580368, "step": 28 }, { "epoch": 0.01, "learning_rate": 3.295454545454545e-07, "logits/chosen": -0.32396233081817627, "logits/rejected": -0.3238154649734497, "logps/chosen": -129.49224853515625, "logps/rejected": -109.01820373535156, "loss": 0.6846, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0008422324899584055, "rewards/margins": 8.309539407491684e-05, "rewards/rejected": -0.0009253278840333223, "step": 29 }, { "epoch": 0.01, "learning_rate": 3.4090909090909085e-07, "logits/chosen": -0.3346935212612152, "logits/rejected": -0.34624171257019043, "logps/chosen": -149.67381286621094, "logps/rejected": -180.94509887695312, "loss": 0.6895, "rewards/accuracies": 0.4375, "rewards/chosen": -0.010312279686331749, "rewards/margins": 0.006992962211370468, "rewards/rejected": -0.017305243760347366, "step": 30 }, { "epoch": 0.01, "learning_rate": 3.5227272727272725e-07, "logits/chosen": -0.3297312557697296, "logits/rejected": -0.3398754894733429, "logps/chosen": -231.82850646972656, "logps/rejected": -216.95724487304688, "loss": 0.6836, "rewards/accuracies": 0.75, "rewards/chosen": 0.0022214404307305813, "rewards/margins": 0.023380177095532417, "rewards/rejected": -0.02115873619914055, "step": 31 }, { "epoch": 0.01, "learning_rate": 3.636363636363636e-07, "logits/chosen": -0.2028946727514267, "logits/rejected": -0.2021288424730301, "logps/chosen": -167.9632568359375, "logps/rejected": -125.26979064941406, "loss": 0.6852, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0033667704556137323, "rewards/margins": 0.001707445364445448, "rewards/rejected": -0.005074216052889824, "step": 32 }, { "epoch": 0.01, "learning_rate": 3.75e-07, "logits/chosen": -0.2517032027244568, "logits/rejected": -0.2777404189109802, "logps/chosen": -221.84396362304688, "logps/rejected": -242.36477661132812, "loss": 0.6789, "rewards/accuracies": 0.875, "rewards/chosen": 0.004632055759429932, "rewards/margins": 0.033185914158821106, "rewards/rejected": -0.028553854674100876, "step": 33 }, { "epoch": 0.01, "learning_rate": 3.8636363636363636e-07, "logits/chosen": -0.29831424355506897, "logits/rejected": -0.29975250363349915, "logps/chosen": -173.0123748779297, "logps/rejected": -167.66513061523438, "loss": 0.6808, "rewards/accuracies": 0.8125, "rewards/chosen": 0.01269460842013359, "rewards/margins": 0.03106355480849743, "rewards/rejected": -0.018368946388363838, "step": 34 }, { "epoch": 0.01, "learning_rate": 3.977272727272727e-07, "logits/chosen": -0.2448384165763855, "logits/rejected": -0.26802122592926025, "logps/chosen": -167.3801727294922, "logps/rejected": -195.8806915283203, "loss": 0.6742, "rewards/accuracies": 0.75, "rewards/chosen": 0.013154092244803905, "rewards/margins": 0.041174013167619705, "rewards/rejected": -0.028019916266202927, "step": 35 }, { "epoch": 0.01, "learning_rate": 4.090909090909091e-07, "logits/chosen": -0.3090989887714386, "logits/rejected": -0.31425660848617554, "logps/chosen": -229.89382934570312, "logps/rejected": -193.78353881835938, "loss": 0.6748, "rewards/accuracies": 1.0, "rewards/chosen": 0.006313074380159378, "rewards/margins": 0.05698121339082718, "rewards/rejected": -0.0506681390106678, "step": 36 }, { "epoch": 0.01, "learning_rate": 4.2045454545454547e-07, "logits/chosen": -0.2655588984489441, "logits/rejected": -0.2689495384693146, "logps/chosen": -171.96102905273438, "logps/rejected": -153.8896942138672, "loss": 0.6756, "rewards/accuracies": 0.5625, "rewards/chosen": 0.013606562279164791, "rewards/margins": 0.03381725773215294, "rewards/rejected": -0.020210696384310722, "step": 37 }, { "epoch": 0.01, "learning_rate": 4.318181818181818e-07, "logits/chosen": -0.2794634699821472, "logits/rejected": -0.29232627153396606, "logps/chosen": -206.55679321289062, "logps/rejected": -209.3431396484375, "loss": 0.6775, "rewards/accuracies": 0.9375, "rewards/chosen": 0.018904510885477066, "rewards/margins": 0.07155171036720276, "rewards/rejected": -0.05264719948172569, "step": 38 }, { "epoch": 0.01, "learning_rate": 4.4318181818181817e-07, "logits/chosen": -0.31131479144096375, "logits/rejected": -0.3174147605895996, "logps/chosen": -227.93687438964844, "logps/rejected": -200.76556396484375, "loss": 0.6748, "rewards/accuracies": 0.8125, "rewards/chosen": 0.014896844513714314, "rewards/margins": 0.04546099901199341, "rewards/rejected": -0.03056415542960167, "step": 39 }, { "epoch": 0.01, "learning_rate": 4.545454545454545e-07, "logits/chosen": -0.31701505184173584, "logits/rejected": -0.31976842880249023, "logps/chosen": -282.0078430175781, "logps/rejected": -222.30572509765625, "loss": 0.6731, "rewards/accuracies": 0.6875, "rewards/chosen": 0.01369166374206543, "rewards/margins": 0.05164778605103493, "rewards/rejected": -0.0379561185836792, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.6590909090909087e-07, "logits/chosen": -0.2500951588153839, "logits/rejected": -0.25820407271385193, "logps/chosen": -181.50071716308594, "logps/rejected": -199.80026245117188, "loss": 0.6741, "rewards/accuracies": 0.75, "rewards/chosen": 0.0007683795411139727, "rewards/margins": 0.03023991920053959, "rewards/rejected": -0.029471542686223984, "step": 41 }, { "epoch": 0.01, "learning_rate": 4.772727272727273e-07, "logits/chosen": -0.24639876186847687, "logits/rejected": -0.2537787854671478, "logps/chosen": -230.82440185546875, "logps/rejected": -190.56065368652344, "loss": 0.6736, "rewards/accuracies": 0.875, "rewards/chosen": 0.010890137404203415, "rewards/margins": 0.05246567353606224, "rewards/rejected": -0.041575539857149124, "step": 42 }, { "epoch": 0.01, "learning_rate": 4.886363636363636e-07, "logits/chosen": -0.2642019987106323, "logits/rejected": -0.26276057958602905, "logps/chosen": -166.78939819335938, "logps/rejected": -145.7283935546875, "loss": 0.6697, "rewards/accuracies": 0.625, "rewards/chosen": -0.0029575787484645844, "rewards/margins": 0.01681508868932724, "rewards/rejected": -0.019772667437791824, "step": 43 }, { "epoch": 0.02, "learning_rate": 5e-07, "logits/chosen": -0.22562925517559052, "logits/rejected": -0.22602690756320953, "logps/chosen": -209.82810974121094, "logps/rejected": -186.64572143554688, "loss": 0.6664, "rewards/accuracies": 0.875, "rewards/chosen": -0.0020390397403389215, "rewards/margins": 0.05607151985168457, "rewards/rejected": -0.058110561221838, "step": 44 }, { "epoch": 0.02, "learning_rate": 5.113636363636363e-07, "logits/chosen": -0.2401418685913086, "logits/rejected": -0.24305514991283417, "logps/chosen": -173.5506591796875, "logps/rejected": -142.6803741455078, "loss": 0.6615, "rewards/accuracies": 0.9375, "rewards/chosen": 0.015486148186028004, "rewards/margins": 0.06680914759635925, "rewards/rejected": -0.051323000341653824, "step": 45 }, { "epoch": 0.02, "learning_rate": 5.227272727272727e-07, "logits/chosen": -0.3022044897079468, "logits/rejected": -0.3198543190956116, "logps/chosen": -183.73878479003906, "logps/rejected": -182.64312744140625, "loss": 0.666, "rewards/accuracies": 0.875, "rewards/chosen": -0.018290365114808083, "rewards/margins": 0.04416538029909134, "rewards/rejected": -0.06245574355125427, "step": 46 }, { "epoch": 0.02, "learning_rate": 5.34090909090909e-07, "logits/chosen": -0.24159403145313263, "logits/rejected": -0.23961226642131805, "logps/chosen": -167.75123596191406, "logps/rejected": -155.3538818359375, "loss": 0.6655, "rewards/accuracies": 0.9375, "rewards/chosen": 0.021827250719070435, "rewards/margins": 0.06491260975599289, "rewards/rejected": -0.043085359036922455, "step": 47 }, { "epoch": 0.02, "learning_rate": 5.454545454545454e-07, "logits/chosen": -0.29703110456466675, "logits/rejected": -0.30176591873168945, "logps/chosen": -170.77340698242188, "logps/rejected": -157.45681762695312, "loss": 0.6574, "rewards/accuracies": 0.75, "rewards/chosen": 0.018154719844460487, "rewards/margins": 0.06283828616142273, "rewards/rejected": -0.04468356817960739, "step": 48 }, { "epoch": 0.02, "learning_rate": 5.568181818181817e-07, "logits/chosen": -0.27908986806869507, "logits/rejected": -0.2893243134021759, "logps/chosen": -198.13739013671875, "logps/rejected": -234.70367431640625, "loss": 0.6575, "rewards/accuracies": 0.75, "rewards/chosen": 0.014338048174977303, "rewards/margins": 0.07027654349803925, "rewards/rejected": -0.055938493460416794, "step": 49 }, { "epoch": 0.02, "learning_rate": 5.681818181818182e-07, "logits/chosen": -0.19788961112499237, "logits/rejected": -0.20747853815555573, "logps/chosen": -159.4507293701172, "logps/rejected": -168.4470672607422, "loss": 0.6605, "rewards/accuracies": 0.875, "rewards/chosen": 0.01575118489563465, "rewards/margins": 0.06985358893871307, "rewards/rejected": -0.054102398455142975, "step": 50 }, { "epoch": 0.02, "learning_rate": 5.795454545454545e-07, "logits/chosen": -0.3402908742427826, "logits/rejected": -0.329073965549469, "logps/chosen": -226.89865112304688, "logps/rejected": -196.90249633789062, "loss": 0.6553, "rewards/accuracies": 0.9375, "rewards/chosen": 0.01842222549021244, "rewards/margins": 0.08155851066112518, "rewards/rejected": -0.06313628703355789, "step": 51 }, { "epoch": 0.02, "learning_rate": 5.909090909090909e-07, "logits/chosen": -0.27303409576416016, "logits/rejected": -0.27942875027656555, "logps/chosen": -214.09918212890625, "logps/rejected": -193.72808837890625, "loss": 0.6457, "rewards/accuracies": 0.9375, "rewards/chosen": 0.0194240752607584, "rewards/margins": 0.11498739570379257, "rewards/rejected": -0.09556331485509872, "step": 52 }, { "epoch": 0.02, "learning_rate": 6.022727272727272e-07, "logits/chosen": -0.2995532751083374, "logits/rejected": -0.31458020210266113, "logps/chosen": -210.27957153320312, "logps/rejected": -216.9602813720703, "loss": 0.6614, "rewards/accuracies": 0.875, "rewards/chosen": 0.006506597623229027, "rewards/margins": 0.09189563244581223, "rewards/rejected": -0.08538904786109924, "step": 53 }, { "epoch": 0.02, "learning_rate": 6.136363636363636e-07, "logits/chosen": -0.32672762870788574, "logits/rejected": -0.3155325651168823, "logps/chosen": -196.874755859375, "logps/rejected": -143.55279541015625, "loss": 0.6561, "rewards/accuracies": 0.75, "rewards/chosen": 0.015613515861332417, "rewards/margins": 0.09669727087020874, "rewards/rejected": -0.0810837596654892, "step": 54 }, { "epoch": 0.02, "learning_rate": 6.249999999999999e-07, "logits/chosen": -0.3068453371524811, "logits/rejected": -0.31648698449134827, "logps/chosen": -132.4596710205078, "logps/rejected": -123.573486328125, "loss": 0.6597, "rewards/accuracies": 0.8125, "rewards/chosen": 0.0010456731542944908, "rewards/margins": 0.09725727885961533, "rewards/rejected": -0.09621160477399826, "step": 55 }, { "epoch": 0.02, "learning_rate": 6.363636363636363e-07, "logits/chosen": -0.31304705142974854, "logits/rejected": -0.3053418695926666, "logps/chosen": -204.09976196289062, "logps/rejected": -138.23899841308594, "loss": 0.6428, "rewards/accuracies": 1.0, "rewards/chosen": 0.01910821534693241, "rewards/margins": 0.08902747929096222, "rewards/rejected": -0.06991926580667496, "step": 56 }, { "epoch": 0.02, "learning_rate": 6.477272727272726e-07, "logits/chosen": -0.30527231097221375, "logits/rejected": -0.33461815118789673, "logps/chosen": -188.19590759277344, "logps/rejected": -222.41339111328125, "loss": 0.6319, "rewards/accuracies": 0.8125, "rewards/chosen": 0.0015424015000462532, "rewards/margins": 0.1188180148601532, "rewards/rejected": -0.11727561801671982, "step": 57 }, { "epoch": 0.02, "learning_rate": 6.59090909090909e-07, "logits/chosen": -0.3799721896648407, "logits/rejected": -0.3762143552303314, "logps/chosen": -220.26426696777344, "logps/rejected": -231.79885864257812, "loss": 0.6415, "rewards/accuracies": 0.8125, "rewards/chosen": 0.03202447667717934, "rewards/margins": 0.08796866238117218, "rewards/rejected": -0.055944185703992844, "step": 58 }, { "epoch": 0.02, "learning_rate": 6.704545454545453e-07, "logits/chosen": -0.2943131923675537, "logits/rejected": -0.2923906445503235, "logps/chosen": -145.5238494873047, "logps/rejected": -111.1203384399414, "loss": 0.6404, "rewards/accuracies": 0.875, "rewards/chosen": 0.01480076089501381, "rewards/margins": 0.06872867047786713, "rewards/rejected": -0.053927913308143616, "step": 59 }, { "epoch": 0.02, "learning_rate": 6.818181818181817e-07, "logits/chosen": -0.23360757529735565, "logits/rejected": -0.230882465839386, "logps/chosen": -172.37008666992188, "logps/rejected": -146.44906616210938, "loss": 0.6412, "rewards/accuracies": 0.875, "rewards/chosen": 0.02388569340109825, "rewards/margins": 0.08514552563428879, "rewards/rejected": -0.061259832233190536, "step": 60 }, { "epoch": 0.02, "learning_rate": 6.931818181818182e-07, "logits/chosen": -0.3111308813095093, "logits/rejected": -0.31337133049964905, "logps/chosen": -243.64974975585938, "logps/rejected": -196.9929962158203, "loss": 0.6174, "rewards/accuracies": 0.875, "rewards/chosen": 0.0566679984331131, "rewards/margins": 0.13957014679908752, "rewards/rejected": -0.08290214836597443, "step": 61 }, { "epoch": 0.02, "learning_rate": 7.045454545454545e-07, "logits/chosen": -0.2815157175064087, "logits/rejected": -0.2906446158885956, "logps/chosen": -222.7623748779297, "logps/rejected": -199.1682586669922, "loss": 0.6241, "rewards/accuracies": 0.9375, "rewards/chosen": 0.02818289026618004, "rewards/margins": 0.1475209891796112, "rewards/rejected": -0.11933809518814087, "step": 62 }, { "epoch": 0.02, "learning_rate": 7.159090909090909e-07, "logits/chosen": -0.3158195912837982, "logits/rejected": -0.3140709698200226, "logps/chosen": -264.5915832519531, "logps/rejected": -248.44268798828125, "loss": 0.6365, "rewards/accuracies": 0.9375, "rewards/chosen": 0.04043317213654518, "rewards/margins": 0.15893720090389252, "rewards/rejected": -0.11850403249263763, "step": 63 }, { "epoch": 0.02, "learning_rate": 7.272727272727272e-07, "logits/chosen": -0.2139422446489334, "logits/rejected": -0.2189634144306183, "logps/chosen": -211.82162475585938, "logps/rejected": -167.22552490234375, "loss": 0.6306, "rewards/accuracies": 0.9375, "rewards/chosen": 0.04416840150952339, "rewards/margins": 0.1527363359928131, "rewards/rejected": -0.10856793075799942, "step": 64 }, { "epoch": 0.02, "learning_rate": 7.386363636363636e-07, "logits/chosen": -0.23446856439113617, "logits/rejected": -0.2400975078344345, "logps/chosen": -216.3819122314453, "logps/rejected": -163.8125, "loss": 0.6259, "rewards/accuracies": 0.9375, "rewards/chosen": 0.030165541917085648, "rewards/margins": 0.1954352706670761, "rewards/rejected": -0.16526971757411957, "step": 65 }, { "epoch": 0.02, "learning_rate": 7.5e-07, "logits/chosen": -0.2053586095571518, "logits/rejected": -0.21432319283485413, "logps/chosen": -177.5720672607422, "logps/rejected": -150.52635192871094, "loss": 0.6312, "rewards/accuracies": 0.875, "rewards/chosen": 0.0359819196164608, "rewards/margins": 0.140329971909523, "rewards/rejected": -0.10434805601835251, "step": 66 }, { "epoch": 0.02, "learning_rate": 7.613636363636364e-07, "logits/chosen": -0.32677534222602844, "logits/rejected": -0.326854944229126, "logps/chosen": -172.74691772460938, "logps/rejected": -172.62301635742188, "loss": 0.6069, "rewards/accuracies": 0.9375, "rewards/chosen": 0.01757200062274933, "rewards/margins": 0.08874170482158661, "rewards/rejected": -0.07116970419883728, "step": 67 }, { "epoch": 0.02, "learning_rate": 7.727272727272727e-07, "logits/chosen": -0.2199333906173706, "logits/rejected": -0.22284862399101257, "logps/chosen": -170.17539978027344, "logps/rejected": -130.41236877441406, "loss": 0.6149, "rewards/accuracies": 0.875, "rewards/chosen": 0.021582338958978653, "rewards/margins": 0.11822602897882462, "rewards/rejected": -0.09664369374513626, "step": 68 }, { "epoch": 0.02, "learning_rate": 7.840909090909091e-07, "logits/chosen": -0.27920055389404297, "logits/rejected": -0.27513226866722107, "logps/chosen": -171.66378784179688, "logps/rejected": -177.5590362548828, "loss": 0.6146, "rewards/accuracies": 1.0, "rewards/chosen": 0.02089916542172432, "rewards/margins": 0.1842496693134308, "rewards/rejected": -0.16335052251815796, "step": 69 }, { "epoch": 0.02, "learning_rate": 7.954545454545454e-07, "logits/chosen": -0.310484915971756, "logits/rejected": -0.3289034962654114, "logps/chosen": -248.84005737304688, "logps/rejected": -160.34381103515625, "loss": 0.6114, "rewards/accuracies": 0.875, "rewards/chosen": 0.016305895522236824, "rewards/margins": 0.24285130202770233, "rewards/rejected": -0.22654540836811066, "step": 70 }, { "epoch": 0.02, "learning_rate": 8.068181818181818e-07, "logits/chosen": -0.24062317609786987, "logits/rejected": -0.2547876536846161, "logps/chosen": -190.35926818847656, "logps/rejected": -197.4306182861328, "loss": 0.571, "rewards/accuracies": 0.875, "rewards/chosen": 0.05205469951033592, "rewards/margins": 0.2569831609725952, "rewards/rejected": -0.20492848753929138, "step": 71 }, { "epoch": 0.02, "learning_rate": 8.181818181818182e-07, "logits/chosen": -0.2693640887737274, "logits/rejected": -0.2762475907802582, "logps/chosen": -208.79025268554688, "logps/rejected": -198.47604370117188, "loss": 0.5953, "rewards/accuracies": 1.0, "rewards/chosen": 0.04773230105638504, "rewards/margins": 0.29477986693382263, "rewards/rejected": -0.2470475435256958, "step": 72 }, { "epoch": 0.02, "learning_rate": 8.295454545454546e-07, "logits/chosen": -0.333812952041626, "logits/rejected": -0.3479444086551666, "logps/chosen": -205.3343048095703, "logps/rejected": -196.71554565429688, "loss": 0.5977, "rewards/accuracies": 0.8125, "rewards/chosen": 0.05839420109987259, "rewards/margins": 0.21544018387794495, "rewards/rejected": -0.15704597532749176, "step": 73 }, { "epoch": 0.03, "learning_rate": 8.409090909090909e-07, "logits/chosen": -0.3686254024505615, "logits/rejected": -0.37036076188087463, "logps/chosen": -116.63627624511719, "logps/rejected": -131.9492645263672, "loss": 0.6004, "rewards/accuracies": 0.9375, "rewards/chosen": 0.018282730132341385, "rewards/margins": 0.13822995126247406, "rewards/rejected": -0.11994721740484238, "step": 74 }, { "epoch": 0.03, "learning_rate": 8.522727272727273e-07, "logits/chosen": -0.304465651512146, "logits/rejected": -0.31471243500709534, "logps/chosen": -242.49000549316406, "logps/rejected": -222.29351806640625, "loss": 0.5891, "rewards/accuracies": 1.0, "rewards/chosen": 0.07918901741504669, "rewards/margins": 0.2643670439720154, "rewards/rejected": -0.1851780116558075, "step": 75 }, { "epoch": 0.03, "learning_rate": 8.636363636363636e-07, "logits/chosen": -0.36933425068855286, "logits/rejected": -0.36518651247024536, "logps/chosen": -168.39459228515625, "logps/rejected": -140.6094207763672, "loss": 0.6003, "rewards/accuracies": 1.0, "rewards/chosen": 0.028737641870975494, "rewards/margins": 0.18984174728393555, "rewards/rejected": -0.16110409796237946, "step": 76 }, { "epoch": 0.03, "learning_rate": 8.75e-07, "logits/chosen": -0.22869083285331726, "logits/rejected": -0.22699636220932007, "logps/chosen": -174.7917022705078, "logps/rejected": -143.05227661132812, "loss": 0.5903, "rewards/accuracies": 0.9375, "rewards/chosen": 0.05000508576631546, "rewards/margins": 0.2358611524105072, "rewards/rejected": -0.18585607409477234, "step": 77 }, { "epoch": 0.03, "learning_rate": 8.863636363636363e-07, "logits/chosen": -0.3938540816307068, "logits/rejected": -0.40232500433921814, "logps/chosen": -139.92030334472656, "logps/rejected": -133.96234130859375, "loss": 0.5821, "rewards/accuracies": 1.0, "rewards/chosen": 0.0375688262283802, "rewards/margins": 0.1981363296508789, "rewards/rejected": -0.1605674922466278, "step": 78 }, { "epoch": 0.03, "learning_rate": 8.977272727272727e-07, "logits/chosen": -0.2505624294281006, "logits/rejected": -0.26981931924819946, "logps/chosen": -216.5614013671875, "logps/rejected": -246.10362243652344, "loss": 0.5874, "rewards/accuracies": 1.0, "rewards/chosen": 0.016081776469945908, "rewards/margins": 0.2081051766872406, "rewards/rejected": -0.1920233815908432, "step": 79 }, { "epoch": 0.03, "learning_rate": 9.09090909090909e-07, "logits/chosen": -0.306325227022171, "logits/rejected": -0.32030701637268066, "logps/chosen": -173.77772521972656, "logps/rejected": -155.49526977539062, "loss": 0.5844, "rewards/accuracies": 0.9375, "rewards/chosen": 0.027837982401251793, "rewards/margins": 0.28767284750938416, "rewards/rejected": -0.259834885597229, "step": 80 }, { "epoch": 0.03, "learning_rate": 9.204545454545454e-07, "logits/chosen": -0.3258974850177765, "logits/rejected": -0.33398178219795227, "logps/chosen": -142.85398864746094, "logps/rejected": -129.8448944091797, "loss": 0.5756, "rewards/accuracies": 1.0, "rewards/chosen": 0.03404791280627251, "rewards/margins": 0.20471598207950592, "rewards/rejected": -0.17066806554794312, "step": 81 }, { "epoch": 0.03, "learning_rate": 9.318181818181817e-07, "logits/chosen": -0.3090137243270874, "logits/rejected": -0.310472309589386, "logps/chosen": -221.82763671875, "logps/rejected": -199.4500732421875, "loss": 0.5597, "rewards/accuracies": 1.0, "rewards/chosen": 0.08749037981033325, "rewards/margins": 0.25874781608581543, "rewards/rejected": -0.17125742137432098, "step": 82 }, { "epoch": 0.03, "learning_rate": 9.431818181818182e-07, "logits/chosen": -0.309256911277771, "logits/rejected": -0.31177881360054016, "logps/chosen": -233.4256134033203, "logps/rejected": -224.6199951171875, "loss": 0.5527, "rewards/accuracies": 0.9375, "rewards/chosen": 0.06995458900928497, "rewards/margins": 0.42711231112480164, "rewards/rejected": -0.35715773701667786, "step": 83 }, { "epoch": 0.03, "learning_rate": 9.545454545454546e-07, "logits/chosen": -0.294746458530426, "logits/rejected": -0.3004245162010193, "logps/chosen": -260.4269714355469, "logps/rejected": -236.80093383789062, "loss": 0.5403, "rewards/accuracies": 1.0, "rewards/chosen": 0.12659308314323425, "rewards/margins": 0.345445454120636, "rewards/rejected": -0.21885238587856293, "step": 84 }, { "epoch": 0.03, "learning_rate": 9.65909090909091e-07, "logits/chosen": -0.2632029354572296, "logits/rejected": -0.2527550160884857, "logps/chosen": -208.35995483398438, "logps/rejected": -178.50851440429688, "loss": 0.5545, "rewards/accuracies": 1.0, "rewards/chosen": 0.06315146386623383, "rewards/margins": 0.4456632137298584, "rewards/rejected": -0.38251176476478577, "step": 85 }, { "epoch": 0.03, "learning_rate": 9.772727272727273e-07, "logits/chosen": -0.2838377356529236, "logits/rejected": -0.29327645897865295, "logps/chosen": -234.36648559570312, "logps/rejected": -154.83839416503906, "loss": 0.5595, "rewards/accuracies": 1.0, "rewards/chosen": 0.07653575390577316, "rewards/margins": 0.39571109414100647, "rewards/rejected": -0.3191753327846527, "step": 86 }, { "epoch": 0.03, "learning_rate": 9.886363636363636e-07, "logits/chosen": -0.3051324486732483, "logits/rejected": -0.3067479133605957, "logps/chosen": -201.1697540283203, "logps/rejected": -164.40013122558594, "loss": 0.5408, "rewards/accuracies": 1.0, "rewards/chosen": 0.0846482440829277, "rewards/margins": 0.3141711950302124, "rewards/rejected": -0.2295229583978653, "step": 87 }, { "epoch": 0.03, "learning_rate": 1e-06, "logits/chosen": -0.2840867340564728, "logits/rejected": -0.29730719327926636, "logps/chosen": -313.0265808105469, "logps/rejected": -256.49554443359375, "loss": 0.5386, "rewards/accuracies": 1.0, "rewards/chosen": 0.18609462678432465, "rewards/margins": 0.4921185076236725, "rewards/rejected": -0.30602386593818665, "step": 88 }, { "epoch": 0.03, "learning_rate": 1.0113636363636365e-06, "logits/chosen": -0.2443823218345642, "logits/rejected": -0.25303903222084045, "logps/chosen": -221.6973419189453, "logps/rejected": -201.59088134765625, "loss": 0.5281, "rewards/accuracies": 1.0, "rewards/chosen": 0.10603172332048416, "rewards/margins": 0.376494437456131, "rewards/rejected": -0.2704627215862274, "step": 89 }, { "epoch": 0.03, "learning_rate": 1.0227272727272727e-06, "logits/chosen": -0.39637792110443115, "logits/rejected": -0.4053821563720703, "logps/chosen": -276.1510925292969, "logps/rejected": -246.34449768066406, "loss": 0.5159, "rewards/accuracies": 1.0, "rewards/chosen": 0.185167133808136, "rewards/margins": 0.6777635216712952, "rewards/rejected": -0.49259647727012634, "step": 90 }, { "epoch": 0.03, "learning_rate": 1.0340909090909092e-06, "logits/chosen": -0.29460346698760986, "logits/rejected": -0.30180084705352783, "logps/chosen": -194.49655151367188, "logps/rejected": -198.30471801757812, "loss": 0.5425, "rewards/accuracies": 1.0, "rewards/chosen": 0.10820051282644272, "rewards/margins": 0.48493844270706177, "rewards/rejected": -0.37673795223236084, "step": 91 }, { "epoch": 0.03, "learning_rate": 1.0454545454545454e-06, "logits/chosen": -0.2530690133571625, "logits/rejected": -0.2599208950996399, "logps/chosen": -202.495361328125, "logps/rejected": -214.04393005371094, "loss": 0.5347, "rewards/accuracies": 1.0, "rewards/chosen": 0.1334606111049652, "rewards/margins": 0.483165442943573, "rewards/rejected": -0.3497048318386078, "step": 92 }, { "epoch": 0.03, "learning_rate": 1.056818181818182e-06, "logits/chosen": -0.2823459804058075, "logits/rejected": -0.299932599067688, "logps/chosen": -212.8408660888672, "logps/rejected": -204.1206512451172, "loss": 0.5416, "rewards/accuracies": 1.0, "rewards/chosen": 0.12048784643411636, "rewards/margins": 0.3460257947444916, "rewards/rejected": -0.22553794085979462, "step": 93 }, { "epoch": 0.03, "learning_rate": 1.068181818181818e-06, "logits/chosen": -0.24119973182678223, "logits/rejected": -0.2592056393623352, "logps/chosen": -236.3755340576172, "logps/rejected": -275.932373046875, "loss": 0.5014, "rewards/accuracies": 1.0, "rewards/chosen": 0.16808678209781647, "rewards/margins": 0.6815425753593445, "rewards/rejected": -0.5134557485580444, "step": 94 }, { "epoch": 0.03, "learning_rate": 1.0795454545454546e-06, "logits/chosen": -0.3309584856033325, "logits/rejected": -0.3334220349788666, "logps/chosen": -112.8882064819336, "logps/rejected": -121.0035629272461, "loss": 0.5301, "rewards/accuracies": 0.875, "rewards/chosen": 0.024409087374806404, "rewards/margins": 0.2664479613304138, "rewards/rejected": -0.24203884601593018, "step": 95 }, { "epoch": 0.03, "learning_rate": 1.0909090909090908e-06, "logits/chosen": -0.4337083399295807, "logits/rejected": -0.42668864130973816, "logps/chosen": -175.67007446289062, "logps/rejected": -154.1935272216797, "loss": 0.5484, "rewards/accuracies": 1.0, "rewards/chosen": 0.07397403568029404, "rewards/margins": 0.2531187832355499, "rewards/rejected": -0.1791447401046753, "step": 96 }, { "epoch": 0.03, "learning_rate": 1.1022727272727273e-06, "logits/chosen": -0.23907527327537537, "logits/rejected": -0.25098279118537903, "logps/chosen": -255.4123992919922, "logps/rejected": -279.53271484375, "loss": 0.5087, "rewards/accuracies": 0.9375, "rewards/chosen": 0.17864878475666046, "rewards/margins": 0.603197455406189, "rewards/rejected": -0.4245486855506897, "step": 97 }, { "epoch": 0.03, "learning_rate": 1.1136363636363635e-06, "logits/chosen": -0.2921161353588104, "logits/rejected": -0.28569892048835754, "logps/chosen": -178.116943359375, "logps/rejected": -158.86248779296875, "loss": 0.4614, "rewards/accuracies": 1.0, "rewards/chosen": 0.11198291927576065, "rewards/margins": 0.5350670218467712, "rewards/rejected": -0.4230841398239136, "step": 98 }, { "epoch": 0.03, "learning_rate": 1.125e-06, "logits/chosen": -0.2839020788669586, "logits/rejected": -0.2888544201850891, "logps/chosen": -178.8844757080078, "logps/rejected": -194.3294219970703, "loss": 0.5092, "rewards/accuracies": 1.0, "rewards/chosen": 0.054926350712776184, "rewards/margins": 0.35526803135871887, "rewards/rejected": -0.3003416955471039, "step": 99 }, { "epoch": 0.03, "learning_rate": 1.1363636363636364e-06, "logits/chosen": -0.26814714074134827, "logits/rejected": -0.2645610272884369, "logps/chosen": -141.58978271484375, "logps/rejected": -125.22686767578125, "loss": 0.5297, "rewards/accuracies": 1.0, "rewards/chosen": 0.0630752369761467, "rewards/margins": 0.3125172555446625, "rewards/rejected": -0.24944201111793518, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.1477272727272727e-06, "logits/chosen": -0.32882678508758545, "logits/rejected": -0.32238173484802246, "logps/chosen": -197.72889709472656, "logps/rejected": -154.53463745117188, "loss": 0.4683, "rewards/accuracies": 0.9375, "rewards/chosen": 0.11453827470541, "rewards/margins": 0.5272652506828308, "rewards/rejected": -0.4127269685268402, "step": 101 }, { "epoch": 0.03, "learning_rate": 1.159090909090909e-06, "logits/chosen": -0.2614593803882599, "logits/rejected": -0.26820310950279236, "logps/chosen": -254.67428588867188, "logps/rejected": -186.9420166015625, "loss": 0.4715, "rewards/accuracies": 0.9375, "rewards/chosen": 0.2316691130399704, "rewards/margins": 0.6147348284721375, "rewards/rejected": -0.38306567072868347, "step": 102 }, { "epoch": 0.04, "learning_rate": 1.1704545454545454e-06, "logits/chosen": -0.20947162806987762, "logits/rejected": -0.21985360980033875, "logps/chosen": -172.54754638671875, "logps/rejected": -198.98965454101562, "loss": 0.4829, "rewards/accuracies": 1.0, "rewards/chosen": 0.10745193809270859, "rewards/margins": 0.6254303455352783, "rewards/rejected": -0.5179784297943115, "step": 103 }, { "epoch": 0.04, "learning_rate": 1.1818181818181818e-06, "logits/chosen": -0.31164488196372986, "logits/rejected": -0.3165520131587982, "logps/chosen": -120.79139709472656, "logps/rejected": -123.66410064697266, "loss": 0.4688, "rewards/accuracies": 0.9375, "rewards/chosen": 0.05448758974671364, "rewards/margins": 0.4392634332180023, "rewards/rejected": -0.3847758173942566, "step": 104 }, { "epoch": 0.04, "learning_rate": 1.1931818181818181e-06, "logits/chosen": -0.3748607933521271, "logits/rejected": -0.3902784585952759, "logps/chosen": -222.30880737304688, "logps/rejected": -250.7407684326172, "loss": 0.4456, "rewards/accuracies": 1.0, "rewards/chosen": 0.1969928741455078, "rewards/margins": 0.8082451224327087, "rewards/rejected": -0.6112521886825562, "step": 105 }, { "epoch": 0.04, "learning_rate": 1.2045454545454545e-06, "logits/chosen": -0.23596064746379852, "logits/rejected": -0.2437048852443695, "logps/chosen": -201.08782958984375, "logps/rejected": -193.86483764648438, "loss": 0.4744, "rewards/accuracies": 1.0, "rewards/chosen": 0.1580810397863388, "rewards/margins": 0.6030731201171875, "rewards/rejected": -0.4449920952320099, "step": 106 }, { "epoch": 0.04, "learning_rate": 1.2159090909090908e-06, "logits/chosen": -0.2758455276489258, "logits/rejected": -0.2816866338253021, "logps/chosen": -187.90516662597656, "logps/rejected": -165.03646850585938, "loss": 0.4612, "rewards/accuracies": 1.0, "rewards/chosen": 0.14558759331703186, "rewards/margins": 0.6994193196296692, "rewards/rejected": -0.5538316965103149, "step": 107 }, { "epoch": 0.04, "learning_rate": 1.2272727272727272e-06, "logits/chosen": -0.27118244767189026, "logits/rejected": -0.27633270621299744, "logps/chosen": -205.90255737304688, "logps/rejected": -171.53683471679688, "loss": 0.4279, "rewards/accuracies": 1.0, "rewards/chosen": 0.22411401569843292, "rewards/margins": 0.79705810546875, "rewards/rejected": -0.5729440450668335, "step": 108 }, { "epoch": 0.04, "learning_rate": 1.2386363636363635e-06, "logits/chosen": -0.22304943203926086, "logits/rejected": -0.21854890882968903, "logps/chosen": -220.76426696777344, "logps/rejected": -198.42559814453125, "loss": 0.456, "rewards/accuracies": 1.0, "rewards/chosen": 0.2585577368736267, "rewards/margins": 0.6996555328369141, "rewards/rejected": -0.44109779596328735, "step": 109 }, { "epoch": 0.04, "learning_rate": 1.2499999999999999e-06, "logits/chosen": -0.26021042466163635, "logits/rejected": -0.27493855357170105, "logps/chosen": -154.87506103515625, "logps/rejected": -214.43991088867188, "loss": 0.438, "rewards/accuracies": 1.0, "rewards/chosen": 0.13620230555534363, "rewards/margins": 0.6442185640335083, "rewards/rejected": -0.5080162286758423, "step": 110 }, { "epoch": 0.04, "learning_rate": 1.2613636363636365e-06, "logits/chosen": -0.2795673906803131, "logits/rejected": -0.2932632565498352, "logps/chosen": -184.40252685546875, "logps/rejected": -201.50775146484375, "loss": 0.4521, "rewards/accuracies": 1.0, "rewards/chosen": 0.1196354478597641, "rewards/margins": 0.6086852550506592, "rewards/rejected": -0.48904985189437866, "step": 111 }, { "epoch": 0.04, "learning_rate": 1.2727272727272726e-06, "logits/chosen": -0.2300095409154892, "logits/rejected": -0.24662262201309204, "logps/chosen": -146.2750244140625, "logps/rejected": -176.50430297851562, "loss": 0.4212, "rewards/accuracies": 1.0, "rewards/chosen": 0.153935045003891, "rewards/margins": 0.8446798324584961, "rewards/rejected": -0.6907448172569275, "step": 112 }, { "epoch": 0.04, "learning_rate": 1.2840909090909092e-06, "logits/chosen": -0.3900449573993683, "logits/rejected": -0.3905314803123474, "logps/chosen": -207.68353271484375, "logps/rejected": -201.84217834472656, "loss": 0.4417, "rewards/accuracies": 1.0, "rewards/chosen": 0.15425121784210205, "rewards/margins": 0.6711564660072327, "rewards/rejected": -0.5169052481651306, "step": 113 }, { "epoch": 0.04, "learning_rate": 1.2954545454545453e-06, "logits/chosen": -0.32928410172462463, "logits/rejected": -0.32355135679244995, "logps/chosen": -177.33853149414062, "logps/rejected": -162.58676147460938, "loss": 0.4403, "rewards/accuracies": 0.9375, "rewards/chosen": 0.10238675773143768, "rewards/margins": 0.4705532193183899, "rewards/rejected": -0.3681664764881134, "step": 114 }, { "epoch": 0.04, "learning_rate": 1.3068181818181819e-06, "logits/chosen": -0.3315610885620117, "logits/rejected": -0.3430657386779785, "logps/chosen": -208.52061462402344, "logps/rejected": -203.94964599609375, "loss": 0.4102, "rewards/accuracies": 1.0, "rewards/chosen": 0.16826194524765015, "rewards/margins": 0.9024280309677124, "rewards/rejected": -0.7341660857200623, "step": 115 }, { "epoch": 0.04, "learning_rate": 1.318181818181818e-06, "logits/chosen": -0.317382276058197, "logits/rejected": -0.32036662101745605, "logps/chosen": -175.7088165283203, "logps/rejected": -184.56007385253906, "loss": 0.3965, "rewards/accuracies": 0.9375, "rewards/chosen": 0.08706095814704895, "rewards/margins": 0.726379930973053, "rewards/rejected": -0.6393190026283264, "step": 116 }, { "epoch": 0.04, "learning_rate": 1.3295454545454546e-06, "logits/chosen": -0.3113667666912079, "logits/rejected": -0.30850234627723694, "logps/chosen": -209.80926513671875, "logps/rejected": -182.0028076171875, "loss": 0.4213, "rewards/accuracies": 1.0, "rewards/chosen": 0.1665186882019043, "rewards/margins": 0.6158881187438965, "rewards/rejected": -0.4493694007396698, "step": 117 }, { "epoch": 0.04, "learning_rate": 1.3409090909090907e-06, "logits/chosen": -0.24614164233207703, "logits/rejected": -0.24931690096855164, "logps/chosen": -159.66810607910156, "logps/rejected": -194.7740936279297, "loss": 0.4118, "rewards/accuracies": 1.0, "rewards/chosen": 0.13190045952796936, "rewards/margins": 1.0180938243865967, "rewards/rejected": -0.8861933946609497, "step": 118 }, { "epoch": 0.04, "learning_rate": 1.3522727272727273e-06, "logits/chosen": -0.2081093192100525, "logits/rejected": -0.2145293951034546, "logps/chosen": -98.24690246582031, "logps/rejected": -125.65071105957031, "loss": 0.4219, "rewards/accuracies": 0.9375, "rewards/chosen": 0.028991155326366425, "rewards/margins": 0.6292340159416199, "rewards/rejected": -0.6002429127693176, "step": 119 }, { "epoch": 0.04, "learning_rate": 1.3636363636363634e-06, "logits/chosen": -0.2834087312221527, "logits/rejected": -0.28440916538238525, "logps/chosen": -145.87591552734375, "logps/rejected": -143.73382568359375, "loss": 0.3921, "rewards/accuracies": 1.0, "rewards/chosen": 0.09373851865530014, "rewards/margins": 0.7656128406524658, "rewards/rejected": -0.6718742847442627, "step": 120 }, { "epoch": 0.04, "learning_rate": 1.375e-06, "logits/chosen": -0.23533210158348083, "logits/rejected": -0.23994754254817963, "logps/chosen": -225.70562744140625, "logps/rejected": -191.5024871826172, "loss": 0.3901, "rewards/accuracies": 1.0, "rewards/chosen": 0.1738625466823578, "rewards/margins": 0.7662487030029297, "rewards/rejected": -0.5923861265182495, "step": 121 }, { "epoch": 0.04, "learning_rate": 1.3863636363636363e-06, "logits/chosen": -0.2936233878135681, "logits/rejected": -0.2864232361316681, "logps/chosen": -214.13616943359375, "logps/rejected": -178.05882263183594, "loss": 0.4135, "rewards/accuracies": 1.0, "rewards/chosen": 0.2017645537853241, "rewards/margins": 0.8474429845809937, "rewards/rejected": -0.6456784009933472, "step": 122 }, { "epoch": 0.04, "learning_rate": 1.3977272727272727e-06, "logits/chosen": -0.2096487432718277, "logits/rejected": -0.21864084899425507, "logps/chosen": -155.02088928222656, "logps/rejected": -182.95851135253906, "loss": 0.3798, "rewards/accuracies": 0.9375, "rewards/chosen": 0.06318338215351105, "rewards/margins": 1.0848556756973267, "rewards/rejected": -1.021672248840332, "step": 123 }, { "epoch": 0.04, "learning_rate": 1.409090909090909e-06, "logits/chosen": -0.3720046579837799, "logits/rejected": -0.38072076439857483, "logps/chosen": -158.5787353515625, "logps/rejected": -191.99363708496094, "loss": 0.368, "rewards/accuracies": 1.0, "rewards/chosen": 0.17912383377552032, "rewards/margins": 0.874208927154541, "rewards/rejected": -0.6950851082801819, "step": 124 }, { "epoch": 0.04, "learning_rate": 1.4204545454545454e-06, "logits/chosen": -0.2715194523334503, "logits/rejected": -0.2809720039367676, "logps/chosen": -179.04879760742188, "logps/rejected": -203.93362426757812, "loss": 0.3482, "rewards/accuracies": 1.0, "rewards/chosen": 0.20429396629333496, "rewards/margins": 1.2084670066833496, "rewards/rejected": -1.0041730403900146, "step": 125 }, { "epoch": 0.04, "learning_rate": 1.4318181818181817e-06, "logits/chosen": -0.37045150995254517, "logits/rejected": -0.37621599435806274, "logps/chosen": -154.33631896972656, "logps/rejected": -157.79010009765625, "loss": 0.3986, "rewards/accuracies": 1.0, "rewards/chosen": 0.08530270308256149, "rewards/margins": 0.60507732629776, "rewards/rejected": -0.5197745561599731, "step": 126 }, { "epoch": 0.04, "learning_rate": 1.443181818181818e-06, "logits/chosen": -0.3776751458644867, "logits/rejected": -0.39013615250587463, "logps/chosen": -171.34979248046875, "logps/rejected": -184.8634490966797, "loss": 0.3994, "rewards/accuracies": 1.0, "rewards/chosen": 0.24347425997257233, "rewards/margins": 0.9789813756942749, "rewards/rejected": -0.7355071306228638, "step": 127 }, { "epoch": 0.04, "learning_rate": 1.4545454545454544e-06, "logits/chosen": -0.32203423976898193, "logits/rejected": -0.3347160220146179, "logps/chosen": -190.24795532226562, "logps/rejected": -176.648681640625, "loss": 0.3975, "rewards/accuracies": 1.0, "rewards/chosen": 0.18437963724136353, "rewards/margins": 1.1202683448791504, "rewards/rejected": -0.9358887076377869, "step": 128 }, { "epoch": 0.04, "learning_rate": 1.4659090909090908e-06, "logits/chosen": -0.40195533633232117, "logits/rejected": -0.4068918228149414, "logps/chosen": -204.0840606689453, "logps/rejected": -203.23899841308594, "loss": 0.4121, "rewards/accuracies": 1.0, "rewards/chosen": 0.30179303884506226, "rewards/margins": 1.202272891998291, "rewards/rejected": -0.9004798531532288, "step": 129 }, { "epoch": 0.04, "learning_rate": 1.4772727272727271e-06, "logits/chosen": -0.2693186402320862, "logits/rejected": -0.281973272562027, "logps/chosen": -158.54212951660156, "logps/rejected": -175.02713012695312, "loss": 0.3525, "rewards/accuracies": 1.0, "rewards/chosen": 0.0762750655412674, "rewards/margins": 0.9437751173973083, "rewards/rejected": -0.8675000667572021, "step": 130 }, { "epoch": 0.04, "learning_rate": 1.4886363636363635e-06, "logits/chosen": -0.2922983169555664, "logits/rejected": -0.28905797004699707, "logps/chosen": -218.3372802734375, "logps/rejected": -225.04345703125, "loss": 0.3463, "rewards/accuracies": 1.0, "rewards/chosen": 0.2669279873371124, "rewards/margins": 1.4979889392852783, "rewards/rejected": -1.2310609817504883, "step": 131 }, { "epoch": 0.05, "learning_rate": 1.5e-06, "logits/chosen": -0.31873956322669983, "logits/rejected": -0.322784423828125, "logps/chosen": -124.70992279052734, "logps/rejected": -152.57647705078125, "loss": 0.4117, "rewards/accuracies": 1.0, "rewards/chosen": 0.13669049739837646, "rewards/margins": 0.7656825184822083, "rewards/rejected": -0.628991961479187, "step": 132 }, { "epoch": 0.05, "learning_rate": 1.5113636363636364e-06, "logits/chosen": -0.34708261489868164, "logits/rejected": -0.344554603099823, "logps/chosen": -183.201416015625, "logps/rejected": -160.19871520996094, "loss": 0.3978, "rewards/accuracies": 0.8125, "rewards/chosen": 0.1976192146539688, "rewards/margins": 0.8576391935348511, "rewards/rejected": -0.6600199937820435, "step": 133 }, { "epoch": 0.05, "learning_rate": 1.5227272727272727e-06, "logits/chosen": -0.3323155343532562, "logits/rejected": -0.33461064100265503, "logps/chosen": -236.8403778076172, "logps/rejected": -248.24327087402344, "loss": 0.311, "rewards/accuracies": 1.0, "rewards/chosen": 0.3269212245941162, "rewards/margins": 1.4122623205184937, "rewards/rejected": -1.085341215133667, "step": 134 }, { "epoch": 0.05, "learning_rate": 1.534090909090909e-06, "logits/chosen": -0.2644631862640381, "logits/rejected": -0.2777220606803894, "logps/chosen": -157.43328857421875, "logps/rejected": -212.07070922851562, "loss": 0.3865, "rewards/accuracies": 0.9375, "rewards/chosen": 0.10111045837402344, "rewards/margins": 0.9367893934249878, "rewards/rejected": -0.8356789350509644, "step": 135 }, { "epoch": 0.05, "learning_rate": 1.5454545454545454e-06, "logits/chosen": -0.36522430181503296, "logits/rejected": -0.3888193964958191, "logps/chosen": -176.78811645507812, "logps/rejected": -183.68313598632812, "loss": 0.3721, "rewards/accuracies": 1.0, "rewards/chosen": 0.270674467086792, "rewards/margins": 1.1387768983840942, "rewards/rejected": -0.8681024312973022, "step": 136 }, { "epoch": 0.05, "learning_rate": 1.5568181818181818e-06, "logits/chosen": -0.3115520477294922, "logits/rejected": -0.31727683544158936, "logps/chosen": -193.74058532714844, "logps/rejected": -163.82839965820312, "loss": 0.3643, "rewards/accuracies": 1.0, "rewards/chosen": 0.21308816969394684, "rewards/margins": 1.1288399696350098, "rewards/rejected": -0.9157517552375793, "step": 137 }, { "epoch": 0.05, "learning_rate": 1.5681818181818181e-06, "logits/chosen": -0.28774333000183105, "logits/rejected": -0.28708741068840027, "logps/chosen": -195.24732971191406, "logps/rejected": -164.86572265625, "loss": 0.3762, "rewards/accuracies": 1.0, "rewards/chosen": 0.28013092279434204, "rewards/margins": 1.2544047832489014, "rewards/rejected": -0.9742739200592041, "step": 138 }, { "epoch": 0.05, "learning_rate": 1.5795454545454545e-06, "logits/chosen": -0.2292327880859375, "logits/rejected": -0.23523594439029694, "logps/chosen": -124.31800842285156, "logps/rejected": -118.8453369140625, "loss": 0.3495, "rewards/accuracies": 1.0, "rewards/chosen": 0.18718545138835907, "rewards/margins": 1.2792010307312012, "rewards/rejected": -1.0920155048370361, "step": 139 }, { "epoch": 0.05, "learning_rate": 1.5909090909090908e-06, "logits/chosen": -0.34182170033454895, "logits/rejected": -0.3457019329071045, "logps/chosen": -146.86514282226562, "logps/rejected": -188.8411407470703, "loss": 0.2929, "rewards/accuracies": 1.0, "rewards/chosen": -0.05626543238759041, "rewards/margins": 1.3213804960250854, "rewards/rejected": -1.3776459693908691, "step": 140 }, { "epoch": 0.05, "learning_rate": 1.6022727272727272e-06, "logits/chosen": -0.26892855763435364, "logits/rejected": -0.26342225074768066, "logps/chosen": -201.35934448242188, "logps/rejected": -175.44143676757812, "loss": 0.3659, "rewards/accuracies": 1.0, "rewards/chosen": 0.37645649909973145, "rewards/margins": 1.2960493564605713, "rewards/rejected": -0.9195929169654846, "step": 141 }, { "epoch": 0.05, "learning_rate": 1.6136363636363635e-06, "logits/chosen": -0.3085240125656128, "logits/rejected": -0.3257598876953125, "logps/chosen": -175.3349151611328, "logps/rejected": -200.6924591064453, "loss": 0.3448, "rewards/accuracies": 1.0, "rewards/chosen": 0.3023070693016052, "rewards/margins": 1.4338994026184082, "rewards/rejected": -1.1315922737121582, "step": 142 }, { "epoch": 0.05, "learning_rate": 1.625e-06, "logits/chosen": -0.2711722254753113, "logits/rejected": -0.2624908983707428, "logps/chosen": -209.41744995117188, "logps/rejected": -233.4835205078125, "loss": 0.3034, "rewards/accuracies": 1.0, "rewards/chosen": 0.2840636074542999, "rewards/margins": 1.4092286825180054, "rewards/rejected": -1.1251649856567383, "step": 143 }, { "epoch": 0.05, "learning_rate": 1.6363636363636365e-06, "logits/chosen": -0.364714652299881, "logits/rejected": -0.35324376821517944, "logps/chosen": -258.91668701171875, "logps/rejected": -216.81932067871094, "loss": 0.3311, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3898249566555023, "rewards/margins": 1.618047833442688, "rewards/rejected": -1.2282228469848633, "step": 144 }, { "epoch": 0.05, "learning_rate": 1.6477272727272726e-06, "logits/chosen": -0.33866554498672485, "logits/rejected": -0.33231908082962036, "logps/chosen": -208.3145751953125, "logps/rejected": -169.0670928955078, "loss": 0.2945, "rewards/accuracies": 1.0, "rewards/chosen": 0.28327953815460205, "rewards/margins": 1.4729082584381104, "rewards/rejected": -1.1896286010742188, "step": 145 }, { "epoch": 0.05, "learning_rate": 1.6590909090909092e-06, "logits/chosen": -0.38993075489997864, "logits/rejected": -0.38918256759643555, "logps/chosen": -163.18370056152344, "logps/rejected": -117.95189666748047, "loss": 0.3823, "rewards/accuracies": 1.0, "rewards/chosen": 0.15582098066806793, "rewards/margins": 0.7134914398193359, "rewards/rejected": -0.5576704740524292, "step": 146 }, { "epoch": 0.05, "learning_rate": 1.6704545454545453e-06, "logits/chosen": -0.2659554183483124, "logits/rejected": -0.28329434990882874, "logps/chosen": -173.72486877441406, "logps/rejected": -207.60687255859375, "loss": 0.3361, "rewards/accuracies": 1.0, "rewards/chosen": 0.1791793406009674, "rewards/margins": 1.4503183364868164, "rewards/rejected": -1.2711389064788818, "step": 147 }, { "epoch": 0.05, "learning_rate": 1.6818181818181819e-06, "logits/chosen": -0.35611143708229065, "logits/rejected": -0.36863651871681213, "logps/chosen": -178.26632690429688, "logps/rejected": -179.1280059814453, "loss": 0.3257, "rewards/accuracies": 1.0, "rewards/chosen": 0.18712829053401947, "rewards/margins": 1.2854491472244263, "rewards/rejected": -1.0983210802078247, "step": 148 }, { "epoch": 0.05, "learning_rate": 1.693181818181818e-06, "logits/chosen": -0.33429384231567383, "logits/rejected": -0.31669431924819946, "logps/chosen": -251.30148315429688, "logps/rejected": -200.7522735595703, "loss": 0.3247, "rewards/accuracies": 1.0, "rewards/chosen": 0.4027822017669678, "rewards/margins": 1.376225471496582, "rewards/rejected": -0.9734432697296143, "step": 149 }, { "epoch": 0.05, "learning_rate": 1.7045454545454546e-06, "logits/chosen": -0.3323809504508972, "logits/rejected": -0.3417486548423767, "logps/chosen": -248.52059936523438, "logps/rejected": -253.62850952148438, "loss": 0.29, "rewards/accuracies": 1.0, "rewards/chosen": 0.4150841236114502, "rewards/margins": 1.6466784477233887, "rewards/rejected": -1.2315943241119385, "step": 150 }, { "epoch": 0.05, "learning_rate": 1.7159090909090907e-06, "logits/chosen": -0.25727328658103943, "logits/rejected": -0.24772073328495026, "logps/chosen": -156.2876434326172, "logps/rejected": -154.349853515625, "loss": 0.3357, "rewards/accuracies": 1.0, "rewards/chosen": 0.19191306829452515, "rewards/margins": 1.030053734779358, "rewards/rejected": -0.8381407260894775, "step": 151 }, { "epoch": 0.05, "learning_rate": 1.7272727272727273e-06, "logits/chosen": -0.30299800634384155, "logits/rejected": -0.31868135929107666, "logps/chosen": -161.91390991210938, "logps/rejected": -193.98464965820312, "loss": 0.333, "rewards/accuracies": 1.0, "rewards/chosen": 0.3269992172718048, "rewards/margins": 1.6451348066329956, "rewards/rejected": -1.3181356191635132, "step": 152 }, { "epoch": 0.05, "learning_rate": 1.7386363636363634e-06, "logits/chosen": -0.3536549508571625, "logits/rejected": -0.3524310886859894, "logps/chosen": -172.74868774414062, "logps/rejected": -150.36004638671875, "loss": 0.3604, "rewards/accuracies": 1.0, "rewards/chosen": 0.3546445667743683, "rewards/margins": 1.0972049236297607, "rewards/rejected": -0.7425603866577148, "step": 153 }, { "epoch": 0.05, "learning_rate": 1.75e-06, "logits/chosen": -0.3706408441066742, "logits/rejected": -0.35760635137557983, "logps/chosen": -242.3128204345703, "logps/rejected": -161.6684112548828, "loss": 0.3222, "rewards/accuracies": 0.9375, "rewards/chosen": 0.4728255569934845, "rewards/margins": 1.2228330373764038, "rewards/rejected": -0.7500073909759521, "step": 154 }, { "epoch": 0.05, "learning_rate": 1.7613636363636363e-06, "logits/chosen": -0.32954031229019165, "logits/rejected": -0.33653953671455383, "logps/chosen": -181.8640899658203, "logps/rejected": -196.3209686279297, "loss": 0.3212, "rewards/accuracies": 1.0, "rewards/chosen": 0.2940194308757782, "rewards/margins": 1.3094691038131714, "rewards/rejected": -1.01544988155365, "step": 155 }, { "epoch": 0.05, "learning_rate": 1.7727272727272727e-06, "logits/chosen": -0.30186495184898376, "logits/rejected": -0.3143565356731415, "logps/chosen": -180.0862274169922, "logps/rejected": -220.50979614257812, "loss": 0.2633, "rewards/accuracies": 1.0, "rewards/chosen": 0.15469466149806976, "rewards/margins": 2.2714929580688477, "rewards/rejected": -2.1167986392974854, "step": 156 }, { "epoch": 0.05, "learning_rate": 1.784090909090909e-06, "logits/chosen": -0.3230380117893219, "logits/rejected": -0.3230920732021332, "logps/chosen": -243.75027465820312, "logps/rejected": -184.87823486328125, "loss": 0.3049, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5771952271461487, "rewards/margins": 1.8048925399780273, "rewards/rejected": -1.2276972532272339, "step": 157 }, { "epoch": 0.05, "learning_rate": 1.7954545454545454e-06, "logits/chosen": -0.25977763533592224, "logits/rejected": -0.25769758224487305, "logps/chosen": -238.49710083007812, "logps/rejected": -177.60189819335938, "loss": 0.3355, "rewards/accuracies": 1.0, "rewards/chosen": 0.4980950355529785, "rewards/margins": 1.667729377746582, "rewards/rejected": -1.169634461402893, "step": 158 }, { "epoch": 0.05, "learning_rate": 1.8068181818181817e-06, "logits/chosen": -0.2575993835926056, "logits/rejected": -0.28068456053733826, "logps/chosen": -225.06556701660156, "logps/rejected": -276.8041076660156, "loss": 0.2982, "rewards/accuracies": 1.0, "rewards/chosen": 0.36269697546958923, "rewards/margins": 2.321885824203491, "rewards/rejected": -1.95918869972229, "step": 159 }, { "epoch": 0.05, "learning_rate": 1.818181818181818e-06, "logits/chosen": -0.3549997806549072, "logits/rejected": -0.35693222284317017, "logps/chosen": -203.8976593017578, "logps/rejected": -158.6942596435547, "loss": 0.2283, "rewards/accuracies": 1.0, "rewards/chosen": 0.4173148274421692, "rewards/margins": 1.6055774688720703, "rewards/rejected": -1.1882624626159668, "step": 160 }, { "epoch": 0.05, "learning_rate": 1.8295454545454544e-06, "logits/chosen": -0.3130599558353424, "logits/rejected": -0.3274880051612854, "logps/chosen": -171.8409423828125, "logps/rejected": -171.6768341064453, "loss": 0.2801, "rewards/accuracies": 1.0, "rewards/chosen": 0.4126873314380646, "rewards/margins": 1.514675498008728, "rewards/rejected": -1.1019881963729858, "step": 161 }, { "epoch": 0.06, "learning_rate": 1.8409090909090908e-06, "logits/chosen": -0.3094772696495056, "logits/rejected": -0.3179211914539337, "logps/chosen": -183.70004272460938, "logps/rejected": -180.28758239746094, "loss": 0.3354, "rewards/accuracies": 1.0, "rewards/chosen": 0.438361257314682, "rewards/margins": 1.534982442855835, "rewards/rejected": -1.096621036529541, "step": 162 }, { "epoch": 0.06, "learning_rate": 1.8522727272727271e-06, "logits/chosen": -0.3603965938091278, "logits/rejected": -0.37283095717430115, "logps/chosen": -200.69607543945312, "logps/rejected": -195.78858947753906, "loss": 0.2769, "rewards/accuracies": 1.0, "rewards/chosen": 0.22661729156970978, "rewards/margins": 1.4247251749038696, "rewards/rejected": -1.1981078386306763, "step": 163 }, { "epoch": 0.06, "learning_rate": 1.8636363636363635e-06, "logits/chosen": -0.363903284072876, "logits/rejected": -0.366928368806839, "logps/chosen": -247.1133575439453, "logps/rejected": -231.3555450439453, "loss": 0.2728, "rewards/accuracies": 1.0, "rewards/chosen": 0.3083007037639618, "rewards/margins": 1.2904253005981445, "rewards/rejected": -0.9821245670318604, "step": 164 }, { "epoch": 0.06, "learning_rate": 1.8749999999999998e-06, "logits/chosen": -0.3064025640487671, "logits/rejected": -0.3192734718322754, "logps/chosen": -195.65162658691406, "logps/rejected": -212.5154571533203, "loss": 0.2932, "rewards/accuracies": 1.0, "rewards/chosen": 0.28117692470550537, "rewards/margins": 1.9292131662368774, "rewards/rejected": -1.6480361223220825, "step": 165 }, { "epoch": 0.06, "learning_rate": 1.8863636363636364e-06, "logits/chosen": -0.45054957270622253, "logits/rejected": -0.45004740357398987, "logps/chosen": -208.6829071044922, "logps/rejected": -219.2479705810547, "loss": 0.2769, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5096732974052429, "rewards/margins": 2.1793723106384277, "rewards/rejected": -1.6696991920471191, "step": 166 }, { "epoch": 0.06, "learning_rate": 1.8977272727272725e-06, "logits/chosen": -0.2851313352584839, "logits/rejected": -0.2832231819629669, "logps/chosen": -163.65957641601562, "logps/rejected": -185.815185546875, "loss": 0.3123, "rewards/accuracies": 1.0, "rewards/chosen": 0.2700868546962738, "rewards/margins": 1.601708173751831, "rewards/rejected": -1.3316211700439453, "step": 167 }, { "epoch": 0.06, "learning_rate": 1.909090909090909e-06, "logits/chosen": -0.34624502062797546, "logits/rejected": -0.36010390520095825, "logps/chosen": -150.51687622070312, "logps/rejected": -172.4933624267578, "loss": 0.2855, "rewards/accuracies": 1.0, "rewards/chosen": 0.2559228241443634, "rewards/margins": 1.7777738571166992, "rewards/rejected": -1.5218511819839478, "step": 168 }, { "epoch": 0.06, "learning_rate": 1.9204545454545452e-06, "logits/chosen": -0.3450705409049988, "logits/rejected": -0.3495578169822693, "logps/chosen": -144.66888427734375, "logps/rejected": -173.21197509765625, "loss": 0.3006, "rewards/accuracies": 1.0, "rewards/chosen": 0.2009996771812439, "rewards/margins": 1.6310127973556519, "rewards/rejected": -1.4300131797790527, "step": 169 }, { "epoch": 0.06, "learning_rate": 1.931818181818182e-06, "logits/chosen": -0.35946163535118103, "logits/rejected": -0.3612998127937317, "logps/chosen": -157.59378051757812, "logps/rejected": -170.15237426757812, "loss": 0.3121, "rewards/accuracies": 1.0, "rewards/chosen": 0.1961860954761505, "rewards/margins": 1.424757480621338, "rewards/rejected": -1.2285714149475098, "step": 170 }, { "epoch": 0.06, "learning_rate": 1.943181818181818e-06, "logits/chosen": -0.3184893727302551, "logits/rejected": -0.327816903591156, "logps/chosen": -228.61709594726562, "logps/rejected": -225.01275634765625, "loss": 0.2815, "rewards/accuracies": 1.0, "rewards/chosen": 0.2699580490589142, "rewards/margins": 2.1171441078186035, "rewards/rejected": -1.8471860885620117, "step": 171 }, { "epoch": 0.06, "learning_rate": 1.9545454545454545e-06, "logits/chosen": -0.352602481842041, "logits/rejected": -0.35279709100723267, "logps/chosen": -214.8223876953125, "logps/rejected": -247.01719665527344, "loss": 0.2418, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5870539546012878, "rewards/margins": 2.5224802494049072, "rewards/rejected": -1.9354263544082642, "step": 172 }, { "epoch": 0.06, "learning_rate": 1.9659090909090906e-06, "logits/chosen": -0.46593281626701355, "logits/rejected": -0.47267451882362366, "logps/chosen": -147.84857177734375, "logps/rejected": -198.56100463867188, "loss": 0.2771, "rewards/accuracies": 1.0, "rewards/chosen": 0.298606276512146, "rewards/margins": 2.1140646934509277, "rewards/rejected": -1.8154584169387817, "step": 173 }, { "epoch": 0.06, "learning_rate": 1.977272727272727e-06, "logits/chosen": -0.28259187936782837, "logits/rejected": -0.2842896282672882, "logps/chosen": -259.4579162597656, "logps/rejected": -229.03372192382812, "loss": 0.2599, "rewards/accuracies": 1.0, "rewards/chosen": 0.5963932275772095, "rewards/margins": 2.095283031463623, "rewards/rejected": -1.4988900423049927, "step": 174 }, { "epoch": 0.06, "learning_rate": 1.9886363636363633e-06, "logits/chosen": -0.2977672517299652, "logits/rejected": -0.30242863297462463, "logps/chosen": -205.64593505859375, "logps/rejected": -243.42141723632812, "loss": 0.2544, "rewards/accuracies": 0.9375, "rewards/chosen": 0.30716580152511597, "rewards/margins": 2.1544313430786133, "rewards/rejected": -1.847265601158142, "step": 175 }, { "epoch": 0.06, "learning_rate": 2e-06, "logits/chosen": -0.274402379989624, "logits/rejected": -0.283999502658844, "logps/chosen": -204.69309997558594, "logps/rejected": -215.4349365234375, "loss": 0.2252, "rewards/accuracies": 1.0, "rewards/chosen": 0.36157846450805664, "rewards/margins": 2.636406421661377, "rewards/rejected": -2.274827718734741, "step": 176 }, { "epoch": 0.06, "learning_rate": 1.9999998472569e-06, "logits/chosen": -0.42961445450782776, "logits/rejected": -0.41091832518577576, "logps/chosen": -191.59164428710938, "logps/rejected": -153.3698272705078, "loss": 0.2866, "rewards/accuracies": 0.9375, "rewards/chosen": 0.4867188036441803, "rewards/margins": 1.324954628944397, "rewards/rejected": -0.8382358551025391, "step": 177 }, { "epoch": 0.06, "learning_rate": 1.9999993890276473e-06, "logits/chosen": -0.4076841175556183, "logits/rejected": -0.40979719161987305, "logps/chosen": -233.164794921875, "logps/rejected": -257.7124328613281, "loss": 0.2207, "rewards/accuracies": 1.0, "rewards/chosen": 0.6128305196762085, "rewards/margins": 2.3505940437316895, "rewards/rejected": -1.7377634048461914, "step": 178 }, { "epoch": 0.06, "learning_rate": 1.9999986253123814e-06, "logits/chosen": -0.2812507152557373, "logits/rejected": -0.2974246144294739, "logps/chosen": -252.1658935546875, "logps/rejected": -273.61358642578125, "loss": 0.2005, "rewards/accuracies": 1.0, "rewards/chosen": 0.725446343421936, "rewards/margins": 2.6349167823791504, "rewards/rejected": -1.9094704389572144, "step": 179 }, { "epoch": 0.06, "learning_rate": 1.9999975561113356e-06, "logits/chosen": -0.2875508964061737, "logits/rejected": -0.2999340891838074, "logps/chosen": -215.58270263671875, "logps/rejected": -231.7762451171875, "loss": 0.2662, "rewards/accuracies": 1.0, "rewards/chosen": 0.5057113766670227, "rewards/margins": 2.1260058879852295, "rewards/rejected": -1.620294451713562, "step": 180 }, { "epoch": 0.06, "learning_rate": 1.999996181424837e-06, "logits/chosen": -0.33044302463531494, "logits/rejected": -0.33004382252693176, "logps/chosen": -180.1931610107422, "logps/rejected": -220.991455078125, "loss": 0.2194, "rewards/accuracies": 1.0, "rewards/chosen": 0.3523242473602295, "rewards/margins": 2.078191041946411, "rewards/rejected": -1.7258667945861816, "step": 181 }, { "epoch": 0.06, "learning_rate": 1.9999945012533045e-06, "logits/chosen": -0.3200615644454956, "logits/rejected": -0.3329431116580963, "logps/chosen": -181.9984893798828, "logps/rejected": -204.6586151123047, "loss": 0.2759, "rewards/accuracies": 1.0, "rewards/chosen": 0.4625142216682434, "rewards/margins": 1.3070244789123535, "rewards/rejected": -0.8445102572441101, "step": 182 }, { "epoch": 0.06, "learning_rate": 1.9999925155972526e-06, "logits/chosen": -0.4561930000782013, "logits/rejected": -0.4507611393928528, "logps/chosen": -226.92367553710938, "logps/rejected": -190.46563720703125, "loss": 0.2474, "rewards/accuracies": 1.0, "rewards/chosen": 0.48420071601867676, "rewards/margins": 1.5956096649169922, "rewards/rejected": -1.1114088296890259, "step": 183 }, { "epoch": 0.06, "learning_rate": 1.9999902244572877e-06, "logits/chosen": -0.382744699716568, "logits/rejected": -0.3773985505104065, "logps/chosen": -141.69898986816406, "logps/rejected": -143.16632080078125, "loss": 0.2632, "rewards/accuracies": 1.0, "rewards/chosen": 0.28781914710998535, "rewards/margins": 1.367562174797058, "rewards/rejected": -1.0797429084777832, "step": 184 }, { "epoch": 0.06, "learning_rate": 1.999987627834109e-06, "logits/chosen": -0.36295321583747864, "logits/rejected": -0.3644123077392578, "logps/chosen": -187.8894805908203, "logps/rejected": -216.66488647460938, "loss": 0.2583, "rewards/accuracies": 1.0, "rewards/chosen": 0.4001001715660095, "rewards/margins": 2.0974302291870117, "rewards/rejected": -1.6973302364349365, "step": 185 }, { "epoch": 0.06, "learning_rate": 1.99998472572851e-06, "logits/chosen": -0.2821238934993744, "logits/rejected": -0.28606975078582764, "logps/chosen": -214.45132446289062, "logps/rejected": -162.1883087158203, "loss": 0.238, "rewards/accuracies": 1.0, "rewards/chosen": 0.8366076946258545, "rewards/margins": 2.076646566390991, "rewards/rejected": -1.2400389909744263, "step": 186 }, { "epoch": 0.06, "learning_rate": 1.999981518141378e-06, "logits/chosen": -0.3234507143497467, "logits/rejected": -0.317643404006958, "logps/chosen": -174.46249389648438, "logps/rejected": -195.84683227539062, "loss": 0.2365, "rewards/accuracies": 1.0, "rewards/chosen": 0.6448962688446045, "rewards/margins": 1.9421424865722656, "rewards/rejected": -1.2972462177276611, "step": 187 }, { "epoch": 0.06, "learning_rate": 1.9999780050736915e-06, "logits/chosen": -0.3118136525154114, "logits/rejected": -0.3142116665840149, "logps/chosen": -145.37588500976562, "logps/rejected": -182.06295776367188, "loss": 0.2342, "rewards/accuracies": 1.0, "rewards/chosen": 0.45830821990966797, "rewards/margins": 1.8478177785873413, "rewards/rejected": -1.3895095586776733, "step": 188 }, { "epoch": 0.06, "learning_rate": 1.999974186526525e-06, "logits/chosen": -0.3827754557132721, "logits/rejected": -0.3845282196998596, "logps/chosen": -221.16400146484375, "logps/rejected": -254.7911376953125, "loss": 0.231, "rewards/accuracies": 1.0, "rewards/chosen": 0.5040119886398315, "rewards/margins": 2.5178611278533936, "rewards/rejected": -2.0138492584228516, "step": 189 }, { "epoch": 0.06, "learning_rate": 1.999970062501044e-06, "logits/chosen": -0.3376937508583069, "logits/rejected": -0.3527833819389343, "logps/chosen": -189.91122436523438, "logps/rejected": -213.150634765625, "loss": 0.1966, "rewards/accuracies": 1.0, "rewards/chosen": 0.2592636048793793, "rewards/margins": 2.533296823501587, "rewards/rejected": -2.274033546447754, "step": 190 }, { "epoch": 0.07, "learning_rate": 1.9999656329985093e-06, "logits/chosen": -0.3810875117778778, "logits/rejected": -0.38399288058280945, "logps/chosen": -143.480712890625, "logps/rejected": -174.9514923095703, "loss": 0.258, "rewards/accuracies": 1.0, "rewards/chosen": 0.25328096747398376, "rewards/margins": 1.7746943235397339, "rewards/rejected": -1.5214132070541382, "step": 191 }, { "epoch": 0.07, "learning_rate": 1.9999608980202735e-06, "logits/chosen": -0.3639904856681824, "logits/rejected": -0.3611850440502167, "logps/chosen": -220.4017333984375, "logps/rejected": -217.6968994140625, "loss": 0.2477, "rewards/accuracies": 1.0, "rewards/chosen": 0.4621654748916626, "rewards/margins": 2.2304906845092773, "rewards/rejected": -1.7683253288269043, "step": 192 }, { "epoch": 0.07, "learning_rate": 1.9999558575677825e-06, "logits/chosen": -0.36822712421417236, "logits/rejected": -0.36847513914108276, "logps/chosen": -239.00592041015625, "logps/rejected": -241.21571350097656, "loss": 0.1878, "rewards/accuracies": 0.9375, "rewards/chosen": 0.8591189980506897, "rewards/margins": 2.722846031188965, "rewards/rejected": -1.8637269735336304, "step": 193 }, { "epoch": 0.07, "learning_rate": 1.999950511642577e-06, "logits/chosen": -0.37119060754776, "logits/rejected": -0.35633188486099243, "logps/chosen": -137.1865234375, "logps/rejected": -134.4169158935547, "loss": 0.2192, "rewards/accuracies": 1.0, "rewards/chosen": 0.2563702166080475, "rewards/margins": 1.7209452390670776, "rewards/rejected": -1.464575171470642, "step": 194 }, { "epoch": 0.07, "learning_rate": 1.9999448602462903e-06, "logits/chosen": -0.25567686557769775, "logits/rejected": -0.2675867974758148, "logps/chosen": -213.06170654296875, "logps/rejected": -239.99490356445312, "loss": 0.1825, "rewards/accuracies": 1.0, "rewards/chosen": 0.5798842906951904, "rewards/margins": 2.712160348892212, "rewards/rejected": -2.1322758197784424, "step": 195 }, { "epoch": 0.07, "learning_rate": 1.9999389033806476e-06, "logits/chosen": -0.34242311120033264, "logits/rejected": -0.34537631273269653, "logps/chosen": -135.60543823242188, "logps/rejected": -150.37777709960938, "loss": 0.2465, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3533982038497925, "rewards/margins": 2.6619157791137695, "rewards/rejected": -2.3085176944732666, "step": 196 }, { "epoch": 0.07, "learning_rate": 1.99993264104747e-06, "logits/chosen": -0.3681905269622803, "logits/rejected": -0.37906891107559204, "logps/chosen": -167.94505310058594, "logps/rejected": -196.64370727539062, "loss": 0.2441, "rewards/accuracies": 1.0, "rewards/chosen": 0.5272888541221619, "rewards/margins": 2.0636868476867676, "rewards/rejected": -1.5363980531692505, "step": 197 }, { "epoch": 0.07, "learning_rate": 1.9999260732486695e-06, "logits/chosen": -0.33136898279190063, "logits/rejected": -0.33918362855911255, "logps/chosen": -129.62130737304688, "logps/rejected": -162.0132293701172, "loss": 0.2224, "rewards/accuracies": 1.0, "rewards/chosen": 0.4048194885253906, "rewards/margins": 1.705030918121338, "rewards/rejected": -1.3002114295959473, "step": 198 }, { "epoch": 0.07, "learning_rate": 1.9999191999862534e-06, "logits/chosen": -0.2947547733783722, "logits/rejected": -0.3080625534057617, "logps/chosen": -189.3775634765625, "logps/rejected": -240.10922241210938, "loss": 0.2156, "rewards/accuracies": 1.0, "rewards/chosen": 0.4878634810447693, "rewards/margins": 2.2571334838867188, "rewards/rejected": -1.7692700624465942, "step": 199 }, { "epoch": 0.07, "learning_rate": 1.9999120212623206e-06, "logits/chosen": -0.3830346167087555, "logits/rejected": -0.38656216859817505, "logps/chosen": -204.0417938232422, "logps/rejected": -197.99659729003906, "loss": 0.2096, "rewards/accuracies": 1.0, "rewards/chosen": 0.652113676071167, "rewards/margins": 2.862515687942505, "rewards/rejected": -2.210402011871338, "step": 200 }, { "epoch": 0.07, "learning_rate": 1.999904537079064e-06, "logits/chosen": -0.31310850381851196, "logits/rejected": -0.3133777678012848, "logps/chosen": -219.43885803222656, "logps/rejected": -182.9539794921875, "loss": 0.2589, "rewards/accuracies": 0.875, "rewards/chosen": 0.9963837265968323, "rewards/margins": 2.48873233795166, "rewards/rejected": -1.4923487901687622, "step": 201 }, { "epoch": 0.07, "learning_rate": 1.999896747438771e-06, "logits/chosen": -0.3740100562572479, "logits/rejected": -0.37298545241355896, "logps/chosen": -208.18576049804688, "logps/rejected": -233.30120849609375, "loss": 0.2207, "rewards/accuracies": 1.0, "rewards/chosen": 0.8809073567390442, "rewards/margins": 2.6516900062561035, "rewards/rejected": -1.770782470703125, "step": 202 }, { "epoch": 0.07, "learning_rate": 1.99988865234382e-06, "logits/chosen": -0.30840399861335754, "logits/rejected": -0.30960190296173096, "logps/chosen": -217.93284606933594, "logps/rejected": -233.58984375, "loss": 0.1959, "rewards/accuracies": 1.0, "rewards/chosen": 0.6821208000183105, "rewards/margins": 3.2285146713256836, "rewards/rejected": -2.546393871307373, "step": 203 }, { "epoch": 0.07, "learning_rate": 1.999880251796685e-06, "logits/chosen": -0.35711967945098877, "logits/rejected": -0.35781553387641907, "logps/chosen": -184.20018005371094, "logps/rejected": -212.81341552734375, "loss": 0.1737, "rewards/accuracies": 1.0, "rewards/chosen": 0.3661664128303528, "rewards/margins": 2.6691091060638428, "rewards/rejected": -2.3029425144195557, "step": 204 }, { "epoch": 0.07, "learning_rate": 1.9998715457999313e-06, "logits/chosen": -0.27520424127578735, "logits/rejected": -0.28048276901245117, "logps/chosen": -245.36866760253906, "logps/rejected": -226.0376434326172, "loss": 0.1978, "rewards/accuracies": 1.0, "rewards/chosen": 0.7478725910186768, "rewards/margins": 2.18715763092041, "rewards/rejected": -1.4392849206924438, "step": 205 }, { "epoch": 0.07, "learning_rate": 1.999862534356219e-06, "logits/chosen": -0.37940049171447754, "logits/rejected": -0.38574647903442383, "logps/chosen": -239.5470428466797, "logps/rejected": -265.0993957519531, "loss": 0.2122, "rewards/accuracies": 1.0, "rewards/chosen": 0.4890502095222473, "rewards/margins": 2.9412147998809814, "rewards/rejected": -2.452164649963379, "step": 206 }, { "epoch": 0.07, "learning_rate": 1.9998532174683005e-06, "logits/chosen": -0.30833494663238525, "logits/rejected": -0.3053240180015564, "logps/chosen": -159.24874877929688, "logps/rejected": -188.93548583984375, "loss": 0.2378, "rewards/accuracies": 1.0, "rewards/chosen": 0.37394481897354126, "rewards/margins": 2.236549139022827, "rewards/rejected": -1.8626046180725098, "step": 207 }, { "epoch": 0.07, "learning_rate": 1.999843595139023e-06, "logits/chosen": -0.2792358100414276, "logits/rejected": -0.29177311062812805, "logps/chosen": -266.1553955078125, "logps/rejected": -304.0266418457031, "loss": 0.1949, "rewards/accuracies": 1.0, "rewards/chosen": 1.1429691314697266, "rewards/margins": 3.8178634643554688, "rewards/rejected": -2.674894094467163, "step": 208 }, { "epoch": 0.07, "learning_rate": 1.999833667371325e-06, "logits/chosen": -0.28099367022514343, "logits/rejected": -0.2831069827079773, "logps/chosen": -118.62987518310547, "logps/rejected": -110.0357437133789, "loss": 0.2539, "rewards/accuracies": 0.9375, "rewards/chosen": 0.22580982744693756, "rewards/margins": 1.026469111442566, "rewards/rejected": -0.8006592988967896, "step": 209 }, { "epoch": 0.07, "learning_rate": 1.9998234341682398e-06, "logits/chosen": -0.3221505880355835, "logits/rejected": -0.3348507881164551, "logps/chosen": -144.4403533935547, "logps/rejected": -210.0201873779297, "loss": 0.1829, "rewards/accuracies": 1.0, "rewards/chosen": 0.5336707830429077, "rewards/margins": 3.13460111618042, "rewards/rejected": -2.6009302139282227, "step": 210 }, { "epoch": 0.07, "learning_rate": 1.999812895532893e-06, "logits/chosen": -0.270802766084671, "logits/rejected": -0.27597710490226746, "logps/chosen": -181.91549682617188, "logps/rejected": -193.3711700439453, "loss": 0.1948, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3958350718021393, "rewards/margins": 2.468811273574829, "rewards/rejected": -2.0729761123657227, "step": 211 }, { "epoch": 0.07, "learning_rate": 1.9998020514685043e-06, "logits/chosen": -0.34390369057655334, "logits/rejected": -0.3302321434020996, "logps/chosen": -137.36236572265625, "logps/rejected": -136.5703125, "loss": 0.2152, "rewards/accuracies": 1.0, "rewards/chosen": 0.33861562609672546, "rewards/margins": 2.084806203842163, "rewards/rejected": -1.7461906671524048, "step": 212 }, { "epoch": 0.07, "learning_rate": 1.999790901978387e-06, "logits/chosen": -0.19485239684581757, "logits/rejected": -0.20154951512813568, "logps/chosen": -285.19573974609375, "logps/rejected": -290.5989685058594, "loss": 0.1846, "rewards/accuracies": 1.0, "rewards/chosen": 0.9565669298171997, "rewards/margins": 3.867053270339966, "rewards/rejected": -2.9104862213134766, "step": 213 }, { "epoch": 0.07, "learning_rate": 1.9997794470659457e-06, "logits/chosen": -0.3205135762691498, "logits/rejected": -0.3220437467098236, "logps/chosen": -149.219482421875, "logps/rejected": -142.06369018554688, "loss": 0.1972, "rewards/accuracies": 1.0, "rewards/chosen": 0.5637794733047485, "rewards/margins": 2.441586971282959, "rewards/rejected": -1.8778076171875, "step": 214 }, { "epoch": 0.07, "learning_rate": 1.9997676867346814e-06, "logits/chosen": -0.3555201292037964, "logits/rejected": -0.3538631796836853, "logps/chosen": -207.57208251953125, "logps/rejected": -229.2569580078125, "loss": 0.1806, "rewards/accuracies": 1.0, "rewards/chosen": 0.5596169829368591, "rewards/margins": 2.8971991539001465, "rewards/rejected": -2.3375821113586426, "step": 215 }, { "epoch": 0.07, "learning_rate": 1.9997556209881857e-06, "logits/chosen": -0.43195968866348267, "logits/rejected": -0.4318148195743561, "logps/chosen": -210.621337890625, "logps/rejected": -228.87913513183594, "loss": 0.1705, "rewards/accuracies": 1.0, "rewards/chosen": 0.45233356952667236, "rewards/margins": 3.0677645206451416, "rewards/rejected": -2.615431070327759, "step": 216 }, { "epoch": 0.07, "learning_rate": 1.9997432498301446e-06, "logits/chosen": -0.2811325192451477, "logits/rejected": -0.2803207337856293, "logps/chosen": -191.27053833007812, "logps/rejected": -175.442138671875, "loss": 0.1646, "rewards/accuracies": 1.0, "rewards/chosen": 0.5830634832382202, "rewards/margins": 2.996595621109009, "rewards/rejected": -2.41353178024292, "step": 217 }, { "epoch": 0.07, "learning_rate": 1.9997305732643373e-06, "logits/chosen": -0.31959226727485657, "logits/rejected": -0.32747212052345276, "logps/chosen": -156.7407684326172, "logps/rejected": -175.57546997070312, "loss": 0.2204, "rewards/accuracies": 1.0, "rewards/chosen": 0.5795095562934875, "rewards/margins": 2.428359270095825, "rewards/rejected": -1.8488497734069824, "step": 218 }, { "epoch": 0.07, "learning_rate": 1.9997175912946367e-06, "logits/chosen": -0.4725140929222107, "logits/rejected": -0.4669742286205292, "logps/chosen": -107.90432739257812, "logps/rejected": -134.8014678955078, "loss": 0.1976, "rewards/accuracies": 0.875, "rewards/chosen": 0.19217520952224731, "rewards/margins": 2.0542373657226562, "rewards/rejected": -1.8620622158050537, "step": 219 }, { "epoch": 0.08, "learning_rate": 1.999704303925008e-06, "logits/chosen": -0.42180731892585754, "logits/rejected": -0.4307512640953064, "logps/chosen": -229.39796447753906, "logps/rejected": -290.0960388183594, "loss": 0.1878, "rewards/accuracies": 1.0, "rewards/chosen": 0.40412336587905884, "rewards/margins": 2.962850570678711, "rewards/rejected": -2.558727264404297, "step": 220 }, { "epoch": 0.08, "learning_rate": 1.999690711159511e-06, "logits/chosen": -0.4199782609939575, "logits/rejected": -0.42063722014427185, "logps/chosen": -149.93287658691406, "logps/rejected": -171.42381286621094, "loss": 0.2859, "rewards/accuracies": 1.0, "rewards/chosen": 0.5581015348434448, "rewards/margins": 1.768821120262146, "rewards/rejected": -1.2107195854187012, "step": 221 }, { "epoch": 0.08, "learning_rate": 1.9996768130022977e-06, "logits/chosen": -0.26807281374931335, "logits/rejected": -0.26970136165618896, "logps/chosen": -203.0836181640625, "logps/rejected": -157.84799194335938, "loss": 0.1828, "rewards/accuracies": 1.0, "rewards/chosen": 0.5401497483253479, "rewards/margins": 2.3988616466522217, "rewards/rejected": -1.8587119579315186, "step": 222 }, { "epoch": 0.08, "learning_rate": 1.999662609457614e-06, "logits/chosen": -0.3661916255950928, "logits/rejected": -0.3654862940311432, "logps/chosen": -167.22821044921875, "logps/rejected": -204.4048614501953, "loss": 0.1602, "rewards/accuracies": 1.0, "rewards/chosen": 0.5016471147537231, "rewards/margins": 2.970036268234253, "rewards/rejected": -2.4683895111083984, "step": 223 }, { "epoch": 0.08, "learning_rate": 1.9996481005297987e-06, "logits/chosen": -0.34777942299842834, "logits/rejected": -0.35614970326423645, "logps/chosen": -193.17318725585938, "logps/rejected": -247.3836212158203, "loss": 0.1836, "rewards/accuracies": 1.0, "rewards/chosen": 0.28111785650253296, "rewards/margins": 3.9534659385681152, "rewards/rejected": -3.6723480224609375, "step": 224 }, { "epoch": 0.08, "learning_rate": 1.999633286223284e-06, "logits/chosen": -0.40704217553138733, "logits/rejected": -0.4135701358318329, "logps/chosen": -157.8486785888672, "logps/rejected": -218.0732421875, "loss": 0.2076, "rewards/accuracies": 1.0, "rewards/chosen": 0.46794527769088745, "rewards/margins": 2.6489577293395996, "rewards/rejected": -2.1810121536254883, "step": 225 }, { "epoch": 0.08, "learning_rate": 1.9996181665425957e-06, "logits/chosen": -0.30027666687965393, "logits/rejected": -0.2997095286846161, "logps/chosen": -208.30137634277344, "logps/rejected": -181.11976623535156, "loss": 0.1743, "rewards/accuracies": 1.0, "rewards/chosen": 1.171264886856079, "rewards/margins": 3.2315077781677246, "rewards/rejected": -2.0602428913116455, "step": 226 }, { "epoch": 0.08, "learning_rate": 1.9996027414923523e-06, "logits/chosen": -0.32722243666648865, "logits/rejected": -0.3392205536365509, "logps/chosen": -224.1669464111328, "logps/rejected": -220.43911743164062, "loss": 0.2049, "rewards/accuracies": 1.0, "rewards/chosen": 0.743032693862915, "rewards/margins": 3.80613374710083, "rewards/rejected": -3.063101291656494, "step": 227 }, { "epoch": 0.08, "learning_rate": 1.9995870110772666e-06, "logits/chosen": -0.31802454590797424, "logits/rejected": -0.3242221474647522, "logps/chosen": -139.18270874023438, "logps/rejected": -152.1363525390625, "loss": 0.2571, "rewards/accuracies": 1.0, "rewards/chosen": 0.3980920910835266, "rewards/margins": 1.8309000730514526, "rewards/rejected": -1.4328080415725708, "step": 228 }, { "epoch": 0.08, "learning_rate": 1.999570975302143e-06, "logits/chosen": -0.3401688039302826, "logits/rejected": -0.3473239839076996, "logps/chosen": -173.44554138183594, "logps/rejected": -206.2012481689453, "loss": 0.1782, "rewards/accuracies": 1.0, "rewards/chosen": 0.3735518455505371, "rewards/margins": 2.857410430908203, "rewards/rejected": -2.483858585357666, "step": 229 }, { "epoch": 0.08, "learning_rate": 1.9995546341718815e-06, "logits/chosen": -0.31684666872024536, "logits/rejected": -0.32694002985954285, "logps/chosen": -156.67733764648438, "logps/rejected": -175.3795166015625, "loss": 0.1967, "rewards/accuracies": 1.0, "rewards/chosen": 0.6403310298919678, "rewards/margins": 3.3421595096588135, "rewards/rejected": -2.701828956604004, "step": 230 }, { "epoch": 0.08, "learning_rate": 1.999537987691473e-06, "logits/chosen": -0.31629592180252075, "logits/rejected": -0.3113624155521393, "logps/chosen": -140.09823608398438, "logps/rejected": -147.64651489257812, "loss": 0.2208, "rewards/accuracies": 1.0, "rewards/chosen": 0.5523765683174133, "rewards/margins": 2.6133694648742676, "rewards/rejected": -2.060993194580078, "step": 231 }, { "epoch": 0.08, "learning_rate": 1.9995210358660034e-06, "logits/chosen": -0.25042399764060974, "logits/rejected": -0.2576683461666107, "logps/chosen": -177.47059631347656, "logps/rejected": -187.0679168701172, "loss": 0.1805, "rewards/accuracies": 1.0, "rewards/chosen": 0.2847474217414856, "rewards/margins": 3.122446298599243, "rewards/rejected": -2.8376989364624023, "step": 232 }, { "epoch": 0.08, "learning_rate": 1.9995037787006513e-06, "logits/chosen": -0.4201761484146118, "logits/rejected": -0.42421719431877136, "logps/chosen": -148.21632385253906, "logps/rejected": -181.55076599121094, "loss": 0.1655, "rewards/accuracies": 1.0, "rewards/chosen": 0.6635738611221313, "rewards/margins": 2.8629543781280518, "rewards/rejected": -2.19938063621521, "step": 233 }, { "epoch": 0.08, "learning_rate": 1.999486216200688e-06, "logits/chosen": -0.2506200075149536, "logits/rejected": -0.25156301259994507, "logps/chosen": -182.0194549560547, "logps/rejected": -166.2190399169922, "loss": 0.1459, "rewards/accuracies": 0.9375, "rewards/chosen": 0.34792202711105347, "rewards/margins": 2.3643667697906494, "rewards/rejected": -2.0164449214935303, "step": 234 }, { "epoch": 0.08, "learning_rate": 1.9994683483714786e-06, "logits/chosen": -0.3594035804271698, "logits/rejected": -0.3536240756511688, "logps/chosen": -259.1424255371094, "logps/rejected": -250.2661895751953, "loss": 0.217, "rewards/accuracies": 1.0, "rewards/chosen": 0.6322073340415955, "rewards/margins": 3.611070156097412, "rewards/rejected": -2.978862762451172, "step": 235 }, { "epoch": 0.08, "learning_rate": 1.999450175218482e-06, "logits/chosen": -0.3861367106437683, "logits/rejected": -0.3756568431854248, "logps/chosen": -192.21929931640625, "logps/rejected": -199.36550903320312, "loss": 0.134, "rewards/accuracies": 1.0, "rewards/chosen": 0.2934705913066864, "rewards/margins": 3.1997334957122803, "rewards/rejected": -2.9062631130218506, "step": 236 }, { "epoch": 0.08, "learning_rate": 1.9994316967472496e-06, "logits/chosen": -0.3741089701652527, "logits/rejected": -0.3683563470840454, "logps/chosen": -205.50729370117188, "logps/rejected": -207.24928283691406, "loss": 0.163, "rewards/accuracies": 1.0, "rewards/chosen": 0.7463660836219788, "rewards/margins": 4.432071208953857, "rewards/rejected": -3.6857056617736816, "step": 237 }, { "epoch": 0.08, "learning_rate": 1.999412912963426e-06, "logits/chosen": -0.44809839129447937, "logits/rejected": -0.43332305550575256, "logps/chosen": -215.87315368652344, "logps/rejected": -204.03280639648438, "loss": 0.1815, "rewards/accuracies": 1.0, "rewards/chosen": 0.9333819150924683, "rewards/margins": 2.382951021194458, "rewards/rejected": -1.4495691061019897, "step": 238 }, { "epoch": 0.08, "learning_rate": 1.9993938238727497e-06, "logits/chosen": -0.35547977685928345, "logits/rejected": -0.35135191679000854, "logps/chosen": -164.1267852783203, "logps/rejected": -176.50132751464844, "loss": 0.2129, "rewards/accuracies": 1.0, "rewards/chosen": 0.1909787952899933, "rewards/margins": 2.6337249279022217, "rewards/rejected": -2.4427459239959717, "step": 239 }, { "epoch": 0.08, "learning_rate": 1.9993744294810526e-06, "logits/chosen": -0.3633762300014496, "logits/rejected": -0.37546736001968384, "logps/chosen": -181.5255889892578, "logps/rejected": -226.68399047851562, "loss": 0.1562, "rewards/accuracies": 1.0, "rewards/chosen": 0.685715913772583, "rewards/margins": 3.1037774085998535, "rewards/rejected": -2.4180610179901123, "step": 240 }, { "epoch": 0.08, "learning_rate": 1.999354729794259e-06, "logits/chosen": -0.33368799090385437, "logits/rejected": -0.33572232723236084, "logps/chosen": -209.41860961914062, "logps/rejected": -195.27838134765625, "loss": 0.163, "rewards/accuracies": 1.0, "rewards/chosen": 0.7828496098518372, "rewards/margins": 3.4191269874572754, "rewards/rejected": -2.636277198791504, "step": 241 }, { "epoch": 0.08, "learning_rate": 1.999334724818386e-06, "logits/chosen": -0.3454531729221344, "logits/rejected": -0.3469339907169342, "logps/chosen": -167.29782104492188, "logps/rejected": -183.15586853027344, "loss": 0.1751, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5027809143066406, "rewards/margins": 2.7789852619171143, "rewards/rejected": -2.2762043476104736, "step": 242 }, { "epoch": 0.08, "learning_rate": 1.9993144145595466e-06, "logits/chosen": -0.32699814438819885, "logits/rejected": -0.32355621457099915, "logps/chosen": -177.26470947265625, "logps/rejected": -183.01434326171875, "loss": 0.1773, "rewards/accuracies": 1.0, "rewards/chosen": 0.6309802532196045, "rewards/margins": 2.6389448642730713, "rewards/rejected": -2.007964611053467, "step": 243 }, { "epoch": 0.08, "learning_rate": 1.999293799023944e-06, "logits/chosen": -0.31768518686294556, "logits/rejected": -0.3080422580242157, "logps/chosen": -178.56570434570312, "logps/rejected": -157.32557678222656, "loss": 0.1857, "rewards/accuracies": 1.0, "rewards/chosen": 0.7930704355239868, "rewards/margins": 2.5692217350006104, "rewards/rejected": -1.7761512994766235, "step": 244 }, { "epoch": 0.08, "learning_rate": 1.9992728782178765e-06, "logits/chosen": -0.2991012930870056, "logits/rejected": -0.3184984028339386, "logps/chosen": -260.7341003417969, "logps/rejected": -293.29736328125, "loss": 0.1568, "rewards/accuracies": 1.0, "rewards/chosen": 0.6683063507080078, "rewards/margins": 5.201171398162842, "rewards/rejected": -4.532865524291992, "step": 245 }, { "epoch": 0.08, "learning_rate": 1.999251652147735e-06, "logits/chosen": -0.3230234682559967, "logits/rejected": -0.33350035548210144, "logps/chosen": -177.8396453857422, "logps/rejected": -231.45892333984375, "loss": 0.1402, "rewards/accuracies": 1.0, "rewards/chosen": 0.2757820188999176, "rewards/margins": 3.3192191123962402, "rewards/rejected": -3.0434372425079346, "step": 246 }, { "epoch": 0.08, "learning_rate": 1.9992301208200035e-06, "logits/chosen": -0.3360312879085541, "logits/rejected": -0.3239460289478302, "logps/chosen": -185.6141815185547, "logps/rejected": -241.19711303710938, "loss": 0.1666, "rewards/accuracies": 1.0, "rewards/chosen": 0.4751449227333069, "rewards/margins": 4.725063800811768, "rewards/rejected": -4.249918460845947, "step": 247 }, { "epoch": 0.08, "learning_rate": 1.9992082842412602e-06, "logits/chosen": -0.39428389072418213, "logits/rejected": -0.3895500898361206, "logps/chosen": -127.42477416992188, "logps/rejected": -168.357666015625, "loss": 0.1597, "rewards/accuracies": 1.0, "rewards/chosen": 0.18148589134216309, "rewards/margins": 2.592433452606201, "rewards/rejected": -2.410947561264038, "step": 248 }, { "epoch": 0.08, "learning_rate": 1.999186142418175e-06, "logits/chosen": -0.40166378021240234, "logits/rejected": -0.3963209390640259, "logps/chosen": -207.4730987548828, "logps/rejected": -205.68641662597656, "loss": 0.1285, "rewards/accuracies": 1.0, "rewards/chosen": 0.7011098861694336, "rewards/margins": 3.630488872528076, "rewards/rejected": -2.929379463195801, "step": 249 }, { "epoch": 0.09, "learning_rate": 1.9991636953575123e-06, "logits/chosen": -0.3604174852371216, "logits/rejected": -0.35144874453544617, "logps/chosen": -182.82188415527344, "logps/rejected": -165.29554748535156, "loss": 0.1256, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5742701888084412, "rewards/margins": 3.3190863132476807, "rewards/rejected": -2.7448158264160156, "step": 250 }, { "epoch": 0.09, "learning_rate": 1.9991409430661296e-06, "logits/chosen": -0.33483177423477173, "logits/rejected": -0.3534334897994995, "logps/chosen": -155.026123046875, "logps/rejected": -183.29061889648438, "loss": 0.1701, "rewards/accuracies": 0.9375, "rewards/chosen": 0.545192301273346, "rewards/margins": 2.7803938388824463, "rewards/rejected": -2.235201597213745, "step": 251 }, { "epoch": 0.09, "learning_rate": 1.999117885550977e-06, "logits/chosen": -0.3808134198188782, "logits/rejected": -0.3926255702972412, "logps/chosen": -218.3611602783203, "logps/rejected": -236.69378662109375, "loss": 0.167, "rewards/accuracies": 1.0, "rewards/chosen": 0.6976996660232544, "rewards/margins": 3.845489263534546, "rewards/rejected": -3.14778995513916, "step": 252 }, { "epoch": 0.09, "learning_rate": 1.999094522819099e-06, "logits/chosen": -0.40457016229629517, "logits/rejected": -0.4167182147502899, "logps/chosen": -190.70806884765625, "logps/rejected": -215.0278778076172, "loss": 0.1846, "rewards/accuracies": 1.0, "rewards/chosen": 0.680283784866333, "rewards/margins": 2.922738552093506, "rewards/rejected": -2.242455005645752, "step": 253 }, { "epoch": 0.09, "learning_rate": 1.999070854877632e-06, "logits/chosen": -0.3563818037509918, "logits/rejected": -0.3699873089790344, "logps/chosen": -147.10098266601562, "logps/rejected": -178.80287170410156, "loss": 0.1636, "rewards/accuracies": 1.0, "rewards/chosen": 0.6475325226783752, "rewards/margins": 2.6231226921081543, "rewards/rejected": -1.9755903482437134, "step": 254 }, { "epoch": 0.09, "learning_rate": 1.999046881733806e-06, "logits/chosen": -0.43569090962409973, "logits/rejected": -0.43889355659484863, "logps/chosen": -148.0570068359375, "logps/rejected": -211.59559631347656, "loss": 0.1376, "rewards/accuracies": 1.0, "rewards/chosen": 0.1389019638299942, "rewards/margins": 3.1238558292388916, "rewards/rejected": -2.9849538803100586, "step": 255 }, { "epoch": 0.09, "learning_rate": 1.999022603394945e-06, "logits/chosen": -0.42064139246940613, "logits/rejected": -0.42259538173675537, "logps/chosen": -138.04286193847656, "logps/rejected": -174.62820434570312, "loss": 0.1515, "rewards/accuracies": 1.0, "rewards/chosen": 0.523793637752533, "rewards/margins": 2.4345993995666504, "rewards/rejected": -1.9108057022094727, "step": 256 }, { "epoch": 0.09, "learning_rate": 1.9989980198684654e-06, "logits/chosen": -0.3944424092769623, "logits/rejected": -0.3857417702674866, "logps/chosen": -165.6807403564453, "logps/rejected": -154.4690704345703, "loss": 0.156, "rewards/accuracies": 1.0, "rewards/chosen": 0.5967243909835815, "rewards/margins": 3.416468620300293, "rewards/rejected": -2.819744110107422, "step": 257 }, { "epoch": 0.09, "learning_rate": 1.998973131161877e-06, "logits/chosen": -0.3336355686187744, "logits/rejected": -0.33776751160621643, "logps/chosen": -226.81072998046875, "logps/rejected": -255.2822265625, "loss": 0.174, "rewards/accuracies": 1.0, "rewards/chosen": 0.8419344425201416, "rewards/margins": 4.262091636657715, "rewards/rejected": -3.4201574325561523, "step": 258 }, { "epoch": 0.09, "learning_rate": 1.9989479372827834e-06, "logits/chosen": -0.3147190809249878, "logits/rejected": -0.3165591359138489, "logps/chosen": -162.73622131347656, "logps/rejected": -221.58636474609375, "loss": 0.1417, "rewards/accuracies": 1.0, "rewards/chosen": 0.2793712317943573, "rewards/margins": 3.4459240436553955, "rewards/rejected": -3.166553020477295, "step": 259 }, { "epoch": 0.09, "learning_rate": 1.998922438238881e-06, "logits/chosen": -0.3617367744445801, "logits/rejected": -0.37376832962036133, "logps/chosen": -138.3329315185547, "logps/rejected": -195.62936401367188, "loss": 0.1722, "rewards/accuracies": 0.9375, "rewards/chosen": 0.729229211807251, "rewards/margins": 3.868166208267212, "rewards/rejected": -3.138936996459961, "step": 260 }, { "epoch": 0.09, "learning_rate": 1.998896634037959e-06, "logits/chosen": -0.36199086904525757, "logits/rejected": -0.3606591820716858, "logps/chosen": -171.10475158691406, "logps/rejected": -144.111572265625, "loss": 0.1447, "rewards/accuracies": 1.0, "rewards/chosen": 1.0471385717391968, "rewards/margins": 3.4293766021728516, "rewards/rejected": -2.3822383880615234, "step": 261 }, { "epoch": 0.09, "learning_rate": 1.9988705246879e-06, "logits/chosen": -0.38567644357681274, "logits/rejected": -0.39146190881729126, "logps/chosen": -120.44933319091797, "logps/rejected": -188.5089874267578, "loss": 0.1547, "rewards/accuracies": 0.9375, "rewards/chosen": 0.4568111002445221, "rewards/margins": 2.931818962097168, "rewards/rejected": -2.475008010864258, "step": 262 }, { "epoch": 0.09, "learning_rate": 1.9988441101966806e-06, "logits/chosen": -0.3619241416454315, "logits/rejected": -0.35551485419273376, "logps/chosen": -213.7313690185547, "logps/rejected": -206.05807495117188, "loss": 0.112, "rewards/accuracies": 1.0, "rewards/chosen": 0.6082780361175537, "rewards/margins": 3.586414337158203, "rewards/rejected": -2.9781360626220703, "step": 263 }, { "epoch": 0.09, "learning_rate": 1.9988173905723702e-06, "logits/chosen": -0.39027249813079834, "logits/rejected": -0.3758173882961273, "logps/chosen": -193.341064453125, "logps/rejected": -159.59463500976562, "loss": 0.1307, "rewards/accuracies": 1.0, "rewards/chosen": 0.4571547508239746, "rewards/margins": 2.740644693374634, "rewards/rejected": -2.28348970413208, "step": 264 }, { "epoch": 0.09, "learning_rate": 1.998790365823131e-06, "logits/chosen": -0.38988399505615234, "logits/rejected": -0.3854425549507141, "logps/chosen": -138.16197204589844, "logps/rejected": -150.6834259033203, "loss": 0.1514, "rewards/accuracies": 1.0, "rewards/chosen": 0.06985274702310562, "rewards/margins": 2.435347318649292, "rewards/rejected": -2.365494728088379, "step": 265 }, { "epoch": 0.09, "learning_rate": 1.998763035957218e-06, "logits/chosen": -0.4567091464996338, "logits/rejected": -0.4461557865142822, "logps/chosen": -191.23512268066406, "logps/rejected": -169.64785766601562, "loss": 0.1417, "rewards/accuracies": 1.0, "rewards/chosen": 0.5890553593635559, "rewards/margins": 3.3656275272369385, "rewards/rejected": -2.7765722274780273, "step": 266 }, { "epoch": 0.09, "learning_rate": 1.998735400982981e-06, "logits/chosen": -0.41239258646965027, "logits/rejected": -0.412753164768219, "logps/chosen": -283.2748107910156, "logps/rejected": -275.42779541015625, "loss": 0.1145, "rewards/accuracies": 1.0, "rewards/chosen": 0.9947189688682556, "rewards/margins": 5.3203020095825195, "rewards/rejected": -4.325583457946777, "step": 267 }, { "epoch": 0.09, "learning_rate": 1.998707460908862e-06, "logits/chosen": -0.3454158902168274, "logits/rejected": -0.3426780700683594, "logps/chosen": -130.00918579101562, "logps/rejected": -169.2588653564453, "loss": 0.1407, "rewards/accuracies": 1.0, "rewards/chosen": 0.3547174632549286, "rewards/margins": 2.4735970497131348, "rewards/rejected": -2.118879795074463, "step": 268 }, { "epoch": 0.09, "learning_rate": 1.998679215743396e-06, "logits/chosen": -0.38052019476890564, "logits/rejected": -0.3793506920337677, "logps/chosen": -170.7671661376953, "logps/rejected": -186.26161193847656, "loss": 0.1866, "rewards/accuracies": 1.0, "rewards/chosen": 0.1985599547624588, "rewards/margins": 2.6929781436920166, "rewards/rejected": -2.4944183826446533, "step": 269 }, { "epoch": 0.09, "learning_rate": 1.998650665495212e-06, "logits/chosen": -0.3599231541156769, "logits/rejected": -0.35046106576919556, "logps/chosen": -167.77584838867188, "logps/rejected": -147.69081115722656, "loss": 0.1525, "rewards/accuracies": 1.0, "rewards/chosen": 0.8006004095077515, "rewards/margins": 3.4594509601593018, "rewards/rejected": -2.6588504314422607, "step": 270 }, { "epoch": 0.09, "learning_rate": 1.9986218101730314e-06, "logits/chosen": -0.40927189588546753, "logits/rejected": -0.39881911873817444, "logps/chosen": -222.2960662841797, "logps/rejected": -206.148681640625, "loss": 0.1278, "rewards/accuracies": 1.0, "rewards/chosen": 0.36687299609184265, "rewards/margins": 3.516132354736328, "rewards/rejected": -3.149259090423584, "step": 271 }, { "epoch": 0.09, "learning_rate": 1.9985926497856685e-06, "logits/chosen": -0.3821783661842346, "logits/rejected": -0.3747321367263794, "logps/chosen": -181.99375915527344, "logps/rejected": -215.7544708251953, "loss": 0.1214, "rewards/accuracies": 1.0, "rewards/chosen": 0.6329823732376099, "rewards/margins": 4.639272689819336, "rewards/rejected": -4.006290435791016, "step": 272 }, { "epoch": 0.09, "learning_rate": 1.9985631843420324e-06, "logits/chosen": -0.3597754240036011, "logits/rejected": -0.3475554585456848, "logps/chosen": -127.39689636230469, "logps/rejected": -142.09571838378906, "loss": 0.1627, "rewards/accuracies": 1.0, "rewards/chosen": 0.3524957597255707, "rewards/margins": 3.01165509223938, "rewards/rejected": -2.6591591835021973, "step": 273 }, { "epoch": 0.09, "learning_rate": 1.9985334138511237e-06, "logits/chosen": -0.3183852434158325, "logits/rejected": -0.304612398147583, "logps/chosen": -205.46630859375, "logps/rejected": -190.80975341796875, "loss": 0.1449, "rewards/accuracies": 1.0, "rewards/chosen": 0.5257024765014648, "rewards/margins": 4.355990886688232, "rewards/rejected": -3.8302884101867676, "step": 274 }, { "epoch": 0.09, "learning_rate": 1.9985033383220373e-06, "logits/chosen": -0.36765941977500916, "logits/rejected": -0.37147971987724304, "logps/chosen": -193.6820068359375, "logps/rejected": -222.64627075195312, "loss": 0.1427, "rewards/accuracies": 0.9375, "rewards/chosen": 0.26533424854278564, "rewards/margins": 3.503085136413574, "rewards/rejected": -3.237751007080078, "step": 275 }, { "epoch": 0.09, "learning_rate": 1.9984729577639607e-06, "logits/chosen": -0.2973068952560425, "logits/rejected": -0.3080202341079712, "logps/chosen": -222.02626037597656, "logps/rejected": -223.16827392578125, "loss": 0.132, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6701211929321289, "rewards/margins": 4.507485389709473, "rewards/rejected": -3.8373637199401855, "step": 276 }, { "epoch": 0.09, "learning_rate": 1.9984422721861747e-06, "logits/chosen": -0.41979870200157166, "logits/rejected": -0.4115765392780304, "logps/chosen": -164.84567260742188, "logps/rejected": -177.60643005371094, "loss": 0.1421, "rewards/accuracies": 1.0, "rewards/chosen": 0.28920161724090576, "rewards/margins": 3.4934940338134766, "rewards/rejected": -3.2042922973632812, "step": 277 }, { "epoch": 0.09, "learning_rate": 1.9984112815980535e-06, "logits/chosen": -0.39088505506515503, "logits/rejected": -0.3800049126148224, "logps/chosen": -210.19349670410156, "logps/rejected": -210.3577117919922, "loss": 0.2095, "rewards/accuracies": 1.0, "rewards/chosen": 0.7202905416488647, "rewards/margins": 3.8248038291931152, "rewards/rejected": -3.10451340675354, "step": 278 }, { "epoch": 0.1, "learning_rate": 1.998379986009064e-06, "logits/chosen": -0.42527052760124207, "logits/rejected": -0.4299056828022003, "logps/chosen": -158.07220458984375, "logps/rejected": -180.99427795410156, "loss": 0.1158, "rewards/accuracies": 1.0, "rewards/chosen": 0.3347249925136566, "rewards/margins": 3.465278148651123, "rewards/rejected": -3.1305532455444336, "step": 279 }, { "epoch": 0.1, "learning_rate": 1.9983483854287665e-06, "logits/chosen": -0.4082021713256836, "logits/rejected": -0.41191020607948303, "logps/chosen": -205.50631713867188, "logps/rejected": -287.3282775878906, "loss": 0.1452, "rewards/accuracies": 1.0, "rewards/chosen": 0.2486642301082611, "rewards/margins": 4.800168037414551, "rewards/rejected": -4.551503658294678, "step": 280 }, { "epoch": 0.1, "learning_rate": 1.998316479866815e-06, "logits/chosen": -0.40627428889274597, "logits/rejected": -0.40289169549942017, "logps/chosen": -174.7906494140625, "logps/rejected": -182.51075744628906, "loss": 0.1302, "rewards/accuracies": 1.0, "rewards/chosen": 0.7838396430015564, "rewards/margins": 3.2712841033935547, "rewards/rejected": -2.4874444007873535, "step": 281 }, { "epoch": 0.1, "learning_rate": 1.998284269332956e-06, "logits/chosen": -0.31596851348876953, "logits/rejected": -0.3198936879634857, "logps/chosen": -148.71827697753906, "logps/rejected": -200.27163696289062, "loss": 0.1323, "rewards/accuracies": 1.0, "rewards/chosen": 0.20606449246406555, "rewards/margins": 3.06001615524292, "rewards/rejected": -2.8539516925811768, "step": 282 }, { "epoch": 0.1, "learning_rate": 1.9982517538370293e-06, "logits/chosen": -0.33274388313293457, "logits/rejected": -0.3312300741672516, "logps/chosen": -145.02403259277344, "logps/rejected": -165.5538787841797, "loss": 0.162, "rewards/accuracies": 1.0, "rewards/chosen": 0.7522860765457153, "rewards/margins": 3.405168056488037, "rewards/rejected": -2.6528823375701904, "step": 283 }, { "epoch": 0.1, "learning_rate": 1.9982189333889683e-06, "logits/chosen": -0.3904164135456085, "logits/rejected": -0.3846819996833801, "logps/chosen": -245.54373168945312, "logps/rejected": -275.6123352050781, "loss": 0.1058, "rewards/accuracies": 1.0, "rewards/chosen": 1.112199306488037, "rewards/margins": 4.921142101287842, "rewards/rejected": -3.808943033218384, "step": 284 }, { "epoch": 0.1, "learning_rate": 1.9981858079987984e-06, "logits/chosen": -0.40017157793045044, "logits/rejected": -0.3871762156486511, "logps/chosen": -203.10231018066406, "logps/rejected": -192.75315856933594, "loss": 0.1442, "rewards/accuracies": 0.9375, "rewards/chosen": 0.04911227524280548, "rewards/margins": 4.020562648773193, "rewards/rejected": -3.9714505672454834, "step": 285 }, { "epoch": 0.1, "learning_rate": 1.9981523776766393e-06, "logits/chosen": -0.4251673221588135, "logits/rejected": -0.4174371659755707, "logps/chosen": -191.56521606445312, "logps/rejected": -176.27645874023438, "loss": 0.1173, "rewards/accuracies": 1.0, "rewards/chosen": 0.550426721572876, "rewards/margins": 3.913705587387085, "rewards/rejected": -3.363278865814209, "step": 286 }, { "epoch": 0.1, "learning_rate": 1.9981186424327043e-06, "logits/chosen": -0.4664511978626251, "logits/rejected": -0.47252923250198364, "logps/chosen": -159.18455505371094, "logps/rejected": -200.8080596923828, "loss": 0.0996, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5452412366867065, "rewards/margins": 4.3696818351745605, "rewards/rejected": -3.8244400024414062, "step": 287 }, { "epoch": 0.1, "learning_rate": 1.9980846022772976e-06, "logits/chosen": -0.3761683702468872, "logits/rejected": -0.38154542446136475, "logps/chosen": -180.57447814941406, "logps/rejected": -200.57748413085938, "loss": 0.1395, "rewards/accuracies": 0.9375, "rewards/chosen": 0.52030348777771, "rewards/margins": 3.178215742111206, "rewards/rejected": -2.657912254333496, "step": 288 }, { "epoch": 0.1, "learning_rate": 1.9980502572208192e-06, "logits/chosen": -0.4005568027496338, "logits/rejected": -0.3948192000389099, "logps/chosen": -188.15647888183594, "logps/rejected": -224.60899353027344, "loss": 0.1207, "rewards/accuracies": 1.0, "rewards/chosen": 1.2309362888336182, "rewards/margins": 4.781342029571533, "rewards/rejected": -3.550405979156494, "step": 289 }, { "epoch": 0.1, "learning_rate": 1.9980156072737608e-06, "logits/chosen": -0.4675977826118469, "logits/rejected": -0.48198288679122925, "logps/chosen": -189.20999145507812, "logps/rejected": -270.5663757324219, "loss": 0.1218, "rewards/accuracies": 1.0, "rewards/chosen": 0.039050839841365814, "rewards/margins": 4.717092990875244, "rewards/rejected": -4.678041458129883, "step": 290 }, { "epoch": 0.1, "learning_rate": 1.997980652446707e-06, "logits/chosen": -0.3790937066078186, "logits/rejected": -0.3793366849422455, "logps/chosen": -207.93698120117188, "logps/rejected": -285.7564697265625, "loss": 0.1208, "rewards/accuracies": 1.0, "rewards/chosen": 0.780754566192627, "rewards/margins": 4.224028587341309, "rewards/rejected": -3.4432742595672607, "step": 291 }, { "epoch": 0.1, "learning_rate": 1.9979453927503364e-06, "logits/chosen": -0.4101087749004364, "logits/rejected": -0.4140898585319519, "logps/chosen": -212.0942840576172, "logps/rejected": -250.2639923095703, "loss": 0.1206, "rewards/accuracies": 1.0, "rewards/chosen": 0.26629626750946045, "rewards/margins": 5.407047271728516, "rewards/rejected": -5.140750408172607, "step": 292 }, { "epoch": 0.1, "learning_rate": 1.9979098281954203e-06, "logits/chosen": -0.39801570773124695, "logits/rejected": -0.4175342619419098, "logps/chosen": -153.9769744873047, "logps/rejected": -241.30001831054688, "loss": 0.1326, "rewards/accuracies": 1.0, "rewards/chosen": 0.42501890659332275, "rewards/margins": 4.403995513916016, "rewards/rejected": -3.9789764881134033, "step": 293 }, { "epoch": 0.1, "learning_rate": 1.997873958792823e-06, "logits/chosen": -0.4269793927669525, "logits/rejected": -0.4358789920806885, "logps/chosen": -206.27191162109375, "logps/rejected": -218.9051971435547, "loss": 0.1069, "rewards/accuracies": 1.0, "rewards/chosen": 0.36652177572250366, "rewards/margins": 3.5382025241851807, "rewards/rejected": -3.171680450439453, "step": 294 }, { "epoch": 0.1, "learning_rate": 1.997837784553502e-06, "logits/chosen": -0.41329818964004517, "logits/rejected": -0.41769787669181824, "logps/chosen": -165.5087890625, "logps/rejected": -233.89588928222656, "loss": 0.1128, "rewards/accuracies": 1.0, "rewards/chosen": 0.7385711073875427, "rewards/margins": 3.172114849090576, "rewards/rejected": -2.4335434436798096, "step": 295 }, { "epoch": 0.1, "learning_rate": 1.9978013054885086e-06, "logits/chosen": -0.38400939106941223, "logits/rejected": -0.3866112232208252, "logps/chosen": -196.24620056152344, "logps/rejected": -225.6888885498047, "loss": 0.1266, "rewards/accuracies": 1.0, "rewards/chosen": 0.14660605788230896, "rewards/margins": 4.072282314300537, "rewards/rejected": -3.9256765842437744, "step": 296 }, { "epoch": 0.1, "learning_rate": 1.9977645216089863e-06, "logits/chosen": -0.40109631419181824, "logits/rejected": -0.40149015188217163, "logps/chosen": -263.7791748046875, "logps/rejected": -264.74139404296875, "loss": 0.1273, "rewards/accuracies": 1.0, "rewards/chosen": 0.7411483526229858, "rewards/margins": 4.237608909606934, "rewards/rejected": -3.496460437774658, "step": 297 }, { "epoch": 0.1, "learning_rate": 1.9977274329261724e-06, "logits/chosen": -0.41764357686042786, "logits/rejected": -0.41277432441711426, "logps/chosen": -228.4204559326172, "logps/rejected": -243.15606689453125, "loss": 0.1226, "rewards/accuracies": 1.0, "rewards/chosen": 1.2609810829162598, "rewards/margins": 5.2233381271362305, "rewards/rejected": -3.9623570442199707, "step": 298 }, { "epoch": 0.1, "learning_rate": 1.997690039451396e-06, "logits/chosen": -0.3653646409511566, "logits/rejected": -0.3713206946849823, "logps/chosen": -168.3474578857422, "logps/rejected": -226.3717803955078, "loss": 0.1529, "rewards/accuracies": 0.9375, "rewards/chosen": 0.15957480669021606, "rewards/margins": 3.171478748321533, "rewards/rejected": -3.011904001235962, "step": 299 }, { "epoch": 0.1, "learning_rate": 1.9976523411960815e-06, "logits/chosen": -0.49433863162994385, "logits/rejected": -0.4965887665748596, "logps/chosen": -138.49685668945312, "logps/rejected": -202.60951232910156, "loss": 0.1426, "rewards/accuracies": 1.0, "rewards/chosen": 0.05820811167359352, "rewards/margins": 4.338497161865234, "rewards/rejected": -4.280289649963379, "step": 300 }, { "epoch": 0.1, "learning_rate": 1.9976143381717447e-06, "logits/chosen": -0.4228617548942566, "logits/rejected": -0.41150641441345215, "logps/chosen": -191.19183349609375, "logps/rejected": -181.9066162109375, "loss": 0.1376, "rewards/accuracies": 1.0, "rewards/chosen": 0.7782779932022095, "rewards/margins": 3.3280582427978516, "rewards/rejected": -2.5497803688049316, "step": 301 }, { "epoch": 0.1, "learning_rate": 1.997576030389995e-06, "logits/chosen": -0.4191322922706604, "logits/rejected": -0.4140099287033081, "logps/chosen": -199.7984619140625, "logps/rejected": -198.80764770507812, "loss": 0.0984, "rewards/accuracies": 1.0, "rewards/chosen": 0.600385308265686, "rewards/margins": 4.068626403808594, "rewards/rejected": -3.4682414531707764, "step": 302 }, { "epoch": 0.1, "learning_rate": 1.9975374178625345e-06, "logits/chosen": -0.40211066603660583, "logits/rejected": -0.41436436772346497, "logps/chosen": -200.19781494140625, "logps/rejected": -301.3635559082031, "loss": 0.1221, "rewards/accuracies": 1.0, "rewards/chosen": 0.6646886467933655, "rewards/margins": 6.094583511352539, "rewards/rejected": -5.429895401000977, "step": 303 }, { "epoch": 0.1, "learning_rate": 1.997498500601159e-06, "logits/chosen": -0.3531968593597412, "logits/rejected": -0.34592506289482117, "logps/chosen": -205.25128173828125, "logps/rejected": -225.17161560058594, "loss": 0.1385, "rewards/accuracies": 1.0, "rewards/chosen": 0.9679641723632812, "rewards/margins": 5.139934062957764, "rewards/rejected": -4.171969890594482, "step": 304 }, { "epoch": 0.1, "learning_rate": 1.9974592786177583e-06, "logits/chosen": -0.4348013997077942, "logits/rejected": -0.4408420920372009, "logps/chosen": -253.50677490234375, "logps/rejected": -294.1885986328125, "loss": 0.1206, "rewards/accuracies": 0.9375, "rewards/chosen": 0.8778119087219238, "rewards/margins": 4.587221145629883, "rewards/rejected": -3.709409236907959, "step": 305 }, { "epoch": 0.1, "learning_rate": 1.9974197519243123e-06, "logits/chosen": -0.33300161361694336, "logits/rejected": -0.3353329300880432, "logps/chosen": -203.20855712890625, "logps/rejected": -225.610595703125, "loss": 0.1659, "rewards/accuracies": 1.0, "rewards/chosen": 0.2745177745819092, "rewards/margins": 4.471946716308594, "rewards/rejected": -4.197429180145264, "step": 306 }, { "epoch": 0.1, "learning_rate": 1.9973799205328974e-06, "logits/chosen": -0.43523934483528137, "logits/rejected": -0.4438977539539337, "logps/chosen": -248.1259765625, "logps/rejected": -317.741455078125, "loss": 0.1125, "rewards/accuracies": 1.0, "rewards/chosen": 0.15971364080905914, "rewards/margins": 4.681036472320557, "rewards/rejected": -4.521323204040527, "step": 307 }, { "epoch": 0.11, "learning_rate": 1.9973397844556807e-06, "logits/chosen": -0.4434594511985779, "logits/rejected": -0.4346660077571869, "logps/chosen": -252.76841735839844, "logps/rejected": -269.37017822265625, "loss": 0.1078, "rewards/accuracies": 1.0, "rewards/chosen": 0.04505440965294838, "rewards/margins": 3.979511260986328, "rewards/rejected": -3.9344565868377686, "step": 308 }, { "epoch": 0.11, "learning_rate": 1.997299343704923e-06, "logits/chosen": -0.533365786075592, "logits/rejected": -0.5285806655883789, "logps/chosen": -234.88108825683594, "logps/rejected": -270.4600524902344, "loss": 0.096, "rewards/accuracies": 1.0, "rewards/chosen": 0.5713220238685608, "rewards/margins": 5.435586452484131, "rewards/rejected": -4.864264965057373, "step": 309 }, { "epoch": 0.11, "learning_rate": 1.9972585982929792e-06, "logits/chosen": -0.4091928005218506, "logits/rejected": -0.40551817417144775, "logps/chosen": -153.26495361328125, "logps/rejected": -156.25164794921875, "loss": 0.1212, "rewards/accuracies": 1.0, "rewards/chosen": 0.700111448764801, "rewards/margins": 3.3007290363311768, "rewards/rejected": -2.6006176471710205, "step": 310 }, { "epoch": 0.11, "learning_rate": 1.997217548232296e-06, "logits/chosen": -0.4703032076358795, "logits/rejected": -0.47481948137283325, "logps/chosen": -167.0120849609375, "logps/rejected": -196.49667358398438, "loss": 0.1209, "rewards/accuracies": 0.9375, "rewards/chosen": -0.16024860739707947, "rewards/margins": 4.161896705627441, "rewards/rejected": -4.322145462036133, "step": 311 }, { "epoch": 0.11, "learning_rate": 1.997176193535414e-06, "logits/chosen": -0.40915223956108093, "logits/rejected": -0.4004373252391815, "logps/chosen": -184.80892944335938, "logps/rejected": -171.9681854248047, "loss": 0.1577, "rewards/accuracies": 1.0, "rewards/chosen": 0.19712218642234802, "rewards/margins": 3.3178348541259766, "rewards/rejected": -3.1207127571105957, "step": 312 }, { "epoch": 0.11, "learning_rate": 1.997134534214966e-06, "logits/chosen": -0.44081535935401917, "logits/rejected": -0.43752118945121765, "logps/chosen": -185.71580505371094, "logps/rejected": -206.5225372314453, "loss": 0.1464, "rewards/accuracies": 1.0, "rewards/chosen": 0.2685786485671997, "rewards/margins": 4.190568447113037, "rewards/rejected": -3.921989917755127, "step": 313 }, { "epoch": 0.11, "learning_rate": 1.9970925702836784e-06, "logits/chosen": -0.4465596377849579, "logits/rejected": -0.431430846452713, "logps/chosen": -133.80523681640625, "logps/rejected": -151.87208557128906, "loss": 0.1012, "rewards/accuracies": 1.0, "rewards/chosen": 0.6760318875312805, "rewards/margins": 4.073056697845459, "rewards/rejected": -3.3970248699188232, "step": 314 }, { "epoch": 0.11, "learning_rate": 1.997050301754371e-06, "logits/chosen": -0.3694342374801636, "logits/rejected": -0.3683392405509949, "logps/chosen": -210.81600952148438, "logps/rejected": -206.78939819335938, "loss": 0.114, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6517124176025391, "rewards/margins": 3.979224920272827, "rewards/rejected": -3.327512741088867, "step": 315 }, { "epoch": 0.11, "learning_rate": 1.9970077286399556e-06, "logits/chosen": -0.3966459333896637, "logits/rejected": -0.3878629803657532, "logps/chosen": -182.91751098632812, "logps/rejected": -213.7628173828125, "loss": 0.124, "rewards/accuracies": 0.9375, "rewards/chosen": 0.05799799785017967, "rewards/margins": 3.4946765899658203, "rewards/rejected": -3.436678409576416, "step": 316 }, { "epoch": 0.11, "learning_rate": 1.9969648509534388e-06, "logits/chosen": -0.4105909764766693, "logits/rejected": -0.3939398527145386, "logps/chosen": -193.4453582763672, "logps/rejected": -198.99209594726562, "loss": 0.0656, "rewards/accuracies": 1.0, "rewards/chosen": 0.5645559430122375, "rewards/margins": 4.967166900634766, "rewards/rejected": -4.40261173248291, "step": 317 }, { "epoch": 0.11, "learning_rate": 1.996921668707918e-06, "logits/chosen": -0.4215944707393646, "logits/rejected": -0.41621077060699463, "logps/chosen": -162.2460479736328, "logps/rejected": -208.67092895507812, "loss": 0.0872, "rewards/accuracies": 1.0, "rewards/chosen": 0.29505807161331177, "rewards/margins": 4.263617992401123, "rewards/rejected": -3.968559741973877, "step": 318 }, { "epoch": 0.11, "learning_rate": 1.9968781819165856e-06, "logits/chosen": -0.46148940920829773, "logits/rejected": -0.44675692915916443, "logps/chosen": -246.35696411132812, "logps/rejected": -209.72743225097656, "loss": 0.1396, "rewards/accuracies": 1.0, "rewards/chosen": 0.3079659938812256, "rewards/margins": 5.103099346160889, "rewards/rejected": -4.795133590698242, "step": 319 }, { "epoch": 0.11, "learning_rate": 1.9968343905927254e-06, "logits/chosen": -0.5648790597915649, "logits/rejected": -0.5496253371238708, "logps/chosen": -181.71897888183594, "logps/rejected": -197.12716674804688, "loss": 0.1321, "rewards/accuracies": 1.0, "rewards/chosen": 0.9378310441970825, "rewards/margins": 3.59822940826416, "rewards/rejected": -2.660398483276367, "step": 320 }, { "epoch": 0.11, "learning_rate": 1.9967902947497155e-06, "logits/chosen": -0.43477287888526917, "logits/rejected": -0.4196551442146301, "logps/chosen": -138.52294921875, "logps/rejected": -192.47921752929688, "loss": 0.1327, "rewards/accuracies": 1.0, "rewards/chosen": 0.04718567430973053, "rewards/margins": 5.206087589263916, "rewards/rejected": -5.158902168273926, "step": 321 }, { "epoch": 0.11, "learning_rate": 1.9967458944010266e-06, "logits/chosen": -0.43806353211402893, "logits/rejected": -0.45523327589035034, "logps/chosen": -142.8082275390625, "logps/rejected": -225.1145782470703, "loss": 0.1428, "rewards/accuracies": 1.0, "rewards/chosen": 0.5010855197906494, "rewards/margins": 3.6881461143493652, "rewards/rejected": -3.1870603561401367, "step": 322 }, { "epoch": 0.11, "learning_rate": 1.9967011895602228e-06, "logits/chosen": -0.4418567419052124, "logits/rejected": -0.4387585520744324, "logps/chosen": -174.21185302734375, "logps/rejected": -202.39630126953125, "loss": 0.089, "rewards/accuracies": 1.0, "rewards/chosen": 0.15414512157440186, "rewards/margins": 3.7675461769104004, "rewards/rejected": -3.61340069770813, "step": 323 }, { "epoch": 0.11, "learning_rate": 1.99665618024096e-06, "logits/chosen": -0.3768405616283417, "logits/rejected": -0.3807564377784729, "logps/chosen": -220.30348205566406, "logps/rejected": -271.5171203613281, "loss": 0.0833, "rewards/accuracies": 1.0, "rewards/chosen": 1.0624243021011353, "rewards/margins": 5.615730285644531, "rewards/rejected": -4.5533061027526855, "step": 324 }, { "epoch": 0.11, "learning_rate": 1.996610866456988e-06, "logits/chosen": -0.4651796519756317, "logits/rejected": -0.4586053490638733, "logps/chosen": -234.34593200683594, "logps/rejected": -284.54949951171875, "loss": 0.1014, "rewards/accuracies": 1.0, "rewards/chosen": 1.0730353593826294, "rewards/margins": 4.848196506500244, "rewards/rejected": -3.7751617431640625, "step": 325 }, { "epoch": 0.11, "learning_rate": 1.9965652482221503e-06, "logits/chosen": -0.5229431390762329, "logits/rejected": -0.5109032988548279, "logps/chosen": -221.20733642578125, "logps/rejected": -218.13296508789062, "loss": 0.0954, "rewards/accuracies": 1.0, "rewards/chosen": 1.2607207298278809, "rewards/margins": 4.858883857727051, "rewards/rejected": -3.598163366317749, "step": 326 }, { "epoch": 0.11, "learning_rate": 1.996519325550382e-06, "logits/chosen": -0.4895128011703491, "logits/rejected": -0.47226518392562866, "logps/chosen": -194.82301330566406, "logps/rejected": -196.51400756835938, "loss": 0.0948, "rewards/accuracies": 1.0, "rewards/chosen": 0.17480280995368958, "rewards/margins": 3.627124547958374, "rewards/rejected": -3.4523212909698486, "step": 327 }, { "epoch": 0.11, "learning_rate": 1.996473098455712e-06, "logits/chosen": -0.48711690306663513, "logits/rejected": -0.4875023365020752, "logps/chosen": -212.68624877929688, "logps/rejected": -235.943115234375, "loss": 0.0998, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3355870544910431, "rewards/margins": 4.449376583099365, "rewards/rejected": -4.1137895584106445, "step": 328 }, { "epoch": 0.11, "learning_rate": 1.996426566952262e-06, "logits/chosen": -0.40132343769073486, "logits/rejected": -0.39916858077049255, "logps/chosen": -206.21939086914062, "logps/rejected": -199.81399536132812, "loss": 0.1011, "rewards/accuracies": 1.0, "rewards/chosen": 0.7988061308860779, "rewards/margins": 4.143922328948975, "rewards/rejected": -3.345116138458252, "step": 329 }, { "epoch": 0.11, "learning_rate": 1.996379731054247e-06, "logits/chosen": -0.45072829723358154, "logits/rejected": -0.46536049246788025, "logps/chosen": -226.3848419189453, "logps/rejected": -313.3258056640625, "loss": 0.0676, "rewards/accuracies": 1.0, "rewards/chosen": 0.24091112613677979, "rewards/margins": 6.92340612411499, "rewards/rejected": -6.682495594024658, "step": 330 }, { "epoch": 0.11, "learning_rate": 1.9963325907759746e-06, "logits/chosen": -0.46174126863479614, "logits/rejected": -0.4644361138343811, "logps/chosen": -275.3448791503906, "logps/rejected": -321.1332092285156, "loss": 0.1222, "rewards/accuracies": 1.0, "rewards/chosen": 0.4768340587615967, "rewards/margins": 5.129276275634766, "rewards/rejected": -4.65244197845459, "step": 331 }, { "epoch": 0.11, "learning_rate": 1.996285146131845e-06, "logits/chosen": -0.36568549275398254, "logits/rejected": -0.35503801703453064, "logps/chosen": -155.03318786621094, "logps/rejected": -196.6220703125, "loss": 0.1149, "rewards/accuracies": 0.9375, "rewards/chosen": 0.19035229086875916, "rewards/margins": 3.606964588165283, "rewards/rejected": -3.4166126251220703, "step": 332 }, { "epoch": 0.11, "learning_rate": 1.9962373971363525e-06, "logits/chosen": -0.4826565384864807, "logits/rejected": -0.4758758842945099, "logps/chosen": -207.3275604248047, "logps/rejected": -205.39013671875, "loss": 0.1023, "rewards/accuracies": 0.9375, "rewards/chosen": 0.32030951976776123, "rewards/margins": 4.37136173248291, "rewards/rejected": -4.051052570343018, "step": 333 }, { "epoch": 0.11, "learning_rate": 1.9961893438040838e-06, "logits/chosen": -0.4444243907928467, "logits/rejected": -0.4523608684539795, "logps/chosen": -77.12159729003906, "logps/rejected": -147.0594482421875, "loss": 0.125, "rewards/accuracies": 1.0, "rewards/chosen": 0.003772526979446411, "rewards/margins": 3.732811450958252, "rewards/rejected": -3.729038715362549, "step": 334 }, { "epoch": 0.11, "learning_rate": 1.996140986149718e-06, "logits/chosen": -0.46059927344322205, "logits/rejected": -0.46917709708213806, "logps/chosen": -134.472900390625, "logps/rejected": -195.556884765625, "loss": 0.148, "rewards/accuracies": 1.0, "rewards/chosen": 0.07121880352497101, "rewards/margins": 4.197135925292969, "rewards/rejected": -4.125916957855225, "step": 335 }, { "epoch": 0.11, "learning_rate": 1.9960923241880277e-06, "logits/chosen": -0.4892587661743164, "logits/rejected": -0.4932560622692108, "logps/chosen": -188.1172332763672, "logps/rejected": -230.45504760742188, "loss": 0.1038, "rewards/accuracies": 1.0, "rewards/chosen": 0.34635627269744873, "rewards/margins": 3.7776734828948975, "rewards/rejected": -3.4313173294067383, "step": 336 }, { "epoch": 0.12, "learning_rate": 1.996043357933879e-06, "logits/chosen": -0.4575473666191101, "logits/rejected": -0.46887868642807007, "logps/chosen": -122.29627990722656, "logps/rejected": -184.45718383789062, "loss": 0.1152, "rewards/accuracies": 1.0, "rewards/chosen": -0.09887946397066116, "rewards/margins": 4.291030406951904, "rewards/rejected": -4.389909267425537, "step": 337 }, { "epoch": 0.12, "learning_rate": 1.99599408740223e-06, "logits/chosen": -0.5197864174842834, "logits/rejected": -0.5136945843696594, "logps/chosen": -210.3054656982422, "logps/rejected": -245.45596313476562, "loss": 0.1002, "rewards/accuracies": 1.0, "rewards/chosen": 0.36641472578048706, "rewards/margins": 5.705292224884033, "rewards/rejected": -5.338877201080322, "step": 338 }, { "epoch": 0.12, "learning_rate": 1.9959445126081323e-06, "logits/chosen": -0.46808454394340515, "logits/rejected": -0.46145448088645935, "logps/chosen": -144.09042358398438, "logps/rejected": -171.29359436035156, "loss": 0.1051, "rewards/accuracies": 1.0, "rewards/chosen": 0.2353702336549759, "rewards/margins": 3.3901960849761963, "rewards/rejected": -3.1548256874084473, "step": 339 }, { "epoch": 0.12, "learning_rate": 1.995894633566731e-06, "logits/chosen": -0.4475659132003784, "logits/rejected": -0.45271509885787964, "logps/chosen": -146.2057647705078, "logps/rejected": -219.630615234375, "loss": 0.0945, "rewards/accuracies": 1.0, "rewards/chosen": 0.20842885971069336, "rewards/margins": 6.01599645614624, "rewards/rejected": -5.807568073272705, "step": 340 }, { "epoch": 0.12, "learning_rate": 1.995844450293262e-06, "logits/chosen": -0.47280195355415344, "logits/rejected": -0.46545353531837463, "logps/chosen": -200.674560546875, "logps/rejected": -279.7247314453125, "loss": 0.1068, "rewards/accuracies": 1.0, "rewards/chosen": 0.7128838896751404, "rewards/margins": 5.257430076599121, "rewards/rejected": -4.544546604156494, "step": 341 }, { "epoch": 0.12, "learning_rate": 1.9957939628030566e-06, "logits/chosen": -0.5303311944007874, "logits/rejected": -0.5336466431617737, "logps/chosen": -203.4199676513672, "logps/rejected": -257.2626953125, "loss": 0.0679, "rewards/accuracies": 1.0, "rewards/chosen": 0.40640950202941895, "rewards/margins": 5.618662357330322, "rewards/rejected": -5.212253570556641, "step": 342 }, { "epoch": 0.12, "learning_rate": 1.9957431711115384e-06, "logits/chosen": -0.4368109107017517, "logits/rejected": -0.4490359425544739, "logps/chosen": -116.56057739257812, "logps/rejected": -157.17837524414062, "loss": 0.067, "rewards/accuracies": 1.0, "rewards/chosen": -0.07672742754220963, "rewards/margins": 4.130671977996826, "rewards/rejected": -4.207399845123291, "step": 343 }, { "epoch": 0.12, "learning_rate": 1.9956920752342224e-06, "logits/chosen": -0.44462451338768005, "logits/rejected": -0.4401261806488037, "logps/chosen": -162.25399780273438, "logps/rejected": -217.0417022705078, "loss": 0.1247, "rewards/accuracies": 1.0, "rewards/chosen": 0.5305707454681396, "rewards/margins": 4.757296085357666, "rewards/rejected": -4.226725101470947, "step": 344 }, { "epoch": 0.12, "learning_rate": 1.995640675186718e-06, "logits/chosen": -0.48350855708122253, "logits/rejected": -0.47006770968437195, "logps/chosen": -204.9075927734375, "logps/rejected": -287.24969482421875, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/chosen": 0.5893440842628479, "rewards/margins": 6.6146135330200195, "rewards/rejected": -6.025269508361816, "step": 345 }, { "epoch": 0.12, "learning_rate": 1.995588970984728e-06, "logits/chosen": -0.49875664710998535, "logits/rejected": -0.49001795053482056, "logps/chosen": -199.738525390625, "logps/rejected": -165.00189208984375, "loss": 0.0852, "rewards/accuracies": 1.0, "rewards/chosen": 0.89272141456604, "rewards/margins": 4.378060817718506, "rewards/rejected": -3.485339403152466, "step": 346 }, { "epoch": 0.12, "learning_rate": 1.9955369626440464e-06, "logits/chosen": -0.49917444586753845, "logits/rejected": -0.4921172857284546, "logps/chosen": -193.96258544921875, "logps/rejected": -242.90655517578125, "loss": 0.0914, "rewards/accuracies": 1.0, "rewards/chosen": 0.2701696753501892, "rewards/margins": 5.303501129150391, "rewards/rejected": -5.033331394195557, "step": 347 }, { "epoch": 0.12, "learning_rate": 1.995484650180562e-06, "logits/chosen": -0.47614917159080505, "logits/rejected": -0.47219419479370117, "logps/chosen": -205.9112091064453, "logps/rejected": -258.2361145019531, "loss": 0.0966, "rewards/accuracies": 1.0, "rewards/chosen": 0.4445757269859314, "rewards/margins": 5.628198146820068, "rewards/rejected": -5.183623313903809, "step": 348 }, { "epoch": 0.12, "learning_rate": 1.9954320336102547e-06, "logits/chosen": -0.47252562642097473, "logits/rejected": -0.4700027406215668, "logps/chosen": -200.30662536621094, "logps/rejected": -207.8358612060547, "loss": 0.1205, "rewards/accuracies": 1.0, "rewards/chosen": 0.6807957887649536, "rewards/margins": 4.848952770233154, "rewards/rejected": -4.168156623840332, "step": 349 }, { "epoch": 0.12, "learning_rate": 1.9953791129491983e-06, "logits/chosen": -0.5125841498374939, "logits/rejected": -0.5139318108558655, "logps/chosen": -202.4232177734375, "logps/rejected": -250.39108276367188, "loss": 0.1272, "rewards/accuracies": 1.0, "rewards/chosen": 0.4278804361820221, "rewards/margins": 4.001195907592773, "rewards/rejected": -3.573315382003784, "step": 350 }, { "epoch": 0.12, "learning_rate": 1.995325888213559e-06, "logits/chosen": -0.500829815864563, "logits/rejected": -0.49840739369392395, "logps/chosen": -224.59625244140625, "logps/rejected": -252.26925659179688, "loss": 0.1032, "rewards/accuracies": 0.9375, "rewards/chosen": -0.015708938241004944, "rewards/margins": 4.481995105743408, "rewards/rejected": -4.497703552246094, "step": 351 }, { "epoch": 0.12, "learning_rate": 1.9952723594195977e-06, "logits/chosen": -0.5466257333755493, "logits/rejected": -0.5596123337745667, "logps/chosen": -150.22195434570312, "logps/rejected": -220.324951171875, "loss": 0.1002, "rewards/accuracies": 1.0, "rewards/chosen": 0.996455729007721, "rewards/margins": 6.353837490081787, "rewards/rejected": -5.357381343841553, "step": 352 }, { "epoch": 0.12, "learning_rate": 1.9952185265836646e-06, "logits/chosen": -0.5134869813919067, "logits/rejected": -0.5097974538803101, "logps/chosen": -226.3052520751953, "logps/rejected": -295.0455017089844, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": -0.03564798831939697, "rewards/margins": 5.767333030700684, "rewards/rejected": -5.802980899810791, "step": 353 }, { "epoch": 0.12, "learning_rate": 1.9951643897222064e-06, "logits/chosen": -0.4608212411403656, "logits/rejected": -0.462986022233963, "logps/chosen": -142.49166870117188, "logps/rejected": -193.8701629638672, "loss": 0.1186, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5058528184890747, "rewards/margins": 3.758740186691284, "rewards/rejected": -3.25288724899292, "step": 354 }, { "epoch": 0.12, "learning_rate": 1.995109948851761e-06, "logits/chosen": -0.5278061032295227, "logits/rejected": -0.505395770072937, "logps/chosen": -198.75375366210938, "logps/rejected": -221.84156799316406, "loss": 0.0904, "rewards/accuracies": 1.0, "rewards/chosen": 0.7754907011985779, "rewards/margins": 5.53749418258667, "rewards/rejected": -4.762003421783447, "step": 355 }, { "epoch": 0.12, "learning_rate": 1.9950552039889584e-06, "logits/chosen": -0.5235865116119385, "logits/rejected": -0.5240653157234192, "logps/chosen": -152.8104705810547, "logps/rejected": -239.8953857421875, "loss": 0.08, "rewards/accuracies": 1.0, "rewards/chosen": -0.3512006103992462, "rewards/margins": 5.759964942932129, "rewards/rejected": -6.1111650466918945, "step": 356 }, { "epoch": 0.12, "learning_rate": 1.9950001551505235e-06, "logits/chosen": -0.5319740772247314, "logits/rejected": -0.5383834838867188, "logps/chosen": -262.6832275390625, "logps/rejected": -339.7005920410156, "loss": 0.0645, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7317153215408325, "rewards/margins": 5.4283905029296875, "rewards/rejected": -4.696674823760986, "step": 357 }, { "epoch": 0.12, "learning_rate": 1.9949448023532723e-06, "logits/chosen": -0.5619550943374634, "logits/rejected": -0.557744026184082, "logps/chosen": -270.64990234375, "logps/rejected": -309.3435974121094, "loss": 0.0905, "rewards/accuracies": 1.0, "rewards/chosen": 0.3816154897212982, "rewards/margins": 5.7379560470581055, "rewards/rejected": -5.3563408851623535, "step": 358 }, { "epoch": 0.12, "learning_rate": 1.9948891456141146e-06, "logits/chosen": -0.4012269973754883, "logits/rejected": -0.3980208933353424, "logps/chosen": -158.4585418701172, "logps/rejected": -243.857666015625, "loss": 0.0613, "rewards/accuracies": 1.0, "rewards/chosen": 0.158749520778656, "rewards/margins": 6.874906063079834, "rewards/rejected": -6.716156959533691, "step": 359 }, { "epoch": 0.12, "learning_rate": 1.994833184950053e-06, "logits/chosen": -0.5763041377067566, "logits/rejected": -0.5754438638687134, "logps/chosen": -196.81126403808594, "logps/rejected": -271.4375, "loss": 0.1105, "rewards/accuracies": 1.0, "rewards/chosen": 0.958350419998169, "rewards/margins": 5.703655242919922, "rewards/rejected": -4.745305061340332, "step": 360 }, { "epoch": 0.12, "learning_rate": 1.994776920378182e-06, "logits/chosen": -0.5424860119819641, "logits/rejected": -0.5205977559089661, "logps/chosen": -258.5152587890625, "logps/rejected": -285.639404296875, "loss": 0.0948, "rewards/accuracies": 1.0, "rewards/chosen": 0.7542229890823364, "rewards/margins": 7.294932842254639, "rewards/rejected": -6.540709495544434, "step": 361 }, { "epoch": 0.12, "learning_rate": 1.99472035191569e-06, "logits/chosen": -0.484157919883728, "logits/rejected": -0.47724759578704834, "logps/chosen": -182.16452026367188, "logps/rejected": -246.5890350341797, "loss": 0.1084, "rewards/accuracies": 1.0, "rewards/chosen": 0.7355098724365234, "rewards/margins": 5.758846282958984, "rewards/rejected": -5.023336410522461, "step": 362 }, { "epoch": 0.12, "learning_rate": 1.994663479579858e-06, "logits/chosen": -0.5313054919242859, "logits/rejected": -0.5360025763511658, "logps/chosen": -203.75343322753906, "logps/rejected": -275.2561950683594, "loss": 0.0648, "rewards/accuracies": 1.0, "rewards/chosen": 0.5590600967407227, "rewards/margins": 7.013777732849121, "rewards/rejected": -6.454718112945557, "step": 363 }, { "epoch": 0.12, "learning_rate": 1.9946063033880597e-06, "logits/chosen": -0.49619996547698975, "logits/rejected": -0.4978720247745514, "logps/chosen": -188.73304748535156, "logps/rejected": -236.78550720214844, "loss": 0.1173, "rewards/accuracies": 1.0, "rewards/chosen": -0.1006099060177803, "rewards/margins": 4.2193498611450195, "rewards/rejected": -4.31995964050293, "step": 364 }, { "epoch": 0.12, "learning_rate": 1.9945488233577616e-06, "logits/chosen": -0.42754873633384705, "logits/rejected": -0.4155294895172119, "logps/chosen": -252.07118225097656, "logps/rejected": -250.62435913085938, "loss": 0.0897, "rewards/accuracies": 1.0, "rewards/chosen": 0.9503533244132996, "rewards/margins": 6.197206020355225, "rewards/rejected": -5.246852874755859, "step": 365 }, { "epoch": 0.12, "learning_rate": 1.9944910395065227e-06, "logits/chosen": -0.48365673422813416, "logits/rejected": -0.4811374247074127, "logps/chosen": -141.88682556152344, "logps/rejected": -185.72654724121094, "loss": 0.12, "rewards/accuracies": 1.0, "rewards/chosen": -0.04747316241264343, "rewards/margins": 3.69620680809021, "rewards/rejected": -3.743680000305176, "step": 366 }, { "epoch": 0.13, "learning_rate": 1.994432951851996e-06, "logits/chosen": -0.49836358428001404, "logits/rejected": -0.5009729862213135, "logps/chosen": -179.04660034179688, "logps/rejected": -251.1857147216797, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": 0.6520076394081116, "rewards/margins": 5.213634014129639, "rewards/rejected": -4.56162691116333, "step": 367 }, { "epoch": 0.13, "learning_rate": 1.9943745604119256e-06, "logits/chosen": -0.5333894491195679, "logits/rejected": -0.5431580543518066, "logps/chosen": -146.4069366455078, "logps/rejected": -201.63038635253906, "loss": 0.1163, "rewards/accuracies": 1.0, "rewards/chosen": 0.6473158597946167, "rewards/margins": 4.557812690734863, "rewards/rejected": -3.910496711730957, "step": 368 }, { "epoch": 0.13, "learning_rate": 1.99431586520415e-06, "logits/chosen": -0.550730288028717, "logits/rejected": -0.5418885946273804, "logps/chosen": -171.68780517578125, "logps/rejected": -224.53048706054688, "loss": 0.0857, "rewards/accuracies": 1.0, "rewards/chosen": -0.628834068775177, "rewards/margins": 5.545014381408691, "rewards/rejected": -6.173848628997803, "step": 369 }, { "epoch": 0.13, "learning_rate": 1.9942568662465994e-06, "logits/chosen": -0.5453436374664307, "logits/rejected": -0.5451730489730835, "logps/chosen": -217.4578857421875, "logps/rejected": -306.131103515625, "loss": 0.0759, "rewards/accuracies": 0.9375, "rewards/chosen": -0.34425088763237, "rewards/margins": 6.178825378417969, "rewards/rejected": -6.523076057434082, "step": 370 }, { "epoch": 0.13, "learning_rate": 1.9941975635572973e-06, "logits/chosen": -0.5441616177558899, "logits/rejected": -0.5360085368156433, "logps/chosen": -191.4613037109375, "logps/rejected": -244.8155517578125, "loss": 0.0678, "rewards/accuracies": 1.0, "rewards/chosen": 0.43325841426849365, "rewards/margins": 6.767821311950684, "rewards/rejected": -6.3345627784729, "step": 371 }, { "epoch": 0.13, "learning_rate": 1.9941379571543595e-06, "logits/chosen": -0.5230096578598022, "logits/rejected": -0.5114243626594543, "logps/chosen": -169.6440887451172, "logps/rejected": -197.43017578125, "loss": 0.1087, "rewards/accuracies": 0.9375, "rewards/chosen": 0.8818077445030212, "rewards/margins": 5.363513946533203, "rewards/rejected": -4.4817070960998535, "step": 372 }, { "epoch": 0.13, "learning_rate": 1.9940780470559954e-06, "logits/chosen": -0.5872077941894531, "logits/rejected": -0.5790135264396667, "logps/chosen": -198.55001831054688, "logps/rejected": -207.8117218017578, "loss": 0.1109, "rewards/accuracies": 1.0, "rewards/chosen": 0.5045213103294373, "rewards/margins": 4.5392584800720215, "rewards/rejected": -4.034737586975098, "step": 373 }, { "epoch": 0.13, "learning_rate": 1.994017833280506e-06, "logits/chosen": -0.5693016648292542, "logits/rejected": -0.5591641664505005, "logps/chosen": -140.73692321777344, "logps/rejected": -203.28347778320312, "loss": 0.0869, "rewards/accuracies": 1.0, "rewards/chosen": 0.05185642093420029, "rewards/margins": 4.885408878326416, "rewards/rejected": -4.833552360534668, "step": 374 }, { "epoch": 0.13, "learning_rate": 1.993957315846287e-06, "logits/chosen": -0.4700160026550293, "logits/rejected": -0.4662608504295349, "logps/chosen": -152.06884765625, "logps/rejected": -214.07369995117188, "loss": 0.1018, "rewards/accuracies": 1.0, "rewards/chosen": 0.5514971017837524, "rewards/margins": 5.1355977058410645, "rewards/rejected": -4.584100246429443, "step": 375 }, { "epoch": 0.13, "learning_rate": 1.9938964947718248e-06, "logits/chosen": -0.5046513676643372, "logits/rejected": -0.5074822306632996, "logps/chosen": -155.9630889892578, "logps/rejected": -211.26483154296875, "loss": 0.0931, "rewards/accuracies": 1.0, "rewards/chosen": -0.21037261188030243, "rewards/margins": 4.854276657104492, "rewards/rejected": -5.06464958190918, "step": 376 }, { "epoch": 0.13, "learning_rate": 1.993835370075699e-06, "logits/chosen": -0.5874579548835754, "logits/rejected": -0.5871585011482239, "logps/chosen": -171.1492919921875, "logps/rejected": -210.9904327392578, "loss": 0.0775, "rewards/accuracies": 1.0, "rewards/chosen": -0.12133531272411346, "rewards/margins": 4.592319488525391, "rewards/rejected": -4.7136549949646, "step": 377 }, { "epoch": 0.13, "learning_rate": 1.993773941776583e-06, "logits/chosen": -0.5317906737327576, "logits/rejected": -0.5305454134941101, "logps/chosen": -162.1766815185547, "logps/rejected": -263.15020751953125, "loss": 0.0785, "rewards/accuracies": 1.0, "rewards/chosen": -0.36581099033355713, "rewards/margins": 6.41455078125, "rewards/rejected": -6.780361175537109, "step": 378 }, { "epoch": 0.13, "learning_rate": 1.9937122098932426e-06, "logits/chosen": -0.5902105569839478, "logits/rejected": -0.5597472190856934, "logps/chosen": -206.01487731933594, "logps/rejected": -247.7706756591797, "loss": 0.0819, "rewards/accuracies": 1.0, "rewards/chosen": 0.1927393674850464, "rewards/margins": 6.225776195526123, "rewards/rejected": -6.033037185668945, "step": 379 }, { "epoch": 0.13, "learning_rate": 1.993650174444535e-06, "logits/chosen": -0.558541476726532, "logits/rejected": -0.5607544183731079, "logps/chosen": -210.57154846191406, "logps/rejected": -280.32781982421875, "loss": 0.0975, "rewards/accuracies": 1.0, "rewards/chosen": 0.15827062726020813, "rewards/margins": 7.618025779724121, "rewards/rejected": -7.459755897521973, "step": 380 }, { "epoch": 0.13, "learning_rate": 1.9935878354494123e-06, "logits/chosen": -0.48245319724082947, "logits/rejected": -0.4764789342880249, "logps/chosen": -164.22100830078125, "logps/rejected": -220.96803283691406, "loss": 0.087, "rewards/accuracies": 1.0, "rewards/chosen": 0.031092405319213867, "rewards/margins": 5.364202499389648, "rewards/rejected": -5.3331098556518555, "step": 381 }, { "epoch": 0.13, "learning_rate": 1.9935251929269174e-06, "logits/chosen": -0.6126720905303955, "logits/rejected": -0.5998006463050842, "logps/chosen": -213.19927978515625, "logps/rejected": -221.03762817382812, "loss": 0.0871, "rewards/accuracies": 1.0, "rewards/chosen": 0.38763928413391113, "rewards/margins": 4.421794414520264, "rewards/rejected": -4.034155368804932, "step": 382 }, { "epoch": 0.13, "learning_rate": 1.9934622468961872e-06, "logits/chosen": -0.45678895711898804, "logits/rejected": -0.44517821073532104, "logps/chosen": -169.45382690429688, "logps/rejected": -193.4560546875, "loss": 0.0687, "rewards/accuracies": 1.0, "rewards/chosen": -0.3137757182121277, "rewards/margins": 4.782711029052734, "rewards/rejected": -5.096486568450928, "step": 383 }, { "epoch": 0.13, "learning_rate": 1.9933989973764506e-06, "logits/chosen": -0.4821074604988098, "logits/rejected": -0.47922658920288086, "logps/chosen": -209.41969299316406, "logps/rejected": -282.2169189453125, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": 0.8878277540206909, "rewards/margins": 7.5779619216918945, "rewards/rejected": -6.690134048461914, "step": 384 }, { "epoch": 0.13, "learning_rate": 1.9933354443870295e-06, "logits/chosen": -0.5090782642364502, "logits/rejected": -0.49593299627304077, "logps/chosen": -199.7574462890625, "logps/rejected": -216.8076171875, "loss": 0.0963, "rewards/accuracies": 1.0, "rewards/chosen": 0.08141501247882843, "rewards/margins": 4.485403060913086, "rewards/rejected": -4.403987884521484, "step": 385 }, { "epoch": 0.13, "learning_rate": 1.993271587947338e-06, "logits/chosen": -0.48402947187423706, "logits/rejected": -0.474618136882782, "logps/chosen": -100.53433227539062, "logps/rejected": -133.16619873046875, "loss": 0.104, "rewards/accuracies": 1.0, "rewards/chosen": 0.15095236897468567, "rewards/margins": 4.741864204406738, "rewards/rejected": -4.590910911560059, "step": 386 }, { "epoch": 0.13, "learning_rate": 1.993207428076885e-06, "logits/chosen": -0.6053364276885986, "logits/rejected": -0.611596941947937, "logps/chosen": -201.6093292236328, "logps/rejected": -236.06838989257812, "loss": 0.0927, "rewards/accuracies": 1.0, "rewards/chosen": -0.23149362206459045, "rewards/margins": 4.775941848754883, "rewards/rejected": -5.0074357986450195, "step": 387 }, { "epoch": 0.13, "learning_rate": 1.9931429647952682e-06, "logits/chosen": -0.5473848581314087, "logits/rejected": -0.5353136658668518, "logps/chosen": -194.57196044921875, "logps/rejected": -182.74986267089844, "loss": 0.0884, "rewards/accuracies": 1.0, "rewards/chosen": 0.6286775469779968, "rewards/margins": 4.59498929977417, "rewards/rejected": -3.966311454772949, "step": 388 }, { "epoch": 0.13, "learning_rate": 1.993078198122182e-06, "logits/chosen": -0.5778595805168152, "logits/rejected": -0.5595454573631287, "logps/chosen": -212.34341430664062, "logps/rejected": -268.6197509765625, "loss": 0.0772, "rewards/accuracies": 1.0, "rewards/chosen": 0.32179945707321167, "rewards/margins": 7.472598552703857, "rewards/rejected": -7.15079927444458, "step": 389 }, { "epoch": 0.13, "learning_rate": 1.9930131280774103e-06, "logits/chosen": -0.6781339049339294, "logits/rejected": -0.6729087233543396, "logps/chosen": -223.79873657226562, "logps/rejected": -252.07101440429688, "loss": 0.0984, "rewards/accuracies": 1.0, "rewards/chosen": 0.5564135909080505, "rewards/margins": 5.559327125549316, "rewards/rejected": -5.002913475036621, "step": 390 }, { "epoch": 0.13, "learning_rate": 1.9929477546808326e-06, "logits/chosen": -0.5957579016685486, "logits/rejected": -0.5757873058319092, "logps/chosen": -176.3624267578125, "logps/rejected": -163.26034545898438, "loss": 0.0981, "rewards/accuracies": 1.0, "rewards/chosen": 0.14729103446006775, "rewards/margins": 3.55570650100708, "rewards/rejected": -3.4084153175354004, "step": 391 }, { "epoch": 0.13, "learning_rate": 1.9928820779524184e-06, "logits/chosen": -0.5915986895561218, "logits/rejected": -0.5804966688156128, "logps/chosen": -166.58212280273438, "logps/rejected": -190.53126525878906, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": 0.1488422006368637, "rewards/margins": 5.29189395904541, "rewards/rejected": -5.143052101135254, "step": 392 }, { "epoch": 0.13, "learning_rate": 1.9928160979122317e-06, "logits/chosen": -0.5471780300140381, "logits/rejected": -0.5190848112106323, "logps/chosen": -189.08865356445312, "logps/rejected": -181.6259307861328, "loss": 0.0707, "rewards/accuracies": 1.0, "rewards/chosen": 0.08640500903129578, "rewards/margins": 4.974941253662109, "rewards/rejected": -4.88853645324707, "step": 393 }, { "epoch": 0.13, "learning_rate": 1.9927498145804277e-06, "logits/chosen": -0.5205125212669373, "logits/rejected": -0.5062243938446045, "logps/chosen": -157.04864501953125, "logps/rejected": -232.62042236328125, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.41658297181129456, "rewards/margins": 6.6447367668151855, "rewards/rejected": -6.228154182434082, "step": 394 }, { "epoch": 0.13, "learning_rate": 1.992683227977256e-06, "logits/chosen": -0.6669014096260071, "logits/rejected": -0.6649410128593445, "logps/chosen": -175.48631286621094, "logps/rejected": -254.26715087890625, "loss": 0.0758, "rewards/accuracies": 1.0, "rewards/chosen": -0.3245745301246643, "rewards/margins": 6.429210662841797, "rewards/rejected": -6.753785133361816, "step": 395 }, { "epoch": 0.14, "learning_rate": 1.9926163381230578e-06, "logits/chosen": -0.6431692838668823, "logits/rejected": -0.634501039981842, "logps/chosen": -218.07113647460938, "logps/rejected": -269.3594665527344, "loss": 0.0905, "rewards/accuracies": 1.0, "rewards/chosen": -0.24687328934669495, "rewards/margins": 5.584637641906738, "rewards/rejected": -5.831511497497559, "step": 396 }, { "epoch": 0.14, "learning_rate": 1.9925491450382663e-06, "logits/chosen": -0.5361858606338501, "logits/rejected": -0.513282299041748, "logps/chosen": -147.75213623046875, "logps/rejected": -221.34735107421875, "loss": 0.066, "rewards/accuracies": 1.0, "rewards/chosen": -0.4538000226020813, "rewards/margins": 5.48140811920166, "rewards/rejected": -5.935208320617676, "step": 397 }, { "epoch": 0.14, "learning_rate": 1.9924816487434085e-06, "logits/chosen": -0.6567928194999695, "logits/rejected": -0.6381385326385498, "logps/chosen": -222.41946411132812, "logps/rejected": -242.27667236328125, "loss": 0.1014, "rewards/accuracies": 0.9375, "rewards/chosen": -0.006245024502277374, "rewards/margins": 6.093300819396973, "rewards/rejected": -6.099546432495117, "step": 398 }, { "epoch": 0.14, "learning_rate": 1.992413849259104e-06, "logits/chosen": -0.6051262617111206, "logits/rejected": -0.5853128433227539, "logps/chosen": -148.14724731445312, "logps/rejected": -149.53111267089844, "loss": 0.1075, "rewards/accuracies": 1.0, "rewards/chosen": 0.3790878653526306, "rewards/margins": 4.514636039733887, "rewards/rejected": -4.1355485916137695, "step": 399 }, { "epoch": 0.14, "learning_rate": 1.992345746606063e-06, "logits/chosen": -0.6052067875862122, "logits/rejected": -0.5953944325447083, "logps/chosen": -180.3091583251953, "logps/rejected": -245.54327392578125, "loss": 0.0782, "rewards/accuracies": 1.0, "rewards/chosen": -0.06465888023376465, "rewards/margins": 5.9125213623046875, "rewards/rejected": -5.977179527282715, "step": 400 }, { "epoch": 0.14, "learning_rate": 1.9922773408050925e-06, "logits/chosen": -0.6272994875907898, "logits/rejected": -0.6092950105667114, "logps/chosen": -180.05274963378906, "logps/rejected": -197.97801208496094, "loss": 0.0897, "rewards/accuracies": 1.0, "rewards/chosen": 1.108228325843811, "rewards/margins": 5.273257255554199, "rewards/rejected": -4.165029048919678, "step": 401 }, { "epoch": 0.14, "learning_rate": 1.992208631877087e-06, "logits/chosen": -0.5919588804244995, "logits/rejected": -0.5800977349281311, "logps/chosen": -172.8690948486328, "logps/rejected": -196.9059600830078, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": -0.050962530076503754, "rewards/margins": 6.154799461364746, "rewards/rejected": -6.205761909484863, "step": 402 }, { "epoch": 0.14, "learning_rate": 1.992139619843038e-06, "logits/chosen": -0.6470677852630615, "logits/rejected": -0.6313095688819885, "logps/chosen": -143.03872680664062, "logps/rejected": -177.26043701171875, "loss": 0.0661, "rewards/accuracies": 1.0, "rewards/chosen": -0.09007259458303452, "rewards/margins": 4.2480058670043945, "rewards/rejected": -4.338078498840332, "step": 403 }, { "epoch": 0.14, "learning_rate": 1.9920703047240266e-06, "logits/chosen": -0.5937507748603821, "logits/rejected": -0.5782697796821594, "logps/chosen": -213.14125061035156, "logps/rejected": -234.58236694335938, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": 0.7809943556785583, "rewards/margins": 5.722841262817383, "rewards/rejected": -4.94184684753418, "step": 404 }, { "epoch": 0.14, "learning_rate": 1.992000686541228e-06, "logits/chosen": -0.6051216721534729, "logits/rejected": -0.6010292768478394, "logps/chosen": -159.43093872070312, "logps/rejected": -208.32345581054688, "loss": 0.0978, "rewards/accuracies": 1.0, "rewards/chosen": 0.3139275312423706, "rewards/margins": 5.243846893310547, "rewards/rejected": -4.929919719696045, "step": 405 }, { "epoch": 0.14, "learning_rate": 1.9919307653159095e-06, "logits/chosen": -0.6648198366165161, "logits/rejected": -0.6541640162467957, "logps/chosen": -250.46795654296875, "logps/rejected": -316.7771911621094, "loss": 0.0764, "rewards/accuracies": 1.0, "rewards/chosen": -0.16053307056427002, "rewards/margins": 7.571540832519531, "rewards/rejected": -7.732073783874512, "step": 406 }, { "epoch": 0.14, "learning_rate": 1.9918605410694316e-06, "logits/chosen": -0.5877513289451599, "logits/rejected": -0.5832951068878174, "logps/chosen": -184.73074340820312, "logps/rejected": -257.686767578125, "loss": 0.1078, "rewards/accuracies": 1.0, "rewards/chosen": -0.2660050392150879, "rewards/margins": 7.29006814956665, "rewards/rejected": -7.556073188781738, "step": 407 }, { "epoch": 0.14, "learning_rate": 1.991790013823246e-06, "logits/chosen": -0.5885722637176514, "logits/rejected": -0.5909903049468994, "logps/chosen": -228.81698608398438, "logps/rejected": -267.7356872558594, "loss": 0.0821, "rewards/accuracies": 0.9375, "rewards/chosen": 0.4565752446651459, "rewards/margins": 5.675168991088867, "rewards/rejected": -5.218593597412109, "step": 408 }, { "epoch": 0.14, "learning_rate": 1.991719183598898e-06, "logits/chosen": -0.6611443161964417, "logits/rejected": -0.6677222847938538, "logps/chosen": -205.2543487548828, "logps/rejected": -285.2443542480469, "loss": 0.0739, "rewards/accuracies": 1.0, "rewards/chosen": -0.2071215808391571, "rewards/margins": 5.909677028656006, "rewards/rejected": -6.116798400878906, "step": 409 }, { "epoch": 0.14, "learning_rate": 1.9916480504180257e-06, "logits/chosen": -0.6158881783485413, "logits/rejected": -0.5886860489845276, "logps/chosen": -215.8511962890625, "logps/rejected": -225.45408630371094, "loss": 0.0779, "rewards/accuracies": 1.0, "rewards/chosen": 0.13261941075325012, "rewards/margins": 6.39888858795166, "rewards/rejected": -6.266269207000732, "step": 410 }, { "epoch": 0.14, "learning_rate": 1.991576614302359e-06, "logits/chosen": -0.6660731434822083, "logits/rejected": -0.65021151304245, "logps/chosen": -175.61782836914062, "logps/rejected": -237.8206024169922, "loss": 0.1165, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1667690873146057, "rewards/margins": 6.809041500091553, "rewards/rejected": -6.975811004638672, "step": 411 }, { "epoch": 0.14, "learning_rate": 1.9915048752737207e-06, "logits/chosen": -0.5597833395004272, "logits/rejected": -0.5416849851608276, "logps/chosen": -146.8406219482422, "logps/rejected": -175.7745819091797, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": -0.5822464227676392, "rewards/margins": 5.508040904998779, "rewards/rejected": -6.090286731719971, "step": 412 }, { "epoch": 0.14, "learning_rate": 1.9914328333540264e-06, "logits/chosen": -0.6321763396263123, "logits/rejected": -0.6147677302360535, "logps/chosen": -254.47158813476562, "logps/rejected": -288.52734375, "loss": 0.1672, "rewards/accuracies": 1.0, "rewards/chosen": -0.34289419651031494, "rewards/margins": 6.032918930053711, "rewards/rejected": -6.375812530517578, "step": 413 }, { "epoch": 0.14, "learning_rate": 1.991360488565283e-06, "logits/chosen": -0.657249391078949, "logits/rejected": -0.6500265002250671, "logps/chosen": -175.26589965820312, "logps/rejected": -247.21469116210938, "loss": 0.0824, "rewards/accuracies": 1.0, "rewards/chosen": -0.12701067328453064, "rewards/margins": 5.770928382873535, "rewards/rejected": -5.897938251495361, "step": 414 }, { "epoch": 0.14, "learning_rate": 1.991287840929592e-06, "logits/chosen": -0.7220807075500488, "logits/rejected": -0.7144685983657837, "logps/chosen": -126.95104217529297, "logps/rejected": -186.78375244140625, "loss": 0.0876, "rewards/accuracies": 1.0, "rewards/chosen": 0.003591112792491913, "rewards/margins": 4.842615127563477, "rewards/rejected": -4.839024066925049, "step": 415 }, { "epoch": 0.14, "learning_rate": 1.991214890469145e-06, "logits/chosen": -0.6311745047569275, "logits/rejected": -0.639719545841217, "logps/chosen": -173.2413330078125, "logps/rejected": -239.2959747314453, "loss": 0.0779, "rewards/accuracies": 0.9375, "rewards/chosen": 0.07521706819534302, "rewards/margins": 6.109485149383545, "rewards/rejected": -6.034267902374268, "step": 416 }, { "epoch": 0.14, "learning_rate": 1.9911416372062284e-06, "logits/chosen": -0.6636407375335693, "logits/rejected": -0.6527101993560791, "logps/chosen": -185.2737274169922, "logps/rejected": -221.11582946777344, "loss": 0.1384, "rewards/accuracies": 0.875, "rewards/chosen": 0.01673915982246399, "rewards/margins": 5.845710754394531, "rewards/rejected": -5.828972339630127, "step": 417 }, { "epoch": 0.14, "learning_rate": 1.9910680811632195e-06, "logits/chosen": -0.8302767872810364, "logits/rejected": -0.8163372278213501, "logps/chosen": -242.51243591308594, "logps/rejected": -326.5082702636719, "loss": 0.0894, "rewards/accuracies": 1.0, "rewards/chosen": -0.10451295971870422, "rewards/margins": 7.173513412475586, "rewards/rejected": -7.278026103973389, "step": 418 }, { "epoch": 0.14, "learning_rate": 1.990994222362589e-06, "logits/chosen": -0.6415287256240845, "logits/rejected": -0.6333483457565308, "logps/chosen": -167.8907012939453, "logps/rejected": -207.42906188964844, "loss": 0.0694, "rewards/accuracies": 1.0, "rewards/chosen": -0.6309280395507812, "rewards/margins": 4.512163162231445, "rewards/rejected": -5.143091678619385, "step": 419 }, { "epoch": 0.14, "learning_rate": 1.9909200608268996e-06, "logits/chosen": -0.687240719795227, "logits/rejected": -0.6718559265136719, "logps/chosen": -252.40977478027344, "logps/rejected": -287.0133056640625, "loss": 0.0839, "rewards/accuracies": 1.0, "rewards/chosen": 0.26245132088661194, "rewards/margins": 6.734663963317871, "rewards/rejected": -6.472212314605713, "step": 420 }, { "epoch": 0.14, "learning_rate": 1.9908455965788067e-06, "logits/chosen": -0.6143068075180054, "logits/rejected": -0.6040945649147034, "logps/chosen": -174.96493530273438, "logps/rejected": -225.56394958496094, "loss": 0.0728, "rewards/accuracies": 1.0, "rewards/chosen": -0.6648268699645996, "rewards/margins": 5.910372734069824, "rewards/rejected": -6.575199604034424, "step": 421 }, { "epoch": 0.14, "learning_rate": 1.990770829641058e-06, "logits/chosen": -0.6396437883377075, "logits/rejected": -0.6253166198730469, "logps/chosen": -147.7808380126953, "logps/rejected": -227.1660919189453, "loss": 0.068, "rewards/accuracies": 1.0, "rewards/chosen": 0.2838556468486786, "rewards/margins": 5.292267799377441, "rewards/rejected": -5.0084123611450195, "step": 422 }, { "epoch": 0.14, "learning_rate": 1.9906957600364937e-06, "logits/chosen": -0.7128890156745911, "logits/rejected": -0.6967065930366516, "logps/chosen": -232.0174560546875, "logps/rejected": -270.72747802734375, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": -0.14089122414588928, "rewards/margins": 4.880351543426514, "rewards/rejected": -5.021242618560791, "step": 423 }, { "epoch": 0.14, "learning_rate": 1.9906203877880464e-06, "logits/chosen": -0.7173699736595154, "logits/rejected": -0.7138528823852539, "logps/chosen": -131.5421142578125, "logps/rejected": -185.45140075683594, "loss": 0.0806, "rewards/accuracies": 1.0, "rewards/chosen": 0.21164456009864807, "rewards/margins": 6.014861106872559, "rewards/rejected": -5.803216457366943, "step": 424 }, { "epoch": 0.15, "learning_rate": 1.9905447129187417e-06, "logits/chosen": -0.6299283504486084, "logits/rejected": -0.6146520376205444, "logps/chosen": -220.38453674316406, "logps/rejected": -302.08697509765625, "loss": 0.09, "rewards/accuracies": 1.0, "rewards/chosen": -0.680473804473877, "rewards/margins": 6.7201433181762695, "rewards/rejected": -7.400617599487305, "step": 425 }, { "epoch": 0.15, "learning_rate": 1.9904687354516973e-06, "logits/chosen": -0.7408886551856995, "logits/rejected": -0.7159721851348877, "logps/chosen": -226.5364227294922, "logps/rejected": -275.8349609375, "loss": 0.0825, "rewards/accuracies": 0.9375, "rewards/chosen": -0.17034710943698883, "rewards/margins": 6.738502025604248, "rewards/rejected": -6.908849239349365, "step": 426 }, { "epoch": 0.15, "learning_rate": 1.9903924554101222e-06, "logits/chosen": -0.5907770991325378, "logits/rejected": -0.5982339382171631, "logps/chosen": -268.90045166015625, "logps/rejected": -342.1773376464844, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": -0.9372033476829529, "rewards/margins": 8.553874969482422, "rewards/rejected": -9.49107837677002, "step": 427 }, { "epoch": 0.15, "learning_rate": 1.9903158728173205e-06, "logits/chosen": -0.7195069789886475, "logits/rejected": -0.6946970224380493, "logps/chosen": -247.87973022460938, "logps/rejected": -266.30126953125, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -0.22710415720939636, "rewards/margins": 7.7029948234558105, "rewards/rejected": -7.930098533630371, "step": 428 }, { "epoch": 0.15, "learning_rate": 1.990238987696686e-06, "logits/chosen": -0.643136739730835, "logits/rejected": -0.629250705242157, "logps/chosen": -152.27748107910156, "logps/rejected": -172.9344940185547, "loss": 0.073, "rewards/accuracies": 1.0, "rewards/chosen": -0.3378306031227112, "rewards/margins": 4.421639442443848, "rewards/rejected": -4.759469985961914, "step": 429 }, { "epoch": 0.15, "learning_rate": 1.9901618000717064e-06, "logits/chosen": -0.6713863015174866, "logits/rejected": -0.6601622700691223, "logps/chosen": -193.099853515625, "logps/rejected": -260.4646301269531, "loss": 0.0694, "rewards/accuracies": 1.0, "rewards/chosen": 0.2229272425174713, "rewards/margins": 5.920440673828125, "rewards/rejected": -5.697513103485107, "step": 430 }, { "epoch": 0.15, "learning_rate": 1.9900843099659614e-06, "logits/chosen": -0.6825876235961914, "logits/rejected": -0.6835317015647888, "logps/chosen": -156.3742218017578, "logps/rejected": -220.3001708984375, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": 0.3589758574962616, "rewards/margins": 6.306166172027588, "rewards/rejected": -5.947190284729004, "step": 431 }, { "epoch": 0.15, "learning_rate": 1.9900065174031227e-06, "logits/chosen": -0.7358061671257019, "logits/rejected": -0.7306286096572876, "logps/chosen": -191.62168884277344, "logps/rejected": -255.11624145507812, "loss": 0.076, "rewards/accuracies": 0.9375, "rewards/chosen": 0.0487799346446991, "rewards/margins": 6.722264289855957, "rewards/rejected": -6.6734843254089355, "step": 432 }, { "epoch": 0.15, "learning_rate": 1.989928422406956e-06, "logits/chosen": -0.6563835144042969, "logits/rejected": -0.6548231244087219, "logps/chosen": -163.46517944335938, "logps/rejected": -205.596435546875, "loss": 0.0779, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9717164635658264, "rewards/margins": 5.505099773406982, "rewards/rejected": -6.476816177368164, "step": 433 }, { "epoch": 0.15, "learning_rate": 1.9898500250013172e-06, "logits/chosen": -0.6313825845718384, "logits/rejected": -0.6117590069770813, "logps/chosen": -192.12249755859375, "logps/rejected": -277.74041748046875, "loss": 0.118, "rewards/accuracies": 1.0, "rewards/chosen": 0.17604683339595795, "rewards/margins": 8.52646255493164, "rewards/rejected": -8.35041618347168, "step": 434 }, { "epoch": 0.15, "learning_rate": 1.989771325210156e-06, "logits/chosen": -0.6847187876701355, "logits/rejected": -0.6834619045257568, "logps/chosen": -134.9987335205078, "logps/rejected": -204.17208862304688, "loss": 0.0728, "rewards/accuracies": 1.0, "rewards/chosen": -0.7221643924713135, "rewards/margins": 4.710382461547852, "rewards/rejected": -5.432546138763428, "step": 435 }, { "epoch": 0.15, "learning_rate": 1.9896923230575144e-06, "logits/chosen": -0.7567680478096008, "logits/rejected": -0.7462976574897766, "logps/chosen": -142.40647888183594, "logps/rejected": -181.68408203125, "loss": 0.0786, "rewards/accuracies": 0.9375, "rewards/chosen": -0.11403068900108337, "rewards/margins": 5.636609077453613, "rewards/rejected": -5.750639915466309, "step": 436 }, { "epoch": 0.15, "learning_rate": 1.989613018567526e-06, "logits/chosen": -0.6923786401748657, "logits/rejected": -0.6893362998962402, "logps/chosen": -109.39179229736328, "logps/rejected": -177.74923706054688, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -0.3980853855609894, "rewards/margins": 4.959202766418457, "rewards/rejected": -5.357288360595703, "step": 437 }, { "epoch": 0.15, "learning_rate": 1.989533411764417e-06, "logits/chosen": -0.7479335069656372, "logits/rejected": -0.7343980073928833, "logps/chosen": -168.72918701171875, "logps/rejected": -215.7264862060547, "loss": 0.0703, "rewards/accuracies": 1.0, "rewards/chosen": 0.257872998714447, "rewards/margins": 6.2008562088012695, "rewards/rejected": -5.942983150482178, "step": 438 }, { "epoch": 0.15, "learning_rate": 1.989453502672507e-06, "logits/chosen": -0.6402138471603394, "logits/rejected": -0.6254709362983704, "logps/chosen": -171.7604522705078, "logps/rejected": -216.50457763671875, "loss": 0.0667, "rewards/accuracies": 1.0, "rewards/chosen": -0.16081678867340088, "rewards/margins": 6.994487762451172, "rewards/rejected": -7.155305862426758, "step": 439 }, { "epoch": 0.15, "learning_rate": 1.989373291316207e-06, "logits/chosen": -0.6850622892379761, "logits/rejected": -0.6669113039970398, "logps/chosen": -202.7044219970703, "logps/rejected": -297.5244445800781, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": 0.327785462141037, "rewards/margins": 8.915475845336914, "rewards/rejected": -8.587690353393555, "step": 440 }, { "epoch": 0.15, "learning_rate": 1.989292777720019e-06, "logits/chosen": -0.7174695134162903, "logits/rejected": -0.7161256074905396, "logps/chosen": -164.89988708496094, "logps/rejected": -242.0570831298828, "loss": 0.1249, "rewards/accuracies": 1.0, "rewards/chosen": -0.3507133722305298, "rewards/margins": 5.8082075119018555, "rewards/rejected": -6.158921241760254, "step": 441 }, { "epoch": 0.15, "learning_rate": 1.989211961908541e-06, "logits/chosen": -0.7483208775520325, "logits/rejected": -0.7383852601051331, "logps/chosen": -149.929931640625, "logps/rejected": -202.02325439453125, "loss": 0.08, "rewards/accuracies": 1.0, "rewards/chosen": 0.006501674652099609, "rewards/margins": 4.5908026695251465, "rewards/rejected": -4.584300518035889, "step": 442 }, { "epoch": 0.15, "learning_rate": 1.98913084390646e-06, "logits/chosen": -0.7202710509300232, "logits/rejected": -0.7055291533470154, "logps/chosen": -122.01713562011719, "logps/rejected": -195.15579223632812, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6658505797386169, "rewards/margins": 6.118661880493164, "rewards/rejected": -6.784512519836426, "step": 443 }, { "epoch": 0.15, "learning_rate": 1.989049423738556e-06, "logits/chosen": -0.7844656705856323, "logits/rejected": -0.7718104124069214, "logps/chosen": -184.092041015625, "logps/rejected": -262.772216796875, "loss": 0.091, "rewards/accuracies": 1.0, "rewards/chosen": 0.02028752863407135, "rewards/margins": 7.130314826965332, "rewards/rejected": -7.110027313232422, "step": 444 }, { "epoch": 0.15, "learning_rate": 1.9889677014297023e-06, "logits/chosen": -0.6321685314178467, "logits/rejected": -0.6286938190460205, "logps/chosen": -217.8742218017578, "logps/rejected": -274.31500244140625, "loss": 0.0647, "rewards/accuracies": 1.0, "rewards/chosen": -0.8545080423355103, "rewards/margins": 5.852554798126221, "rewards/rejected": -6.707063674926758, "step": 445 }, { "epoch": 0.15, "learning_rate": 1.9888856770048643e-06, "logits/chosen": -0.7455723881721497, "logits/rejected": -0.7224897146224976, "logps/chosen": -158.30484008789062, "logps/rejected": -172.44482421875, "loss": 0.0788, "rewards/accuracies": 1.0, "rewards/chosen": -0.5619282722473145, "rewards/margins": 5.552692413330078, "rewards/rejected": -6.114620208740234, "step": 446 }, { "epoch": 0.15, "learning_rate": 1.9888033504890984e-06, "logits/chosen": -0.7343344688415527, "logits/rejected": -0.735253632068634, "logps/chosen": -205.3547821044922, "logps/rejected": -289.7119445800781, "loss": 0.0885, "rewards/accuracies": 0.9375, "rewards/chosen": -0.21320077776908875, "rewards/margins": 7.433586120605469, "rewards/rejected": -7.646786689758301, "step": 447 }, { "epoch": 0.15, "learning_rate": 1.988720721907555e-06, "logits/chosen": -0.685785710811615, "logits/rejected": -0.6541160345077515, "logps/chosen": -195.0511474609375, "logps/rejected": -230.5569305419922, "loss": 0.0657, "rewards/accuracies": 1.0, "rewards/chosen": 0.6026992201805115, "rewards/margins": 7.097907066345215, "rewards/rejected": -6.495208263397217, "step": 448 }, { "epoch": 0.15, "learning_rate": 1.988637791285476e-06, "logits/chosen": -0.7837135195732117, "logits/rejected": -0.7849069833755493, "logps/chosen": -118.0024642944336, "logps/rejected": -208.31973266601562, "loss": 0.1006, "rewards/accuracies": 0.9375, "rewards/chosen": -0.512369692325592, "rewards/margins": 5.355894088745117, "rewards/rejected": -5.868264198303223, "step": 449 }, { "epoch": 0.15, "learning_rate": 1.9885545586481945e-06, "logits/chosen": -0.8176465034484863, "logits/rejected": -0.8115655779838562, "logps/chosen": -176.4977569580078, "logps/rejected": -225.867431640625, "loss": 0.0619, "rewards/accuracies": 1.0, "rewards/chosen": 0.21771101653575897, "rewards/margins": 4.629883766174316, "rewards/rejected": -4.412173271179199, "step": 450 }, { "epoch": 0.15, "learning_rate": 1.988471024021138e-06, "logits/chosen": -0.7684544324874878, "logits/rejected": -0.7616086006164551, "logps/chosen": -192.3586883544922, "logps/rejected": -239.78640747070312, "loss": 0.0745, "rewards/accuracies": 1.0, "rewards/chosen": 0.18918853998184204, "rewards/margins": 6.120048522949219, "rewards/rejected": -5.930859565734863, "step": 451 }, { "epoch": 0.15, "learning_rate": 1.9883871874298253e-06, "logits/chosen": -0.7511894106864929, "logits/rejected": -0.7444146871566772, "logps/chosen": -249.670166015625, "logps/rejected": -277.71337890625, "loss": 0.0695, "rewards/accuracies": 1.0, "rewards/chosen": 1.001878023147583, "rewards/margins": 7.347476482391357, "rewards/rejected": -6.345598220825195, "step": 452 }, { "epoch": 0.15, "learning_rate": 1.9883030488998663e-06, "logits/chosen": -0.8007248640060425, "logits/rejected": -0.7700740098953247, "logps/chosen": -288.0235290527344, "logps/rejected": -284.35662841796875, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": 0.2298157513141632, "rewards/margins": 6.298513412475586, "rewards/rejected": -6.068697452545166, "step": 453 }, { "epoch": 0.15, "learning_rate": 1.988218608456965e-06, "logits/chosen": -0.7818282246589661, "logits/rejected": -0.7742025256156921, "logps/chosen": -142.98086547851562, "logps/rejected": -199.0462646484375, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": -0.10478198528289795, "rewards/margins": 5.231759071350098, "rewards/rejected": -5.336541175842285, "step": 454 }, { "epoch": 0.16, "learning_rate": 1.9881338661269163e-06, "logits/chosen": -0.7117846012115479, "logits/rejected": -0.70198655128479, "logps/chosen": -210.31195068359375, "logps/rejected": -291.5275573730469, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": -0.09184503555297852, "rewards/margins": 9.42788028717041, "rewards/rejected": -9.519725799560547, "step": 455 }, { "epoch": 0.16, "learning_rate": 1.9880488219356083e-06, "logits/chosen": -0.7434107065200806, "logits/rejected": -0.7244666218757629, "logps/chosen": -227.69219970703125, "logps/rejected": -249.3534698486328, "loss": 0.0954, "rewards/accuracies": 1.0, "rewards/chosen": -0.5766361355781555, "rewards/margins": 6.7939252853393555, "rewards/rejected": -7.370561122894287, "step": 456 }, { "epoch": 0.16, "learning_rate": 1.9879634759090205e-06, "logits/chosen": -0.6993148326873779, "logits/rejected": -0.6904177665710449, "logps/chosen": -176.2819061279297, "logps/rejected": -269.5406188964844, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": -0.6190109848976135, "rewards/margins": 7.338454723358154, "rewards/rejected": -7.957466125488281, "step": 457 }, { "epoch": 0.16, "learning_rate": 1.9878778280732252e-06, "logits/chosen": -0.6729496121406555, "logits/rejected": -0.6557715535163879, "logps/chosen": -189.62408447265625, "logps/rejected": -260.1844482421875, "loss": 0.053, "rewards/accuracies": 1.0, "rewards/chosen": 0.2571462094783783, "rewards/margins": 6.851862907409668, "rewards/rejected": -6.594716548919678, "step": 458 }, { "epoch": 0.16, "learning_rate": 1.987791878454386e-06, "logits/chosen": -0.6426429152488708, "logits/rejected": -0.6374653577804565, "logps/chosen": -159.6026153564453, "logps/rejected": -266.5948791503906, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": -0.5848131775856018, "rewards/margins": 7.1361212730407715, "rewards/rejected": -7.72093391418457, "step": 459 }, { "epoch": 0.16, "learning_rate": 1.9877056270787603e-06, "logits/chosen": -0.7928441762924194, "logits/rejected": -0.7830841541290283, "logps/chosen": -120.16950225830078, "logps/rejected": -210.35760498046875, "loss": 0.0482, "rewards/accuracies": 1.0, "rewards/chosen": -0.2473292350769043, "rewards/margins": 6.470927715301514, "rewards/rejected": -6.718256950378418, "step": 460 }, { "epoch": 0.16, "learning_rate": 1.987619073972696e-06, "logits/chosen": -0.7311963438987732, "logits/rejected": -0.7128956913948059, "logps/chosen": -189.19403076171875, "logps/rejected": -228.78717041015625, "loss": 0.0669, "rewards/accuracies": 1.0, "rewards/chosen": -0.6031790971755981, "rewards/margins": 6.1992926597595215, "rewards/rejected": -6.802472114562988, "step": 461 }, { "epoch": 0.16, "learning_rate": 1.9875322191626334e-06, "logits/chosen": -0.7656455039978027, "logits/rejected": -0.745287299156189, "logps/chosen": -262.82745361328125, "logps/rejected": -329.00250244140625, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": -0.27265897393226624, "rewards/margins": 9.770225524902344, "rewards/rejected": -10.042884826660156, "step": 462 }, { "epoch": 0.16, "learning_rate": 1.987445062675107e-06, "logits/chosen": -0.7644910216331482, "logits/rejected": -0.7173686027526855, "logps/chosen": -229.5654754638672, "logps/rejected": -245.62872314453125, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -0.16986502707004547, "rewards/margins": 7.8483381271362305, "rewards/rejected": -8.018202781677246, "step": 463 }, { "epoch": 0.16, "learning_rate": 1.9873576045367404e-06, "logits/chosen": -0.732711672782898, "logits/rejected": -0.7093803286552429, "logps/chosen": -161.38299560546875, "logps/rejected": -210.60899353027344, "loss": 0.062, "rewards/accuracies": 1.0, "rewards/chosen": -0.055416226387023926, "rewards/margins": 6.036530494689941, "rewards/rejected": -6.091946125030518, "step": 464 }, { "epoch": 0.16, "learning_rate": 1.987269844774252e-06, "logits/chosen": -0.7454850077629089, "logits/rejected": -0.7092386484146118, "logps/chosen": -191.46365356445312, "logps/rejected": -233.5840301513672, "loss": 0.0523, "rewards/accuracies": 1.0, "rewards/chosen": 0.1172044426202774, "rewards/margins": 7.248037338256836, "rewards/rejected": -7.130833148956299, "step": 465 }, { "epoch": 0.16, "learning_rate": 1.98718178341445e-06, "logits/chosen": -0.7713756561279297, "logits/rejected": -0.753746747970581, "logps/chosen": -213.90750122070312, "logps/rejected": -289.0322570800781, "loss": 0.0792, "rewards/accuracies": 1.0, "rewards/chosen": 0.4718070924282074, "rewards/margins": 8.227437019348145, "rewards/rejected": -7.7556304931640625, "step": 466 }, { "epoch": 0.16, "learning_rate": 1.9870934204842367e-06, "logits/chosen": -0.6917895674705505, "logits/rejected": -0.6700896620750427, "logps/chosen": -172.58782958984375, "logps/rejected": -185.69529724121094, "loss": 0.1012, "rewards/accuracies": 1.0, "rewards/chosen": 0.04095837473869324, "rewards/margins": 4.3953022956848145, "rewards/rejected": -4.354343891143799, "step": 467 }, { "epoch": 0.16, "learning_rate": 1.9870047560106062e-06, "logits/chosen": -0.8374903798103333, "logits/rejected": -0.8187865614891052, "logps/chosen": -221.36851501464844, "logps/rejected": -279.5861511230469, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": -0.044905439019203186, "rewards/margins": 7.015816688537598, "rewards/rejected": -7.060722351074219, "step": 468 }, { "epoch": 0.16, "learning_rate": 1.9869157900206433e-06, "logits/chosen": -0.7895568013191223, "logits/rejected": -0.7781420350074768, "logps/chosen": -146.0645751953125, "logps/rejected": -190.4185028076172, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": -0.7225528955459595, "rewards/margins": 5.913210868835449, "rewards/rejected": -6.635763645172119, "step": 469 }, { "epoch": 0.16, "learning_rate": 1.986826522541526e-06, "logits/chosen": -0.6429703235626221, "logits/rejected": -0.6417461037635803, "logps/chosen": -207.482666015625, "logps/rejected": -318.6422424316406, "loss": 0.0675, "rewards/accuracies": 1.0, "rewards/chosen": 0.2789217531681061, "rewards/margins": 8.623615264892578, "rewards/rejected": -8.344693183898926, "step": 470 }, { "epoch": 0.16, "learning_rate": 1.986736953600525e-06, "logits/chosen": -0.7388911843299866, "logits/rejected": -0.7293589115142822, "logps/chosen": -172.60427856445312, "logps/rejected": -264.7646179199219, "loss": 0.0745, "rewards/accuracies": 1.0, "rewards/chosen": -0.45484834909439087, "rewards/margins": 8.77044677734375, "rewards/rejected": -9.225296020507812, "step": 471 }, { "epoch": 0.16, "learning_rate": 1.9866470832250016e-06, "logits/chosen": -0.7975963950157166, "logits/rejected": -0.7815307378768921, "logps/chosen": -157.89532470703125, "logps/rejected": -208.661376953125, "loss": 0.1025, "rewards/accuracies": 1.0, "rewards/chosen": -0.7548466324806213, "rewards/margins": 5.3420610427856445, "rewards/rejected": -6.096907615661621, "step": 472 }, { "epoch": 0.16, "learning_rate": 1.9865569114424106e-06, "logits/chosen": -0.8639338612556458, "logits/rejected": -0.8434390425682068, "logps/chosen": -198.99180603027344, "logps/rejected": -264.96429443359375, "loss": 0.0764, "rewards/accuracies": 1.0, "rewards/chosen": -0.3473438024520874, "rewards/margins": 5.334269046783447, "rewards/rejected": -5.681612968444824, "step": 473 }, { "epoch": 0.16, "learning_rate": 1.986466438280298e-06, "logits/chosen": -0.810992419719696, "logits/rejected": -0.8069273233413696, "logps/chosen": -191.1982421875, "logps/rejected": -224.5850830078125, "loss": 0.0537, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4901416301727295, "rewards/margins": 6.507699966430664, "rewards/rejected": -6.997840881347656, "step": 474 }, { "epoch": 0.16, "learning_rate": 1.9863756637663015e-06, "logits/chosen": -0.8232729434967041, "logits/rejected": -0.8147844672203064, "logps/chosen": -159.32199096679688, "logps/rejected": -220.75379943847656, "loss": 0.0629, "rewards/accuracies": 1.0, "rewards/chosen": 0.019129887223243713, "rewards/margins": 5.491466999053955, "rewards/rejected": -5.47233772277832, "step": 475 }, { "epoch": 0.16, "learning_rate": 1.9862845879281525e-06, "logits/chosen": -0.6814796924591064, "logits/rejected": -0.6539450883865356, "logps/chosen": -237.83856201171875, "logps/rejected": -305.2504577636719, "loss": 0.0527, "rewards/accuracies": 1.0, "rewards/chosen": -0.1990966647863388, "rewards/margins": 8.39084243774414, "rewards/rejected": -8.58993911743164, "step": 476 }, { "epoch": 0.16, "learning_rate": 1.9861932107936727e-06, "logits/chosen": -0.7245635986328125, "logits/rejected": -0.7270730137825012, "logps/chosen": -197.93077087402344, "logps/rejected": -274.5625, "loss": 0.0531, "rewards/accuracies": 0.9375, "rewards/chosen": 0.17713171243667603, "rewards/margins": 6.360498905181885, "rewards/rejected": -6.183367729187012, "step": 477 }, { "epoch": 0.16, "learning_rate": 1.986101532390777e-06, "logits/chosen": -0.8044847249984741, "logits/rejected": -0.7899697422981262, "logps/chosen": -133.35533142089844, "logps/rejected": -230.13450622558594, "loss": 0.0711, "rewards/accuracies": 1.0, "rewards/chosen": -0.28273218870162964, "rewards/margins": 7.415511608123779, "rewards/rejected": -7.698244094848633, "step": 478 }, { "epoch": 0.16, "learning_rate": 1.9860095527474715e-06, "logits/chosen": -0.8370399475097656, "logits/rejected": -0.8211938142776489, "logps/chosen": -197.0704803466797, "logps/rejected": -242.79830932617188, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": 0.1423662155866623, "rewards/margins": 8.38107681274414, "rewards/rejected": -8.238710403442383, "step": 479 }, { "epoch": 0.16, "learning_rate": 1.985917271891855e-06, "logits/chosen": -0.7511070370674133, "logits/rejected": -0.740608274936676, "logps/chosen": -148.3437957763672, "logps/rejected": -204.29078674316406, "loss": 0.0862, "rewards/accuracies": 1.0, "rewards/chosen": 0.5219360589981079, "rewards/margins": 7.80977725982666, "rewards/rejected": -7.287841320037842, "step": 480 }, { "epoch": 0.16, "learning_rate": 1.9858246898521175e-06, "logits/chosen": -0.7469066381454468, "logits/rejected": -0.7321364879608154, "logps/chosen": -151.08901977539062, "logps/rejected": -237.46240234375, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": -0.6760196089744568, "rewards/margins": 6.455543518066406, "rewards/rejected": -7.131563186645508, "step": 481 }, { "epoch": 0.16, "learning_rate": 1.9857318066565423e-06, "logits/chosen": -0.8415659070014954, "logits/rejected": -0.8257678747177124, "logps/chosen": -157.8345184326172, "logps/rejected": -247.86825561523438, "loss": 0.0499, "rewards/accuracies": 1.0, "rewards/chosen": -0.1435060352087021, "rewards/margins": 6.910971641540527, "rewards/rejected": -7.054477691650391, "step": 482 }, { "epoch": 0.16, "learning_rate": 1.9856386223335034e-06, "logits/chosen": -0.7769153118133545, "logits/rejected": -0.7507975101470947, "logps/chosen": -161.19674682617188, "logps/rejected": -169.28713989257812, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": -0.0224558487534523, "rewards/margins": 5.30147123336792, "rewards/rejected": -5.32392692565918, "step": 483 }, { "epoch": 0.17, "learning_rate": 1.9855451369114677e-06, "logits/chosen": -0.8434085249900818, "logits/rejected": -0.8375925421714783, "logps/chosen": -219.26919555664062, "logps/rejected": -303.62890625, "loss": 0.0613, "rewards/accuracies": 1.0, "rewards/chosen": -0.8211794495582581, "rewards/margins": 8.750727653503418, "rewards/rejected": -9.571907043457031, "step": 484 }, { "epoch": 0.17, "learning_rate": 1.985451350418993e-06, "logits/chosen": -0.8348523378372192, "logits/rejected": -0.8250631093978882, "logps/chosen": -148.94155883789062, "logps/rejected": -229.16729736328125, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": -0.1354164481163025, "rewards/margins": 6.184988498687744, "rewards/rejected": -6.320405006408691, "step": 485 }, { "epoch": 0.17, "learning_rate": 1.985357262884731e-06, "logits/chosen": -0.7711573243141174, "logits/rejected": -0.7673311829566956, "logps/chosen": -158.47630310058594, "logps/rejected": -226.8828887939453, "loss": 0.0671, "rewards/accuracies": 1.0, "rewards/chosen": -0.369701623916626, "rewards/margins": 4.712550640106201, "rewards/rejected": -5.08225154876709, "step": 486 }, { "epoch": 0.17, "learning_rate": 1.985262874337423e-06, "logits/chosen": -0.8307132720947266, "logits/rejected": -0.8014312386512756, "logps/chosen": -211.25079345703125, "logps/rejected": -256.7213134765625, "loss": 0.0454, "rewards/accuracies": 1.0, "rewards/chosen": 0.4440866708755493, "rewards/margins": 6.980578899383545, "rewards/rejected": -6.536491394042969, "step": 487 }, { "epoch": 0.17, "learning_rate": 1.985168184805904e-06, "logits/chosen": -0.7968885898590088, "logits/rejected": -0.7557063698768616, "logps/chosen": -171.98529052734375, "logps/rejected": -200.46946716308594, "loss": 0.0797, "rewards/accuracies": 0.9375, "rewards/chosen": -0.43185409903526306, "rewards/margins": 7.274953842163086, "rewards/rejected": -7.7068071365356445, "step": 488 }, { "epoch": 0.17, "learning_rate": 1.9850731943190996e-06, "logits/chosen": -0.7417293190956116, "logits/rejected": -0.7073285579681396, "logps/chosen": -174.38888549804688, "logps/rejected": -267.31036376953125, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": -0.6445594429969788, "rewards/margins": 8.249358177185059, "rewards/rejected": -8.89391803741455, "step": 489 }, { "epoch": 0.17, "learning_rate": 1.9849779029060294e-06, "logits/chosen": -0.7232129573822021, "logits/rejected": -0.6896932721138, "logps/chosen": -197.56935119628906, "logps/rejected": -240.70858764648438, "loss": 0.0771, "rewards/accuracies": 1.0, "rewards/chosen": -1.280663251876831, "rewards/margins": 7.2028632164001465, "rewards/rejected": -8.483526229858398, "step": 490 }, { "epoch": 0.17, "learning_rate": 1.984882310595802e-06, "logits/chosen": -0.7520974278450012, "logits/rejected": -0.7020655274391174, "logps/chosen": -254.9773406982422, "logps/rejected": -296.40716552734375, "loss": 0.1055, "rewards/accuracies": 1.0, "rewards/chosen": -0.8787598609924316, "rewards/margins": 7.919406890869141, "rewards/rejected": -8.79816722869873, "step": 491 }, { "epoch": 0.17, "learning_rate": 1.984786417417621e-06, "logits/chosen": -0.8329269289970398, "logits/rejected": -0.8298249244689941, "logps/chosen": -150.35519409179688, "logps/rejected": -221.38052368164062, "loss": 0.1055, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5197988152503967, "rewards/margins": 6.190133094787598, "rewards/rejected": -6.709931373596191, "step": 492 }, { "epoch": 0.17, "learning_rate": 1.9846902234007796e-06, "logits/chosen": -0.8211216926574707, "logits/rejected": -0.8009121417999268, "logps/chosen": -207.42398071289062, "logps/rejected": -286.21356201171875, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": -0.11683203279972076, "rewards/margins": 7.341965675354004, "rewards/rejected": -7.458796977996826, "step": 493 }, { "epoch": 0.17, "learning_rate": 1.984593728574664e-06, "logits/chosen": -0.8845860958099365, "logits/rejected": -0.879582405090332, "logps/chosen": -140.54965209960938, "logps/rejected": -217.00091552734375, "loss": 0.0839, "rewards/accuracies": 1.0, "rewards/chosen": -1.017393946647644, "rewards/margins": 5.604002475738525, "rewards/rejected": -6.621397018432617, "step": 494 }, { "epoch": 0.17, "learning_rate": 1.9844969329687525e-06, "logits/chosen": -0.8490980863571167, "logits/rejected": -0.8377477526664734, "logps/chosen": -154.62501525878906, "logps/rejected": -216.2071075439453, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": 0.35063934326171875, "rewards/margins": 6.815526008605957, "rewards/rejected": -6.46488618850708, "step": 495 }, { "epoch": 0.17, "learning_rate": 1.984399836612614e-06, "logits/chosen": -0.8795524835586548, "logits/rejected": -0.8612956404685974, "logps/chosen": -168.34915161132812, "logps/rejected": -216.27886962890625, "loss": 0.0879, "rewards/accuracies": 1.0, "rewards/chosen": -0.7999699711799622, "rewards/margins": 5.859354496002197, "rewards/rejected": -6.659325122833252, "step": 496 }, { "epoch": 0.17, "learning_rate": 1.9843024395359104e-06, "logits/chosen": -0.8264440894126892, "logits/rejected": -0.8217537999153137, "logps/chosen": -172.89291381835938, "logps/rejected": -263.63482666015625, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.29687079787254333, "rewards/margins": 7.341008186340332, "rewards/rejected": -7.044137001037598, "step": 497 }, { "epoch": 0.17, "learning_rate": 1.9842047417683946e-06, "logits/chosen": -0.9514211416244507, "logits/rejected": -0.9271830916404724, "logps/chosen": -241.0152587890625, "logps/rejected": -287.61065673828125, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": -0.0699535608291626, "rewards/margins": 7.782679080963135, "rewards/rejected": -7.852633476257324, "step": 498 }, { "epoch": 0.17, "learning_rate": 1.9841067433399133e-06, "logits/chosen": -0.8309442400932312, "logits/rejected": -0.8060449361801147, "logps/chosen": -214.54501342773438, "logps/rejected": -292.2445068359375, "loss": 0.0515, "rewards/accuracies": 1.0, "rewards/chosen": -0.717619776725769, "rewards/margins": 8.650822639465332, "rewards/rejected": -9.36844253540039, "step": 499 }, { "epoch": 0.17, "learning_rate": 1.9840084442804027e-06, "logits/chosen": -0.8232833743095398, "logits/rejected": -0.8099319338798523, "logps/chosen": -197.01300048828125, "logps/rejected": -220.05812072753906, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -0.28822705149650574, "rewards/margins": 5.884527206420898, "rewards/rejected": -6.172754287719727, "step": 500 }, { "epoch": 0.17, "learning_rate": 1.983909844619892e-06, "logits/chosen": -0.792079508304596, "logits/rejected": -0.7731711864471436, "logps/chosen": -264.4851379394531, "logps/rejected": -325.26556396484375, "loss": 0.0616, "rewards/accuracies": 1.0, "rewards/chosen": -0.4490417540073395, "rewards/margins": 8.447075843811035, "rewards/rejected": -8.8961181640625, "step": 501 }, { "epoch": 0.17, "learning_rate": 1.9838109443885017e-06, "logits/chosen": -0.9240905046463013, "logits/rejected": -0.8942062258720398, "logps/chosen": -212.07875061035156, "logps/rejected": -208.76858520507812, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -0.1712266504764557, "rewards/margins": 5.155272960662842, "rewards/rejected": -5.326499938964844, "step": 502 }, { "epoch": 0.17, "learning_rate": 1.9837117436164453e-06, "logits/chosen": -0.8610814809799194, "logits/rejected": -0.8454132080078125, "logps/chosen": -182.2493438720703, "logps/rejected": -267.21539306640625, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": -0.40035271644592285, "rewards/margins": 6.4680352210998535, "rewards/rejected": -6.868387699127197, "step": 503 }, { "epoch": 0.17, "learning_rate": 1.9836122423340263e-06, "logits/chosen": -0.9108873009681702, "logits/rejected": -0.9050699472427368, "logps/chosen": -231.68612670898438, "logps/rejected": -319.0402526855469, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": -0.5754705667495728, "rewards/margins": 6.762538433074951, "rewards/rejected": -7.338008880615234, "step": 504 }, { "epoch": 0.17, "learning_rate": 1.9835124405716413e-06, "logits/chosen": -0.842260479927063, "logits/rejected": -0.809815526008606, "logps/chosen": -234.9873046875, "logps/rejected": -272.3179016113281, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": 0.6239476799964905, "rewards/margins": 8.389409065246582, "rewards/rejected": -7.765461444854736, "step": 505 }, { "epoch": 0.17, "learning_rate": 1.9834123383597794e-06, "logits/chosen": -0.9084736704826355, "logits/rejected": -0.871727466583252, "logps/chosen": -279.0525207519531, "logps/rejected": -288.1187744140625, "loss": 0.0852, "rewards/accuracies": 0.9375, "rewards/chosen": 0.2003261148929596, "rewards/margins": 7.344881057739258, "rewards/rejected": -7.14455509185791, "step": 506 }, { "epoch": 0.17, "learning_rate": 1.9833119357290187e-06, "logits/chosen": -0.8885550498962402, "logits/rejected": -0.8789339065551758, "logps/chosen": -156.89019775390625, "logps/rejected": -279.3836975097656, "loss": 0.0921, "rewards/accuracies": 1.0, "rewards/chosen": -0.6902410984039307, "rewards/margins": 9.59125804901123, "rewards/rejected": -10.281498908996582, "step": 507 }, { "epoch": 0.17, "learning_rate": 1.9832112327100317e-06, "logits/chosen": -0.8630253672599792, "logits/rejected": -0.8556584715843201, "logps/chosen": -109.391845703125, "logps/rejected": -205.43341064453125, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -0.405306339263916, "rewards/margins": 6.86281681060791, "rewards/rejected": -7.268123626708984, "step": 508 }, { "epoch": 0.17, "learning_rate": 1.9831102293335818e-06, "logits/chosen": -0.704820990562439, "logits/rejected": -0.6989374756813049, "logps/chosen": -225.6845703125, "logps/rejected": -317.3202819824219, "loss": 0.0621, "rewards/accuracies": 1.0, "rewards/chosen": -0.6346972584724426, "rewards/margins": 8.037118911743164, "rewards/rejected": -8.671815872192383, "step": 509 }, { "epoch": 0.17, "learning_rate": 1.983008925630524e-06, "logits/chosen": -0.8850758075714111, "logits/rejected": -0.8607745170593262, "logps/chosen": -224.96466064453125, "logps/rejected": -279.1891174316406, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": 0.5585387945175171, "rewards/margins": 7.986962795257568, "rewards/rejected": -7.428423881530762, "step": 510 }, { "epoch": 0.17, "learning_rate": 1.9829073216318056e-06, "logits/chosen": -0.8603625893592834, "logits/rejected": -0.8410183787345886, "logps/chosen": -186.7845458984375, "logps/rejected": -232.09413146972656, "loss": 0.0465, "rewards/accuracies": 1.0, "rewards/chosen": -0.42794299125671387, "rewards/margins": 6.819163799285889, "rewards/rejected": -7.247106552124023, "step": 511 }, { "epoch": 0.17, "learning_rate": 1.9828054173684644e-06, "logits/chosen": -0.9141589999198914, "logits/rejected": -0.8954904675483704, "logps/chosen": -132.2564697265625, "logps/rejected": -205.70623779296875, "loss": 0.0604, "rewards/accuracies": 1.0, "rewards/chosen": 0.031228259205818176, "rewards/margins": 6.809196949005127, "rewards/rejected": -6.77796745300293, "step": 512 }, { "epoch": 0.18, "learning_rate": 1.982703212871631e-06, "logits/chosen": -0.876732587814331, "logits/rejected": -0.8524488210678101, "logps/chosen": -164.76788330078125, "logps/rejected": -214.42984008789062, "loss": 0.0596, "rewards/accuracies": 1.0, "rewards/chosen": -0.7583277225494385, "rewards/margins": 5.664405822753906, "rewards/rejected": -6.422733306884766, "step": 513 }, { "epoch": 0.18, "learning_rate": 1.9826007081725282e-06, "logits/chosen": -0.9052053689956665, "logits/rejected": -0.905426561832428, "logps/chosen": -113.71932220458984, "logps/rejected": -186.57351684570312, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": -0.06164756789803505, "rewards/margins": 6.21433162689209, "rewards/rejected": -6.275979995727539, "step": 514 }, { "epoch": 0.18, "learning_rate": 1.9824979033024693e-06, "logits/chosen": -0.865257203578949, "logits/rejected": -0.8309085369110107, "logps/chosen": -266.5993347167969, "logps/rejected": -300.2069396972656, "loss": 0.0338, "rewards/accuracies": 1.0, "rewards/chosen": -0.1082642674446106, "rewards/margins": 6.539196014404297, "rewards/rejected": -6.647460460662842, "step": 515 }, { "epoch": 0.18, "learning_rate": 1.9823947982928594e-06, "logits/chosen": -0.9254615902900696, "logits/rejected": -0.8910333514213562, "logps/chosen": -191.50950622558594, "logps/rejected": -232.23045349121094, "loss": 0.083, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4413113594055176, "rewards/margins": 6.946434497833252, "rewards/rejected": -7.3877458572387695, "step": 516 }, { "epoch": 0.18, "learning_rate": 1.982291393175196e-06, "logits/chosen": -0.8490796685218811, "logits/rejected": -0.8526163101196289, "logps/chosen": -134.9854278564453, "logps/rejected": -232.2645263671875, "loss": 0.0773, "rewards/accuracies": 1.0, "rewards/chosen": -1.6282669305801392, "rewards/margins": 5.6876091957092285, "rewards/rejected": -7.315876007080078, "step": 517 }, { "epoch": 0.18, "learning_rate": 1.982187687981068e-06, "logits/chosen": -0.8547233939170837, "logits/rejected": -0.8448018431663513, "logps/chosen": -161.18978881835938, "logps/rejected": -231.89364624023438, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": -0.06826463341712952, "rewards/margins": 6.621318817138672, "rewards/rejected": -6.689583778381348, "step": 518 }, { "epoch": 0.18, "learning_rate": 1.982083682742156e-06, "logits/chosen": -0.9489890336990356, "logits/rejected": -0.9452912211418152, "logps/chosen": -152.32150268554688, "logps/rejected": -230.37376403808594, "loss": 0.0656, "rewards/accuracies": 1.0, "rewards/chosen": -0.8102610111236572, "rewards/margins": 6.493147850036621, "rewards/rejected": -7.303409099578857, "step": 519 }, { "epoch": 0.18, "learning_rate": 1.9819793774902316e-06, "logits/chosen": -0.9235274195671082, "logits/rejected": -0.8965332508087158, "logps/chosen": -210.67648315429688, "logps/rejected": -251.85040283203125, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": -0.309755802154541, "rewards/margins": 8.167367935180664, "rewards/rejected": -8.477124214172363, "step": 520 }, { "epoch": 0.18, "learning_rate": 1.981874772257159e-06, "logits/chosen": -1.0022199153900146, "logits/rejected": -0.9788553714752197, "logps/chosen": -239.05467224121094, "logps/rejected": -339.3393249511719, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": 0.5821305513381958, "rewards/margins": 9.840813636779785, "rewards/rejected": -9.258683204650879, "step": 521 }, { "epoch": 0.18, "learning_rate": 1.981769867074894e-06, "logits/chosen": -0.9364277124404907, "logits/rejected": -0.928251326084137, "logps/chosen": -181.97421264648438, "logps/rejected": -284.6482849121094, "loss": 0.0612, "rewards/accuracies": 1.0, "rewards/chosen": -0.12427850067615509, "rewards/margins": 9.530743598937988, "rewards/rejected": -9.655021667480469, "step": 522 }, { "epoch": 0.18, "learning_rate": 1.981664661975483e-06, "logits/chosen": -1.0029454231262207, "logits/rejected": -0.9870211482048035, "logps/chosen": -188.6385040283203, "logps/rejected": -276.2286071777344, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": -1.2418341636657715, "rewards/margins": 8.061290740966797, "rewards/rejected": -9.30312442779541, "step": 523 }, { "epoch": 0.18, "learning_rate": 1.9815591569910653e-06, "logits/chosen": -0.8597974181175232, "logits/rejected": -0.8489859700202942, "logps/chosen": -178.06512451171875, "logps/rejected": -208.82794189453125, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": -0.19019389152526855, "rewards/margins": 7.432736873626709, "rewards/rejected": -7.622931003570557, "step": 524 }, { "epoch": 0.18, "learning_rate": 1.981453352153871e-06, "logits/chosen": -0.88055419921875, "logits/rejected": -0.8696951866149902, "logps/chosen": -219.28219604492188, "logps/rejected": -315.10888671875, "loss": 0.0667, "rewards/accuracies": 1.0, "rewards/chosen": -1.0121570825576782, "rewards/margins": 9.568349838256836, "rewards/rejected": -10.58050537109375, "step": 525 }, { "epoch": 0.18, "learning_rate": 1.9813472474962215e-06, "logits/chosen": -0.7698714733123779, "logits/rejected": -0.7540116906166077, "logps/chosen": -238.16976928710938, "logps/rejected": -335.2927551269531, "loss": 0.0814, "rewards/accuracies": 1.0, "rewards/chosen": -0.12496151775121689, "rewards/margins": 9.0509614944458, "rewards/rejected": -9.175922393798828, "step": 526 }, { "epoch": 0.18, "learning_rate": 1.9812408430505312e-06, "logits/chosen": -0.9325400590896606, "logits/rejected": -0.9171484708786011, "logps/chosen": -242.84866333007812, "logps/rejected": -292.2442626953125, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": -0.02935659885406494, "rewards/margins": 6.397525310516357, "rewards/rejected": -6.426882743835449, "step": 527 }, { "epoch": 0.18, "learning_rate": 1.981134138849304e-06, "logits/chosen": -0.9371970891952515, "logits/rejected": -0.9154884815216064, "logps/chosen": -191.1331787109375, "logps/rejected": -230.30970764160156, "loss": 0.0507, "rewards/accuracies": 1.0, "rewards/chosen": -0.9903026819229126, "rewards/margins": 6.405021667480469, "rewards/rejected": -7.395323753356934, "step": 528 }, { "epoch": 0.18, "learning_rate": 1.9810271349251384e-06, "logits/chosen": -0.915915846824646, "logits/rejected": -0.8901860117912292, "logps/chosen": -197.29263305664062, "logps/rejected": -214.48806762695312, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": -0.7444829940795898, "rewards/margins": 6.794285297393799, "rewards/rejected": -7.538768291473389, "step": 529 }, { "epoch": 0.18, "learning_rate": 1.9809198313107213e-06, "logits/chosen": -0.9734172224998474, "logits/rejected": -0.945885419845581, "logps/chosen": -190.95018005371094, "logps/rejected": -300.97442626953125, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": -0.15116652846336365, "rewards/margins": 9.422913551330566, "rewards/rejected": -9.574080467224121, "step": 530 }, { "epoch": 0.18, "learning_rate": 1.9808122280388323e-06, "logits/chosen": -0.8753033876419067, "logits/rejected": -0.8617364764213562, "logps/chosen": -188.55523681640625, "logps/rejected": -252.96746826171875, "loss": 0.0697, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8051134347915649, "rewards/margins": 6.5658955574035645, "rewards/rejected": -7.371008396148682, "step": 531 }, { "epoch": 0.18, "learning_rate": 1.9807043251423433e-06, "logits/chosen": -0.9052034020423889, "logits/rejected": -0.8796579241752625, "logps/chosen": -172.1426544189453, "logps/rejected": -182.67935180664062, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": -0.2920554280281067, "rewards/margins": 6.03493595123291, "rewards/rejected": -6.326991081237793, "step": 532 }, { "epoch": 0.18, "learning_rate": 1.980596122654217e-06, "logits/chosen": -0.940983235836029, "logits/rejected": -0.919514536857605, "logps/chosen": -229.51434326171875, "logps/rejected": -342.7462463378906, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -1.5960955619812012, "rewards/margins": 8.907368659973145, "rewards/rejected": -10.503464698791504, "step": 533 }, { "epoch": 0.18, "learning_rate": 1.980487620607508e-06, "logits/chosen": -0.9015532732009888, "logits/rejected": -0.8956538438796997, "logps/chosen": -202.32778930664062, "logps/rejected": -282.4217529296875, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": -0.35866934061050415, "rewards/margins": 6.698148250579834, "rewards/rejected": -7.056817531585693, "step": 534 }, { "epoch": 0.18, "learning_rate": 1.9803788190353616e-06, "logits/chosen": -0.9051293730735779, "logits/rejected": -0.8739778399467468, "logps/chosen": -254.40692138671875, "logps/rejected": -329.4895324707031, "loss": 0.0607, "rewards/accuracies": 1.0, "rewards/chosen": -0.5553094744682312, "rewards/margins": 9.463028907775879, "rewards/rejected": -10.018339157104492, "step": 535 }, { "epoch": 0.18, "learning_rate": 1.9802697179710158e-06, "logits/chosen": -0.8114553093910217, "logits/rejected": -0.7899676561355591, "logps/chosen": -193.46865844726562, "logps/rejected": -321.85491943359375, "loss": 0.1383, "rewards/accuracies": 1.0, "rewards/chosen": -1.3991461992263794, "rewards/margins": 9.6104736328125, "rewards/rejected": -11.009620666503906, "step": 536 }, { "epoch": 0.18, "learning_rate": 1.980160317447799e-06, "logits/chosen": -0.9619380235671997, "logits/rejected": -0.9266243577003479, "logps/chosen": -256.73638916015625, "logps/rejected": -337.66302490234375, "loss": 0.0704, "rewards/accuracies": 0.9375, "rewards/chosen": -1.256508469581604, "rewards/margins": 9.63757610321045, "rewards/rejected": -10.894084930419922, "step": 537 }, { "epoch": 0.18, "learning_rate": 1.9800506174991318e-06, "logits/chosen": -0.9234029650688171, "logits/rejected": -0.9066125154495239, "logps/chosen": -227.55996704101562, "logps/rejected": -320.4125671386719, "loss": 0.0398, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2516433000564575, "rewards/margins": 10.120457649230957, "rewards/rejected": -11.372100830078125, "step": 538 }, { "epoch": 0.18, "learning_rate": 1.9799406181585257e-06, "logits/chosen": -0.810072660446167, "logits/rejected": -0.8038988709449768, "logps/chosen": -137.40428161621094, "logps/rejected": -209.26119995117188, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.37514883279800415, "rewards/margins": 6.930599212646484, "rewards/rejected": -7.30574893951416, "step": 539 }, { "epoch": 0.18, "learning_rate": 1.9798303194595845e-06, "logits/chosen": -1.02340829372406, "logits/rejected": -1.0046260356903076, "logps/chosen": -142.86441040039062, "logps/rejected": -247.39529418945312, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": -1.1890840530395508, "rewards/margins": 8.426250457763672, "rewards/rejected": -9.615333557128906, "step": 540 }, { "epoch": 0.18, "learning_rate": 1.9797197214360027e-06, "logits/chosen": -0.827226996421814, "logits/rejected": -0.8176074028015137, "logps/chosen": -169.97271728515625, "logps/rejected": -267.85955810546875, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": -0.33802881836891174, "rewards/margins": 8.766654014587402, "rewards/rejected": -9.104683876037598, "step": 541 }, { "epoch": 0.18, "learning_rate": 1.979608824121566e-06, "logits/chosen": -0.9392085671424866, "logits/rejected": -0.9171081185340881, "logps/chosen": -209.32994079589844, "logps/rejected": -260.8291015625, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -1.8635321855545044, "rewards/margins": 7.250520706176758, "rewards/rejected": -9.114051818847656, "step": 542 }, { "epoch": 0.19, "learning_rate": 1.979497627550153e-06, "logits/chosen": -0.9392709136009216, "logits/rejected": -0.9126700758934021, "logps/chosen": -132.40505981445312, "logps/rejected": -206.362060546875, "loss": 0.0597, "rewards/accuracies": 1.0, "rewards/chosen": -0.14804036915302277, "rewards/margins": 8.352813720703125, "rewards/rejected": -8.5008544921875, "step": 543 }, { "epoch": 0.19, "learning_rate": 1.979386131755732e-06, "logits/chosen": -0.7421759366989136, "logits/rejected": -0.7168218493461609, "logps/chosen": -253.7222442626953, "logps/rejected": -344.14788818359375, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": -0.37344375252723694, "rewards/margins": 9.105628967285156, "rewards/rejected": -9.479071617126465, "step": 544 }, { "epoch": 0.19, "learning_rate": 1.979274336772363e-06, "logits/chosen": -0.8678727149963379, "logits/rejected": -0.8610647320747375, "logps/chosen": -150.6639862060547, "logps/rejected": -261.6693420410156, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": -1.6714231967926025, "rewards/margins": 6.3902459144592285, "rewards/rejected": -8.061668395996094, "step": 545 }, { "epoch": 0.19, "learning_rate": 1.979162242634199e-06, "logits/chosen": -0.912902295589447, "logits/rejected": -0.885590136051178, "logps/chosen": -256.8582458496094, "logps/rejected": -318.90496826171875, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": -0.25130724906921387, "rewards/margins": 9.222685813903809, "rewards/rejected": -9.473993301391602, "step": 546 }, { "epoch": 0.19, "learning_rate": 1.9790498493754824e-06, "logits/chosen": -0.9113664627075195, "logits/rejected": -0.9085156321525574, "logps/chosen": -161.24571228027344, "logps/rejected": -250.35015869140625, "loss": 0.0703, "rewards/accuracies": 1.0, "rewards/chosen": -0.15714213252067566, "rewards/margins": 8.586195945739746, "rewards/rejected": -8.743337631225586, "step": 547 }, { "epoch": 0.19, "learning_rate": 1.978937157030548e-06, "logits/chosen": -0.8911148309707642, "logits/rejected": -0.87481290102005, "logps/chosen": -117.56627655029297, "logps/rejected": -176.89004516601562, "loss": 0.039, "rewards/accuracies": 1.0, "rewards/chosen": -0.3935934901237488, "rewards/margins": 7.327378273010254, "rewards/rejected": -7.720972061157227, "step": 548 }, { "epoch": 0.19, "learning_rate": 1.978824165633822e-06, "logits/chosen": -0.9258784055709839, "logits/rejected": -0.9164153337478638, "logps/chosen": -112.32698059082031, "logps/rejected": -175.99652099609375, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": -0.37859269976615906, "rewards/margins": 5.877255439758301, "rewards/rejected": -6.255848407745361, "step": 549 }, { "epoch": 0.19, "learning_rate": 1.9787108752198215e-06, "logits/chosen": -0.9304454922676086, "logits/rejected": -0.9042580127716064, "logps/chosen": -175.84695434570312, "logps/rejected": -279.6517028808594, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": 0.3360466957092285, "rewards/margins": 8.537481307983398, "rewards/rejected": -8.201435089111328, "step": 550 }, { "epoch": 0.19, "learning_rate": 1.978597285823155e-06, "logits/chosen": -1.0066235065460205, "logits/rejected": -0.9833400249481201, "logps/chosen": -227.57481384277344, "logps/rejected": -277.92852783203125, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -1.2308703660964966, "rewards/margins": 7.267498016357422, "rewards/rejected": -8.498368263244629, "step": 551 }, { "epoch": 0.19, "learning_rate": 1.9784833974785224e-06, "logits/chosen": -0.9133647084236145, "logits/rejected": -0.8916798830032349, "logps/chosen": -174.1064453125, "logps/rejected": -242.5067901611328, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -0.054004743695259094, "rewards/margins": 8.588318824768066, "rewards/rejected": -8.64232349395752, "step": 552 }, { "epoch": 0.19, "learning_rate": 1.978369210220715e-06, "logits/chosen": -0.9374479651451111, "logits/rejected": -0.9086722135543823, "logps/chosen": -168.451416015625, "logps/rejected": -183.9095916748047, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": -0.3587082624435425, "rewards/margins": 6.553563594818115, "rewards/rejected": -6.912271499633789, "step": 553 }, { "epoch": 0.19, "learning_rate": 1.9782547240846163e-06, "logits/chosen": -0.8765919208526611, "logits/rejected": -0.8600260019302368, "logps/chosen": -174.38287353515625, "logps/rejected": -229.52294921875, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -0.579657256603241, "rewards/margins": 7.787848472595215, "rewards/rejected": -8.36750602722168, "step": 554 }, { "epoch": 0.19, "learning_rate": 1.9781399391051993e-06, "logits/chosen": -0.9539182186126709, "logits/rejected": -0.928210973739624, "logps/chosen": -156.47747802734375, "logps/rejected": -188.8240203857422, "loss": 0.046, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7281794548034668, "rewards/margins": 7.0597405433654785, "rewards/rejected": -7.787919998168945, "step": 555 }, { "epoch": 0.19, "learning_rate": 1.97802485531753e-06, "logits/chosen": -1.076371192932129, "logits/rejected": -1.0550885200500488, "logps/chosen": -181.69927978515625, "logps/rejected": -239.95745849609375, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": -1.00959312915802, "rewards/margins": 6.350505352020264, "rewards/rejected": -7.360098361968994, "step": 556 }, { "epoch": 0.19, "learning_rate": 1.9779094727567636e-06, "logits/chosen": -0.8798007369041443, "logits/rejected": -0.860223650932312, "logps/chosen": -244.0191650390625, "logps/rejected": -397.8773193359375, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": -0.8656082153320312, "rewards/margins": 10.427814483642578, "rewards/rejected": -11.29342269897461, "step": 557 }, { "epoch": 0.19, "learning_rate": 1.977793791458149e-06, "logits/chosen": -0.9693994522094727, "logits/rejected": -0.9442362785339355, "logps/chosen": -167.87057495117188, "logps/rejected": -205.66639709472656, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": -0.9637816548347473, "rewards/margins": 5.975922584533691, "rewards/rejected": -6.939703941345215, "step": 558 }, { "epoch": 0.19, "learning_rate": 1.9776778114570253e-06, "logits/chosen": -0.9617966413497925, "logits/rejected": -0.9230554699897766, "logps/chosen": -198.11819458007812, "logps/rejected": -236.7311248779297, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": -0.2411511391401291, "rewards/margins": 7.472568511962891, "rewards/rejected": -7.713719844818115, "step": 559 }, { "epoch": 0.19, "learning_rate": 1.977561532788822e-06, "logits/chosen": -0.9507661461830139, "logits/rejected": -0.9366797804832458, "logps/chosen": -172.19105529785156, "logps/rejected": -246.1509246826172, "loss": 0.0724, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1865172386169434, "rewards/margins": 6.127049446105957, "rewards/rejected": -7.313566207885742, "step": 560 }, { "epoch": 0.19, "learning_rate": 1.977444955489061e-06, "logits/chosen": -0.8006594777107239, "logits/rejected": -0.7821227312088013, "logps/chosen": -169.7612762451172, "logps/rejected": -291.9988098144531, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -0.6453441381454468, "rewards/margins": 9.027948379516602, "rewards/rejected": -9.673293113708496, "step": 561 }, { "epoch": 0.19, "learning_rate": 1.977328079593356e-06, "logits/chosen": -0.9961028695106506, "logits/rejected": -0.9915962219238281, "logps/chosen": -220.4300537109375, "logps/rejected": -315.53326416015625, "loss": 0.0609, "rewards/accuracies": 1.0, "rewards/chosen": -0.20601308345794678, "rewards/margins": 8.249069213867188, "rewards/rejected": -8.455081939697266, "step": 562 }, { "epoch": 0.19, "learning_rate": 1.9772109051374095e-06, "logits/chosen": -0.9062607288360596, "logits/rejected": -0.8814750909805298, "logps/chosen": -213.96192932128906, "logps/rejected": -313.00421142578125, "loss": 0.0527, "rewards/accuracies": 1.0, "rewards/chosen": -0.9628427028656006, "rewards/margins": 8.235588073730469, "rewards/rejected": -9.198431015014648, "step": 563 }, { "epoch": 0.19, "learning_rate": 1.977093432157017e-06, "logits/chosen": -1.0248515605926514, "logits/rejected": -0.9878000617027283, "logps/chosen": -189.70394897460938, "logps/rejected": -252.51776123046875, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -0.3634791970252991, "rewards/margins": 8.636374473571777, "rewards/rejected": -8.999853134155273, "step": 564 }, { "epoch": 0.19, "learning_rate": 1.9769756606880657e-06, "logits/chosen": -1.0135250091552734, "logits/rejected": -1.0023472309112549, "logps/chosen": -95.75526428222656, "logps/rejected": -174.93167114257812, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": -0.381362646818161, "rewards/margins": 6.440376281738281, "rewards/rejected": -6.821739196777344, "step": 565 }, { "epoch": 0.19, "learning_rate": 1.9768575907665324e-06, "logits/chosen": -0.958276629447937, "logits/rejected": -0.9238964319229126, "logps/chosen": -191.67027282714844, "logps/rejected": -232.26673889160156, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -0.8194931745529175, "rewards/margins": 7.7358245849609375, "rewards/rejected": -8.555317878723145, "step": 566 }, { "epoch": 0.19, "learning_rate": 1.976739222428486e-06, "logits/chosen": -0.9778825044631958, "logits/rejected": -0.9242126941680908, "logps/chosen": -252.29437255859375, "logps/rejected": -280.0216064453125, "loss": 0.0666, "rewards/accuracies": 1.0, "rewards/chosen": -0.8956327438354492, "rewards/margins": 9.196538925170898, "rewards/rejected": -10.092171669006348, "step": 567 }, { "epoch": 0.19, "learning_rate": 1.9766205557100868e-06, "logits/chosen": -0.9719224572181702, "logits/rejected": -0.9540631771087646, "logps/chosen": -197.599365234375, "logps/rejected": -293.01495361328125, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -0.4349590539932251, "rewards/margins": 9.072507858276367, "rewards/rejected": -9.507467269897461, "step": 568 }, { "epoch": 0.19, "learning_rate": 1.976501590647585e-06, "logits/chosen": -0.9903542399406433, "logits/rejected": -0.9684469699859619, "logps/chosen": -198.8714599609375, "logps/rejected": -237.67611694335938, "loss": 0.0492, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5406200289726257, "rewards/margins": 6.72774076461792, "rewards/rejected": -7.2683610916137695, "step": 569 }, { "epoch": 0.19, "learning_rate": 1.9763823272773234e-06, "logits/chosen": -0.9596062302589417, "logits/rejected": -0.9253125190734863, "logps/chosen": -246.28564453125, "logps/rejected": -305.29461669921875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.4843847751617432, "rewards/margins": 9.585933685302734, "rewards/rejected": -11.070318222045898, "step": 570 }, { "epoch": 0.19, "learning_rate": 1.9762627656357355e-06, "logits/chosen": -1.011143684387207, "logits/rejected": -0.9880803823471069, "logps/chosen": -255.18441772460938, "logps/rejected": -354.7326965332031, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": -1.103240728378296, "rewards/margins": 11.733356475830078, "rewards/rejected": -12.836597442626953, "step": 571 }, { "epoch": 0.2, "learning_rate": 1.9761429057593453e-06, "logits/chosen": -0.9594974517822266, "logits/rejected": -0.9393374919891357, "logps/chosen": -253.50735473632812, "logps/rejected": -351.53155517578125, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -1.4126747846603394, "rewards/margins": 8.617013931274414, "rewards/rejected": -10.029688835144043, "step": 572 }, { "epoch": 0.2, "learning_rate": 1.9760227476847684e-06, "logits/chosen": -1.081338882446289, "logits/rejected": -1.0480647087097168, "logps/chosen": -265.3971862792969, "logps/rejected": -282.2193603515625, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": -1.2920128107070923, "rewards/margins": 8.256115913391113, "rewards/rejected": -9.548128128051758, "step": 573 }, { "epoch": 0.2, "learning_rate": 1.975902291448711e-06, "logits/chosen": -0.911215603351593, "logits/rejected": -0.8815644383430481, "logps/chosen": -162.95700073242188, "logps/rejected": -190.7410125732422, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": -0.3827114999294281, "rewards/margins": 7.170231819152832, "rewards/rejected": -7.552942752838135, "step": 574 }, { "epoch": 0.2, "learning_rate": 1.9757815370879716e-06, "logits/chosen": -0.9577271938323975, "logits/rejected": -0.9218797087669373, "logps/chosen": -196.2116241455078, "logps/rejected": -218.15545654296875, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": -1.1563133001327515, "rewards/margins": 7.2887725830078125, "rewards/rejected": -8.445085525512695, "step": 575 }, { "epoch": 0.2, "learning_rate": 1.975660484639439e-06, "logits/chosen": -0.9522220492362976, "logits/rejected": -0.9340742230415344, "logps/chosen": -234.8391876220703, "logps/rejected": -318.7571105957031, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": -1.2350248098373413, "rewards/margins": 8.951399803161621, "rewards/rejected": -10.186426162719727, "step": 576 }, { "epoch": 0.2, "learning_rate": 1.9755391341400927e-06, "logits/chosen": -0.9958475828170776, "logits/rejected": -0.9536238312721252, "logps/chosen": -235.7689666748047, "logps/rejected": -277.476806640625, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.798622727394104, "rewards/margins": 9.799022674560547, "rewards/rejected": -10.59764575958252, "step": 577 }, { "epoch": 0.2, "learning_rate": 1.9754174856270034e-06, "logits/chosen": -0.9551845788955688, "logits/rejected": -0.941169023513794, "logps/chosen": -218.2157745361328, "logps/rejected": -270.4477233886719, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": -0.09823351353406906, "rewards/margins": 7.476345539093018, "rewards/rejected": -7.574578762054443, "step": 578 }, { "epoch": 0.2, "learning_rate": 1.9752955391373332e-06, "logits/chosen": -0.8856111168861389, "logits/rejected": -0.8601957559585571, "logps/chosen": -218.3873291015625, "logps/rejected": -231.0337677001953, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -0.4636993110179901, "rewards/margins": 6.799783229827881, "rewards/rejected": -7.263482093811035, "step": 579 }, { "epoch": 0.2, "learning_rate": 1.9751732947083356e-06, "logits/chosen": -0.9087603092193604, "logits/rejected": -0.8690183758735657, "logps/chosen": -245.56045532226562, "logps/rejected": -266.3992004394531, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": -0.6044809818267822, "rewards/margins": 7.069736957550049, "rewards/rejected": -7.674217224121094, "step": 580 }, { "epoch": 0.2, "learning_rate": 1.975050752377354e-06, "logits/chosen": -1.0010102987289429, "logits/rejected": -0.9747340083122253, "logps/chosen": -192.23316955566406, "logps/rejected": -245.6748809814453, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -0.3190458416938782, "rewards/margins": 7.542194366455078, "rewards/rejected": -7.861239910125732, "step": 581 }, { "epoch": 0.2, "learning_rate": 1.9749279121818236e-06, "logits/chosen": -0.9890163540840149, "logits/rejected": -0.950920820236206, "logps/chosen": -244.2736053466797, "logps/rejected": -305.4094543457031, "loss": 0.0524, "rewards/accuracies": 1.0, "rewards/chosen": -0.9383690357208252, "rewards/margins": 9.227209091186523, "rewards/rejected": -10.16557788848877, "step": 582 }, { "epoch": 0.2, "learning_rate": 1.97480477415927e-06, "logits/chosen": -0.9114074110984802, "logits/rejected": -0.8869712948799133, "logps/chosen": -187.42324829101562, "logps/rejected": -290.5890197753906, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -1.0464011430740356, "rewards/margins": 10.192665100097656, "rewards/rejected": -11.239066123962402, "step": 583 }, { "epoch": 0.2, "learning_rate": 1.974681338347311e-06, "logits/chosen": -0.9139357209205627, "logits/rejected": -0.8859570026397705, "logps/chosen": -175.5563201904297, "logps/rejected": -214.33096313476562, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": -1.3859297037124634, "rewards/margins": 5.487331867218018, "rewards/rejected": -6.873261451721191, "step": 584 }, { "epoch": 0.2, "learning_rate": 1.9745576047836538e-06, "logits/chosen": -1.0707844495773315, "logits/rejected": -1.0603832006454468, "logps/chosen": -151.35191345214844, "logps/rejected": -245.99960327148438, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -0.20782306790351868, "rewards/margins": 10.013855934143066, "rewards/rejected": -10.221678733825684, "step": 585 }, { "epoch": 0.2, "learning_rate": 1.9744335735060973e-06, "logits/chosen": -1.0099369287490845, "logits/rejected": -0.9884259700775146, "logps/chosen": -208.12722778320312, "logps/rejected": -286.5820007324219, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 0.15271680057048798, "rewards/margins": 8.854594230651855, "rewards/rejected": -8.701878547668457, "step": 586 }, { "epoch": 0.2, "learning_rate": 1.974309244552532e-06, "logits/chosen": -0.9468649625778198, "logits/rejected": -0.9351252317428589, "logps/chosen": -274.53106689453125, "logps/rejected": -389.58734130859375, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -1.098317265510559, "rewards/margins": 8.65971565246582, "rewards/rejected": -9.758031845092773, "step": 587 }, { "epoch": 0.2, "learning_rate": 1.9741846179609378e-06, "logits/chosen": -1.0754188299179077, "logits/rejected": -1.0471792221069336, "logps/chosen": -167.64120483398438, "logps/rejected": -191.2539825439453, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": -0.7557682991027832, "rewards/margins": 6.284139156341553, "rewards/rejected": -7.039907455444336, "step": 588 }, { "epoch": 0.2, "learning_rate": 1.974059693769387e-06, "logits/chosen": -0.9537057876586914, "logits/rejected": -0.9239873886108398, "logps/chosen": -234.03488159179688, "logps/rejected": -289.7364807128906, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -0.882801353931427, "rewards/margins": 9.146498680114746, "rewards/rejected": -10.029300689697266, "step": 589 }, { "epoch": 0.2, "learning_rate": 1.973934472016042e-06, "logits/chosen": -1.0049831867218018, "logits/rejected": -0.9801930785179138, "logps/chosen": -234.75074768066406, "logps/rejected": -251.31369018554688, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": -0.7479559183120728, "rewards/margins": 8.42165470123291, "rewards/rejected": -9.169610023498535, "step": 590 }, { "epoch": 0.2, "learning_rate": 1.9738089527391564e-06, "logits/chosen": -0.9867586493492126, "logits/rejected": -0.9652100801467896, "logps/chosen": -201.8011474609375, "logps/rejected": -285.25946044921875, "loss": 0.0517, "rewards/accuracies": 1.0, "rewards/chosen": -2.0454249382019043, "rewards/margins": 8.284215927124023, "rewards/rejected": -10.32964038848877, "step": 591 }, { "epoch": 0.2, "learning_rate": 1.973683135977075e-06, "logits/chosen": -0.943473756313324, "logits/rejected": -0.9154087901115417, "logps/chosen": -168.3541717529297, "logps/rejected": -250.32164001464844, "loss": 0.0514, "rewards/accuracies": 1.0, "rewards/chosen": -1.1235935688018799, "rewards/margins": 5.819122791290283, "rewards/rejected": -6.942716598510742, "step": 592 }, { "epoch": 0.2, "learning_rate": 1.9735570217682325e-06, "logits/chosen": -1.0778053998947144, "logits/rejected": -1.052024245262146, "logps/chosen": -240.86468505859375, "logps/rejected": -294.9212646484375, "loss": 0.062, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8271925449371338, "rewards/margins": 6.583497524261475, "rewards/rejected": -8.410691261291504, "step": 593 }, { "epoch": 0.2, "learning_rate": 1.973430610151155e-06, "logits/chosen": -0.9289827942848206, "logits/rejected": -0.9044557213783264, "logps/chosen": -167.33599853515625, "logps/rejected": -238.886474609375, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -0.7863651514053345, "rewards/margins": 6.15611457824707, "rewards/rejected": -6.942480087280273, "step": 594 }, { "epoch": 0.2, "learning_rate": 1.97330390116446e-06, "logits/chosen": -0.9378933310508728, "logits/rejected": -0.9299591183662415, "logps/chosen": -200.01304626464844, "logps/rejected": -303.7030029296875, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -0.5136091709136963, "rewards/margins": 10.421297073364258, "rewards/rejected": -10.934906959533691, "step": 595 }, { "epoch": 0.2, "learning_rate": 1.973176894846855e-06, "logits/chosen": -0.9351301193237305, "logits/rejected": -0.9018999934196472, "logps/chosen": -260.4922180175781, "logps/rejected": -340.5794677734375, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": -0.2570006847381592, "rewards/margins": 10.222328186035156, "rewards/rejected": -10.479329109191895, "step": 596 }, { "epoch": 0.2, "learning_rate": 1.9730495912371383e-06, "logits/chosen": -0.9731868505477905, "logits/rejected": -0.9310501217842102, "logps/chosen": -184.2296600341797, "logps/rejected": -250.18759155273438, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -2.1565263271331787, "rewards/margins": 7.607501029968262, "rewards/rejected": -9.764028549194336, "step": 597 }, { "epoch": 0.2, "learning_rate": 1.9729219903742002e-06, "logits/chosen": -0.8967821598052979, "logits/rejected": -0.8653547763824463, "logps/chosen": -192.1979217529297, "logps/rejected": -253.4744873046875, "loss": 0.0539, "rewards/accuracies": 0.9375, "rewards/chosen": -0.57643723487854, "rewards/margins": 8.608088493347168, "rewards/rejected": -9.184525489807129, "step": 598 }, { "epoch": 0.2, "learning_rate": 1.972794092297021e-06, "logits/chosen": -1.022811770439148, "logits/rejected": -0.9952086210250854, "logps/chosen": -219.38400268554688, "logps/rejected": -310.1427917480469, "loss": 0.0308, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8344107866287231, "rewards/margins": 10.127132415771484, "rewards/rejected": -10.961544036865234, "step": 599 }, { "epoch": 0.2, "learning_rate": 1.972665897044671e-06, "logits/chosen": -0.9707885384559631, "logits/rejected": -0.9590129852294922, "logps/chosen": -178.56845092773438, "logps/rejected": -251.761474609375, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -1.2846386432647705, "rewards/margins": 7.751656532287598, "rewards/rejected": -9.036294937133789, "step": 600 }, { "epoch": 0.21, "learning_rate": 1.972537404656313e-06, "logits/chosen": -1.0164992809295654, "logits/rejected": -0.9974066019058228, "logps/chosen": -149.46841430664062, "logps/rejected": -222.28457641601562, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -0.6719133257865906, "rewards/margins": 8.830055236816406, "rewards/rejected": -9.501967430114746, "step": 601 }, { "epoch": 0.21, "learning_rate": 1.972408615171199e-06, "logits/chosen": -1.0113102197647095, "logits/rejected": -0.9747132658958435, "logps/chosen": -157.70257568359375, "logps/rejected": -223.802490234375, "loss": 0.0473, "rewards/accuracies": 1.0, "rewards/chosen": -0.26172730326652527, "rewards/margins": 8.799344062805176, "rewards/rejected": -9.061071395874023, "step": 602 }, { "epoch": 0.21, "learning_rate": 1.972279528628672e-06, "logits/chosen": -1.0438876152038574, "logits/rejected": -1.0114108324050903, "logps/chosen": -228.44512939453125, "logps/rejected": -294.5398254394531, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -1.1460483074188232, "rewards/margins": 9.932459831237793, "rewards/rejected": -11.078508377075195, "step": 603 }, { "epoch": 0.21, "learning_rate": 1.9721501450681673e-06, "logits/chosen": -1.029937982559204, "logits/rejected": -1.0086390972137451, "logps/chosen": -173.5872344970703, "logps/rejected": -232.41525268554688, "loss": 0.0337, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5911132097244263, "rewards/margins": 8.072345733642578, "rewards/rejected": -9.663458824157715, "step": 604 }, { "epoch": 0.21, "learning_rate": 1.9720204645292093e-06, "logits/chosen": -0.9468083381652832, "logits/rejected": -0.9209327697753906, "logps/chosen": -176.9317169189453, "logps/rejected": -246.2593231201172, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -0.3021259307861328, "rewards/margins": 8.322111129760742, "rewards/rejected": -8.624237060546875, "step": 605 }, { "epoch": 0.21, "learning_rate": 1.9718904870514137e-06, "logits/chosen": -0.9121202230453491, "logits/rejected": -0.8783060908317566, "logps/chosen": -165.370361328125, "logps/rejected": -211.60662841796875, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": -0.47333019971847534, "rewards/margins": 6.087613105773926, "rewards/rejected": -6.560943126678467, "step": 606 }, { "epoch": 0.21, "learning_rate": 1.971760212674486e-06, "logits/chosen": -0.8815883994102478, "logits/rejected": -0.8453642725944519, "logps/chosen": -211.7491455078125, "logps/rejected": -286.4767150878906, "loss": 0.0668, "rewards/accuracies": 1.0, "rewards/chosen": -1.2215931415557861, "rewards/margins": 9.75085735321045, "rewards/rejected": -10.972450256347656, "step": 607 }, { "epoch": 0.21, "learning_rate": 1.9716296414382245e-06, "logits/chosen": -1.0643197298049927, "logits/rejected": -1.0533605813980103, "logps/chosen": -199.9147491455078, "logps/rejected": -302.072509765625, "loss": 0.0818, "rewards/accuracies": 1.0, "rewards/chosen": -0.8357186913490295, "rewards/margins": 9.640911102294922, "rewards/rejected": -10.476628303527832, "step": 608 }, { "epoch": 0.21, "learning_rate": 1.971498773382516e-06, "logits/chosen": -1.0254924297332764, "logits/rejected": -1.0075660943984985, "logps/chosen": -252.7603302001953, "logps/rejected": -366.17578125, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -0.9109262824058533, "rewards/margins": 11.277521133422852, "rewards/rejected": -12.188447952270508, "step": 609 }, { "epoch": 0.21, "learning_rate": 1.9713676085473393e-06, "logits/chosen": -0.9914659857749939, "logits/rejected": -0.9545600414276123, "logps/chosen": -202.49282836914062, "logps/rejected": -278.64190673828125, "loss": 0.05, "rewards/accuracies": 1.0, "rewards/chosen": -1.0101003646850586, "rewards/margins": 8.79917049407959, "rewards/rejected": -9.809270858764648, "step": 610 }, { "epoch": 0.21, "learning_rate": 1.971236146972764e-06, "logits/chosen": -1.0120692253112793, "logits/rejected": -0.9995175004005432, "logps/chosen": -182.0583038330078, "logps/rejected": -259.3840637207031, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -2.1303601264953613, "rewards/margins": 8.22620964050293, "rewards/rejected": -10.356571197509766, "step": 611 }, { "epoch": 0.21, "learning_rate": 1.971104388698948e-06, "logits/chosen": -1.010457158088684, "logits/rejected": -0.9906308054924011, "logps/chosen": -173.52369689941406, "logps/rejected": -255.98768615722656, "loss": 0.0415, "rewards/accuracies": 1.0, "rewards/chosen": -1.393603801727295, "rewards/margins": 7.807736396789551, "rewards/rejected": -9.201339721679688, "step": 612 }, { "epoch": 0.21, "learning_rate": 1.9709723337661436e-06, "logits/chosen": -0.9233645796775818, "logits/rejected": -0.8831697702407837, "logps/chosen": -198.03472900390625, "logps/rejected": -228.46975708007812, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": -0.9765930771827698, "rewards/margins": 7.142789363861084, "rewards/rejected": -8.119382858276367, "step": 613 }, { "epoch": 0.21, "learning_rate": 1.970839982214691e-06, "logits/chosen": -0.9302282929420471, "logits/rejected": -0.9107106328010559, "logps/chosen": -217.5958709716797, "logps/rejected": -260.8446044921875, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": -1.8260362148284912, "rewards/margins": 7.508603096008301, "rewards/rejected": -9.334639549255371, "step": 614 }, { "epoch": 0.21, "learning_rate": 1.9707073340850215e-06, "logits/chosen": -1.036520004272461, "logits/rejected": -1.0065844058990479, "logps/chosen": -253.0247802734375, "logps/rejected": -319.5675048828125, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.3008877038955688, "rewards/margins": 8.465633392333984, "rewards/rejected": -9.766520500183105, "step": 615 }, { "epoch": 0.21, "learning_rate": 1.970574389417657e-06, "logits/chosen": -1.0117357969284058, "logits/rejected": -0.988681972026825, "logps/chosen": -170.735107421875, "logps/rejected": -204.94134521484375, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": -1.294456124305725, "rewards/margins": 6.252416610717773, "rewards/rejected": -7.546873092651367, "step": 616 }, { "epoch": 0.21, "learning_rate": 1.970441148253211e-06, "logits/chosen": -1.0419957637786865, "logits/rejected": -1.0194648504257202, "logps/chosen": -219.94483947753906, "logps/rejected": -297.390380859375, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -0.4611174166202545, "rewards/margins": 10.502233505249023, "rewards/rejected": -10.963351249694824, "step": 617 }, { "epoch": 0.21, "learning_rate": 1.9703076106323875e-06, "logits/chosen": -1.025477647781372, "logits/rejected": -1.0076549053192139, "logps/chosen": -169.69357299804688, "logps/rejected": -241.9624481201172, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -0.4257259964942932, "rewards/margins": 7.831089019775391, "rewards/rejected": -8.256814956665039, "step": 618 }, { "epoch": 0.21, "learning_rate": 1.9701737765959787e-06, "logits/chosen": -1.0077927112579346, "logits/rejected": -0.9854635000228882, "logps/chosen": -196.99606323242188, "logps/rejected": -242.73951721191406, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -0.10396620631217957, "rewards/margins": 7.029445171356201, "rewards/rejected": -7.133410930633545, "step": 619 }, { "epoch": 0.21, "learning_rate": 1.9700396461848694e-06, "logits/chosen": -0.9803547263145447, "logits/rejected": -0.9453812837600708, "logps/chosen": -195.19403076171875, "logps/rejected": -167.5091094970703, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -0.67635577917099, "rewards/margins": 6.009521007537842, "rewards/rejected": -6.685876846313477, "step": 620 }, { "epoch": 0.21, "learning_rate": 1.9699052194400357e-06, "logits/chosen": -1.1228145360946655, "logits/rejected": -1.0831584930419922, "logps/chosen": -199.73602294921875, "logps/rejected": -226.23252868652344, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -1.5069228410720825, "rewards/margins": 8.174980163574219, "rewards/rejected": -9.681903839111328, "step": 621 }, { "epoch": 0.21, "learning_rate": 1.969770496402542e-06, "logits/chosen": -0.9843474626541138, "logits/rejected": -0.9620890617370605, "logps/chosen": -162.37217712402344, "logps/rejected": -283.6748962402344, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -1.1376656293869019, "rewards/margins": 10.539735794067383, "rewards/rejected": -11.677401542663574, "step": 622 }, { "epoch": 0.21, "learning_rate": 1.969635477113545e-06, "logits/chosen": -1.0205281972885132, "logits/rejected": -1.0025391578674316, "logps/chosen": -178.0424346923828, "logps/rejected": -275.94488525390625, "loss": 0.0342, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0539283752441406, "rewards/margins": 7.897787094116211, "rewards/rejected": -9.951716423034668, "step": 623 }, { "epoch": 0.21, "learning_rate": 1.969500161614291e-06, "logits/chosen": -1.0436773300170898, "logits/rejected": -0.9934706091880798, "logps/chosen": -178.4107666015625, "logps/rejected": -286.53924560546875, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -0.7618216276168823, "rewards/margins": 11.206098556518555, "rewards/rejected": -11.967921257019043, "step": 624 }, { "epoch": 0.21, "learning_rate": 1.9693645499461174e-06, "logits/chosen": -1.0728391408920288, "logits/rejected": -1.0528666973114014, "logps/chosen": -150.87611389160156, "logps/rejected": -254.32789611816406, "loss": 0.0523, "rewards/accuracies": 1.0, "rewards/chosen": -1.6469178199768066, "rewards/margins": 8.162821769714355, "rewards/rejected": -9.80974006652832, "step": 625 }, { "epoch": 0.21, "learning_rate": 1.9692286421504513e-06, "logits/chosen": -1.0017802715301514, "logits/rejected": -0.9606054425239563, "logps/chosen": -236.2113037109375, "logps/rejected": -286.7795104980469, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -0.2422146201133728, "rewards/margins": 11.143145561218262, "rewards/rejected": -11.385360717773438, "step": 626 }, { "epoch": 0.21, "learning_rate": 1.96909243826881e-06, "logits/chosen": -0.9695266485214233, "logits/rejected": -0.9303867816925049, "logps/chosen": -275.6893615722656, "logps/rejected": -327.8389892578125, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -0.8453956246376038, "rewards/margins": 9.775116920471191, "rewards/rejected": -10.620512962341309, "step": 627 }, { "epoch": 0.21, "learning_rate": 1.9689559383428033e-06, "logits/chosen": -0.9871203899383545, "logits/rejected": -0.9476056098937988, "logps/chosen": -220.78167724609375, "logps/rejected": -256.8388977050781, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -0.9192660450935364, "rewards/margins": 7.383626937866211, "rewards/rejected": -8.30289363861084, "step": 628 }, { "epoch": 0.21, "learning_rate": 1.9688191424141287e-06, "logits/chosen": -1.0310821533203125, "logits/rejected": -1.0142827033996582, "logps/chosen": -242.2467803955078, "logps/rejected": -294.3756103515625, "loss": 0.0785, "rewards/accuracies": 1.0, "rewards/chosen": -0.7684298157691956, "rewards/margins": 7.65114688873291, "rewards/rejected": -8.419576644897461, "step": 629 }, { "epoch": 0.22, "learning_rate": 1.968682050524576e-06, "logits/chosen": -0.9652805924415588, "logits/rejected": -0.9348602890968323, "logps/chosen": -175.21946716308594, "logps/rejected": -236.92608642578125, "loss": 0.0458, "rewards/accuracies": 1.0, "rewards/chosen": -1.8423513174057007, "rewards/margins": 7.571371555328369, "rewards/rejected": -9.413722038269043, "step": 630 }, { "epoch": 0.22, "learning_rate": 1.9685446627160256e-06, "logits/chosen": -0.9917135834693909, "logits/rejected": -0.9635686874389648, "logps/chosen": -233.01011657714844, "logps/rejected": -263.65399169921875, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -1.4550223350524902, "rewards/margins": 7.7235002517700195, "rewards/rejected": -9.178522109985352, "step": 631 }, { "epoch": 0.22, "learning_rate": 1.9684069790304467e-06, "logits/chosen": -1.0337179899215698, "logits/rejected": -1.0058188438415527, "logps/chosen": -163.9872589111328, "logps/rejected": -218.1875, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -0.7815344929695129, "rewards/margins": 7.154051780700684, "rewards/rejected": -7.935586452484131, "step": 632 }, { "epoch": 0.22, "learning_rate": 1.9682689995099e-06, "logits/chosen": -1.0512524843215942, "logits/rejected": -1.0241687297821045, "logps/chosen": -175.11318969726562, "logps/rejected": -259.4974365234375, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.5275185108184814, "rewards/margins": 7.954215049743652, "rewards/rejected": -8.481733322143555, "step": 633 }, { "epoch": 0.22, "learning_rate": 1.9681307241965365e-06, "logits/chosen": -1.0655183792114258, "logits/rejected": -1.0361026525497437, "logps/chosen": -186.89712524414062, "logps/rejected": -243.2215576171875, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": -0.8413152098655701, "rewards/margins": 7.977204322814941, "rewards/rejected": -8.81851863861084, "step": 634 }, { "epoch": 0.22, "learning_rate": 1.967992153132597e-06, "logits/chosen": -0.9918359518051147, "logits/rejected": -0.9872716069221497, "logps/chosen": -144.9886016845703, "logps/rejected": -217.534423828125, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -0.7242692708969116, "rewards/margins": 5.604984760284424, "rewards/rejected": -6.329254150390625, "step": 635 }, { "epoch": 0.22, "learning_rate": 1.9678532863604134e-06, "logits/chosen": -0.9696137309074402, "logits/rejected": -0.9393932819366455, "logps/chosen": -182.46739196777344, "logps/rejected": -258.04461669921875, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": -1.1299999952316284, "rewards/margins": 7.868965148925781, "rewards/rejected": -8.9989652633667, "step": 636 }, { "epoch": 0.22, "learning_rate": 1.9677141239224073e-06, "logits/chosen": -1.0612804889678955, "logits/rejected": -1.042803168296814, "logps/chosen": -195.42855834960938, "logps/rejected": -270.697509765625, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": -0.5586807727813721, "rewards/margins": 8.197094917297363, "rewards/rejected": -8.755775451660156, "step": 637 }, { "epoch": 0.22, "learning_rate": 1.9675746658610915e-06, "logits/chosen": -1.0378834009170532, "logits/rejected": -0.9941144585609436, "logps/chosen": -164.82785034179688, "logps/rejected": -226.58648681640625, "loss": 0.047, "rewards/accuracies": 0.9375, "rewards/chosen": -0.07416681945323944, "rewards/margins": 8.339499473571777, "rewards/rejected": -8.413665771484375, "step": 638 }, { "epoch": 0.22, "learning_rate": 1.9674349122190677e-06, "logits/chosen": -0.9082534909248352, "logits/rejected": -0.8806290030479431, "logps/chosen": -205.37847900390625, "logps/rejected": -276.3962097167969, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -0.8470575213432312, "rewards/margins": 8.544179916381836, "rewards/rejected": -9.39123821258545, "step": 639 }, { "epoch": 0.22, "learning_rate": 1.9672948630390295e-06, "logits/chosen": -1.1453806161880493, "logits/rejected": -1.1088734865188599, "logps/chosen": -180.5576171875, "logps/rejected": -237.164794921875, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -1.3980765342712402, "rewards/margins": 8.099212646484375, "rewards/rejected": -9.497288703918457, "step": 640 }, { "epoch": 0.22, "learning_rate": 1.967154518363759e-06, "logits/chosen": -1.0639292001724243, "logits/rejected": -1.0383260250091553, "logps/chosen": -203.92324829101562, "logps/rejected": -232.1083984375, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4841984510421753, "rewards/margins": 7.513269901275635, "rewards/rejected": -7.997467994689941, "step": 641 }, { "epoch": 0.22, "learning_rate": 1.967013878236131e-06, "logits/chosen": -1.1289010047912598, "logits/rejected": -1.1333467960357666, "logps/chosen": -228.44180297851562, "logps/rejected": -321.05120849609375, "loss": 0.0406, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4915764331817627, "rewards/margins": 7.92516565322876, "rewards/rejected": -9.416741371154785, "step": 642 }, { "epoch": 0.22, "learning_rate": 1.9668729426991076e-06, "logits/chosen": -0.9521026015281677, "logits/rejected": -0.930547833442688, "logps/chosen": -183.835205078125, "logps/rejected": -275.86895751953125, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -0.4450717568397522, "rewards/margins": 10.150436401367188, "rewards/rejected": -10.595508575439453, "step": 643 }, { "epoch": 0.22, "learning_rate": 1.9667317117957435e-06, "logits/chosen": -1.0011229515075684, "logits/rejected": -0.9796583652496338, "logps/chosen": -210.14315795898438, "logps/rejected": -351.76007080078125, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -0.3515510559082031, "rewards/margins": 11.012491226196289, "rewards/rejected": -11.36404037475586, "step": 644 }, { "epoch": 0.22, "learning_rate": 1.9665901855691827e-06, "logits/chosen": -1.079043984413147, "logits/rejected": -1.0340443849563599, "logps/chosen": -266.8999938964844, "logps/rejected": -302.5279235839844, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": -0.36231887340545654, "rewards/margins": 11.2236909866333, "rewards/rejected": -11.586009979248047, "step": 645 }, { "epoch": 0.22, "learning_rate": 1.9664483640626593e-06, "logits/chosen": -1.0771938562393188, "logits/rejected": -1.0413419008255005, "logps/chosen": -159.80340576171875, "logps/rejected": -228.28636169433594, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -0.025618165731430054, "rewards/margins": 8.061450004577637, "rewards/rejected": -8.087067604064941, "step": 646 }, { "epoch": 0.22, "learning_rate": 1.9663062473194983e-06, "logits/chosen": -1.0515267848968506, "logits/rejected": -1.0352050065994263, "logps/chosen": -127.62335205078125, "logps/rejected": -215.0012969970703, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -0.12482747435569763, "rewards/margins": 9.404072761535645, "rewards/rejected": -9.528899192810059, "step": 647 }, { "epoch": 0.22, "learning_rate": 1.966163835383114e-06, "logits/chosen": -1.0756125450134277, "logits/rejected": -1.0579171180725098, "logps/chosen": -212.3084716796875, "logps/rejected": -346.10986328125, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -1.4403663873672485, "rewards/margins": 11.151782035827637, "rewards/rejected": -12.592147827148438, "step": 648 }, { "epoch": 0.22, "learning_rate": 1.966021128297011e-06, "logits/chosen": -0.9067057967185974, "logits/rejected": -0.8737395405769348, "logps/chosen": -155.55014038085938, "logps/rejected": -234.62344360351562, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": 0.30000266432762146, "rewards/margins": 8.565472602844238, "rewards/rejected": -8.26546859741211, "step": 649 }, { "epoch": 0.22, "learning_rate": 1.965878126104785e-06, "logits/chosen": -1.04569673538208, "logits/rejected": -1.0146284103393555, "logps/chosen": -144.30030822753906, "logps/rejected": -202.470458984375, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -1.5962071418762207, "rewards/margins": 6.854212760925293, "rewards/rejected": -8.450420379638672, "step": 650 }, { "epoch": 0.22, "learning_rate": 1.9657348288501206e-06, "logits/chosen": -1.0653152465820312, "logits/rejected": -1.0226447582244873, "logps/chosen": -305.31439208984375, "logps/rejected": -347.8772277832031, "loss": 0.0796, "rewards/accuracies": 1.0, "rewards/chosen": -0.11962635815143585, "rewards/margins": 12.076587677001953, "rewards/rejected": -12.196213722229004, "step": 651 }, { "epoch": 0.22, "learning_rate": 1.965591236576794e-06, "logits/chosen": -0.9722776412963867, "logits/rejected": -0.9609625935554504, "logps/chosen": -159.68911743164062, "logps/rejected": -292.383544921875, "loss": 0.0416, "rewards/accuracies": 1.0, "rewards/chosen": -0.16465352475643158, "rewards/margins": 9.626555442810059, "rewards/rejected": -9.791210174560547, "step": 652 }, { "epoch": 0.22, "learning_rate": 1.9654473493286695e-06, "logits/chosen": -1.018560767173767, "logits/rejected": -0.9968494176864624, "logps/chosen": -206.1985321044922, "logps/rejected": -309.0947265625, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -2.411818027496338, "rewards/margins": 7.371646404266357, "rewards/rejected": -9.783465385437012, "step": 653 }, { "epoch": 0.22, "learning_rate": 1.9653031671497035e-06, "logits/chosen": -1.032963752746582, "logits/rejected": -0.9889149069786072, "logps/chosen": -284.1954650878906, "logps/rejected": -335.92340087890625, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -1.914782166481018, "rewards/margins": 10.562662124633789, "rewards/rejected": -12.47744369506836, "step": 654 }, { "epoch": 0.22, "learning_rate": 1.965158690083941e-06, "logits/chosen": -1.043153166770935, "logits/rejected": -1.0160672664642334, "logps/chosen": -222.2343292236328, "logps/rejected": -274.4403076171875, "loss": 0.0511, "rewards/accuracies": 1.0, "rewards/chosen": -0.7254813313484192, "rewards/margins": 8.265396118164062, "rewards/rejected": -8.990877151489258, "step": 655 }, { "epoch": 0.22, "learning_rate": 1.965013918175519e-06, "logits/chosen": -1.0868022441864014, "logits/rejected": -1.0446505546569824, "logps/chosen": -183.05360412597656, "logps/rejected": -227.61508178710938, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -0.6083686351776123, "rewards/margins": 9.169395446777344, "rewards/rejected": -9.777764320373535, "step": 656 }, { "epoch": 0.22, "learning_rate": 1.964868851468662e-06, "logits/chosen": -1.0169932842254639, "logits/rejected": -0.9911070466041565, "logps/chosen": -201.995849609375, "logps/rejected": -300.40277099609375, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": -0.0900600478053093, "rewards/margins": 10.349635124206543, "rewards/rejected": -10.439696311950684, "step": 657 }, { "epoch": 0.22, "learning_rate": 1.9647234900076863e-06, "logits/chosen": -0.9746060967445374, "logits/rejected": -0.9365993738174438, "logps/chosen": -248.68650817871094, "logps/rejected": -356.85430908203125, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.0538514852523804, "rewards/margins": 10.221041679382324, "rewards/rejected": -11.274893760681152, "step": 658 }, { "epoch": 0.22, "learning_rate": 1.964577833836998e-06, "logits/chosen": -1.0744062662124634, "logits/rejected": -1.0454988479614258, "logps/chosen": -211.5369415283203, "logps/rejected": -262.020751953125, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": -0.5862870216369629, "rewards/margins": 8.667384147644043, "rewards/rejected": -9.253671646118164, "step": 659 }, { "epoch": 0.23, "learning_rate": 1.9644318830010926e-06, "logits/chosen": -1.0475540161132812, "logits/rejected": -1.0014684200286865, "logps/chosen": -224.05709838867188, "logps/rejected": -325.707763671875, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -0.9635963439941406, "rewards/margins": 11.684767723083496, "rewards/rejected": -12.648364067077637, "step": 660 }, { "epoch": 0.23, "learning_rate": 1.9642856375445565e-06, "logits/chosen": -1.0352061986923218, "logits/rejected": -1.0067598819732666, "logps/chosen": -191.39923095703125, "logps/rejected": -320.1025695800781, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": -1.0915428400039673, "rewards/margins": 10.642631530761719, "rewards/rejected": -11.734175682067871, "step": 661 }, { "epoch": 0.23, "learning_rate": 1.964139097512066e-06, "logits/chosen": -1.0001800060272217, "logits/rejected": -0.9812297821044922, "logps/chosen": -187.08218383789062, "logps/rejected": -251.05404663085938, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -0.9704875946044922, "rewards/margins": 7.654929161071777, "rewards/rejected": -8.625417709350586, "step": 662 }, { "epoch": 0.23, "learning_rate": 1.963992262948386e-06, "logits/chosen": -1.123551607131958, "logits/rejected": -1.0915981531143188, "logps/chosen": -129.5501251220703, "logps/rejected": -227.64157104492188, "loss": 0.0615, "rewards/accuracies": 1.0, "rewards/chosen": -1.2695375680923462, "rewards/margins": 7.9836931228637695, "rewards/rejected": -9.253230094909668, "step": 663 }, { "epoch": 0.23, "learning_rate": 1.9638451338983736e-06, "logits/chosen": -1.0069488286972046, "logits/rejected": -0.9612151384353638, "logps/chosen": -253.24270629882812, "logps/rejected": -270.3935241699219, "loss": 0.0362, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3129057884216309, "rewards/margins": 8.96303653717041, "rewards/rejected": -10.275941848754883, "step": 664 }, { "epoch": 0.23, "learning_rate": 1.963697710406974e-06, "logits/chosen": -1.0084797143936157, "logits/rejected": -0.9714872241020203, "logps/chosen": -138.89804077148438, "logps/rejected": -189.7554473876953, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -0.8951245546340942, "rewards/margins": 7.525330543518066, "rewards/rejected": -8.420454978942871, "step": 665 }, { "epoch": 0.23, "learning_rate": 1.9635499925192227e-06, "logits/chosen": -1.1073062419891357, "logits/rejected": -1.0547325611114502, "logps/chosen": -276.13104248046875, "logps/rejected": -313.5483703613281, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": 0.3698904514312744, "rewards/margins": 10.977814674377441, "rewards/rejected": -10.607922554016113, "step": 666 }, { "epoch": 0.23, "learning_rate": 1.9634019802802465e-06, "logits/chosen": -1.0588667392730713, "logits/rejected": -1.0423061847686768, "logps/chosen": -252.88177490234375, "logps/rejected": -351.1472473144531, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -1.2914488315582275, "rewards/margins": 8.621979713439941, "rewards/rejected": -9.913427352905273, "step": 667 }, { "epoch": 0.23, "learning_rate": 1.9632536737352605e-06, "logits/chosen": -0.9350774884223938, "logits/rejected": -0.8727109432220459, "logps/chosen": -223.38636779785156, "logps/rejected": -343.8854675292969, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -1.3966913223266602, "rewards/margins": 11.113850593566895, "rewards/rejected": -12.510541915893555, "step": 668 }, { "epoch": 0.23, "learning_rate": 1.9631050729295705e-06, "logits/chosen": -0.8516093492507935, "logits/rejected": -0.8224790096282959, "logps/chosen": -151.1290283203125, "logps/rejected": -202.3432159423828, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -1.5664976835250854, "rewards/margins": 5.907946586608887, "rewards/rejected": -7.47444486618042, "step": 669 }, { "epoch": 0.23, "learning_rate": 1.9629561779085714e-06, "logits/chosen": -1.0800334215164185, "logits/rejected": -1.0556432008743286, "logps/chosen": -196.30101013183594, "logps/rejected": -284.6798095703125, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -0.942827582359314, "rewards/margins": 9.07481861114502, "rewards/rejected": -10.017646789550781, "step": 670 }, { "epoch": 0.23, "learning_rate": 1.9628069887177495e-06, "logits/chosen": -1.0566284656524658, "logits/rejected": -1.02568781375885, "logps/chosen": -176.43626403808594, "logps/rejected": -273.16827392578125, "loss": 0.0298, "rewards/accuracies": 0.9375, "rewards/chosen": -1.768890619277954, "rewards/margins": 9.675127029418945, "rewards/rejected": -11.44401741027832, "step": 671 }, { "epoch": 0.23, "learning_rate": 1.9626575054026792e-06, "logits/chosen": -1.0217009782791138, "logits/rejected": -0.9983780384063721, "logps/chosen": -194.24346923828125, "logps/rejected": -258.3943176269531, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -1.1536442041397095, "rewards/margins": 7.517274379730225, "rewards/rejected": -8.670918464660645, "step": 672 }, { "epoch": 0.23, "learning_rate": 1.9625077280090263e-06, "logits/chosen": -0.9231504797935486, "logits/rejected": -0.8991093635559082, "logps/chosen": -216.908203125, "logps/rejected": -296.7214660644531, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/chosen": -1.1448911428451538, "rewards/margins": 10.116788864135742, "rewards/rejected": -11.261680603027344, "step": 673 }, { "epoch": 0.23, "learning_rate": 1.962357656582545e-06, "logits/chosen": -1.0207840204238892, "logits/rejected": -1.0057406425476074, "logps/chosen": -153.6666259765625, "logps/rejected": -220.1033935546875, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -2.1734917163848877, "rewards/margins": 7.064248561859131, "rewards/rejected": -9.237740516662598, "step": 674 }, { "epoch": 0.23, "learning_rate": 1.962207291169081e-06, "logits/chosen": -1.0202800035476685, "logits/rejected": -1.0017344951629639, "logps/chosen": -153.6106414794922, "logps/rejected": -254.55299377441406, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -1.4512866735458374, "rewards/margins": 9.38815689086914, "rewards/rejected": -10.839444160461426, "step": 675 }, { "epoch": 0.23, "learning_rate": 1.9620566318145676e-06, "logits/chosen": -0.8683407306671143, "logits/rejected": -0.8462688326835632, "logps/chosen": -215.56996154785156, "logps/rejected": -314.83465576171875, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -1.7450116872787476, "rewards/margins": 10.404037475585938, "rewards/rejected": -12.1490478515625, "step": 676 }, { "epoch": 0.23, "learning_rate": 1.96190567856503e-06, "logits/chosen": -1.0298688411712646, "logits/rejected": -0.9955416321754456, "logps/chosen": -233.06283569335938, "logps/rejected": -282.71075439453125, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": -1.2563210725784302, "rewards/margins": 7.794887542724609, "rewards/rejected": -9.05120849609375, "step": 677 }, { "epoch": 0.23, "learning_rate": 1.961754431466583e-06, "logits/chosen": -1.0521329641342163, "logits/rejected": -1.0311657190322876, "logps/chosen": -183.37449645996094, "logps/rejected": -234.6832275390625, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -0.11268745362758636, "rewards/margins": 8.956180572509766, "rewards/rejected": -9.068868637084961, "step": 678 }, { "epoch": 0.23, "learning_rate": 1.961602890565429e-06, "logits/chosen": -0.9237738251686096, "logits/rejected": -0.9004783630371094, "logps/chosen": -187.092041015625, "logps/rejected": -260.9742431640625, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -0.2204638421535492, "rewards/margins": 8.8482084274292, "rewards/rejected": -9.068672180175781, "step": 679 }, { "epoch": 0.23, "learning_rate": 1.9614510559078624e-06, "logits/chosen": -1.0548274517059326, "logits/rejected": -1.028143286705017, "logps/chosen": -238.1400604248047, "logps/rejected": -352.8462829589844, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -2.5403780937194824, "rewards/margins": 10.106640815734863, "rewards/rejected": -12.647019386291504, "step": 680 }, { "epoch": 0.23, "learning_rate": 1.9612989275402666e-06, "logits/chosen": -0.9889004230499268, "logits/rejected": -0.943626344203949, "logps/chosen": -274.3614196777344, "logps/rejected": -366.3895568847656, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.2048968076705933, "rewards/margins": 8.515274047851562, "rewards/rejected": -9.720170974731445, "step": 681 }, { "epoch": 0.23, "learning_rate": 1.9611465055091144e-06, "logits/chosen": -0.8475651741027832, "logits/rejected": -0.8135337233543396, "logps/chosen": -205.0541534423828, "logps/rejected": -259.55572509765625, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": -1.2357250452041626, "rewards/margins": 8.684035301208496, "rewards/rejected": -9.919759750366211, "step": 682 }, { "epoch": 0.23, "learning_rate": 1.9609937898609694e-06, "logits/chosen": -1.0603822469711304, "logits/rejected": -1.033166766166687, "logps/chosen": -177.046875, "logps/rejected": -216.40283203125, "loss": 0.0358, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0506751537322998, "rewards/margins": 8.015528678894043, "rewards/rejected": -9.066204071044922, "step": 683 }, { "epoch": 0.23, "learning_rate": 1.960840780642483e-06, "logits/chosen": -1.0157976150512695, "logits/rejected": -1.0038741827011108, "logps/chosen": -170.06137084960938, "logps/rejected": -252.653076171875, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": -1.1474288702011108, "rewards/margins": 7.885977745056152, "rewards/rejected": -9.033407211303711, "step": 684 }, { "epoch": 0.23, "learning_rate": 1.9606874779003985e-06, "logits/chosen": -1.0526299476623535, "logits/rejected": -0.988440215587616, "logps/chosen": -257.4499206542969, "logps/rejected": -259.06640625, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.100244402885437, "rewards/margins": 9.744131088256836, "rewards/rejected": -10.844375610351562, "step": 685 }, { "epoch": 0.23, "learning_rate": 1.960533881681547e-06, "logits/chosen": -0.9319531321525574, "logits/rejected": -0.9062334895133972, "logps/chosen": -212.47116088867188, "logps/rejected": -317.78826904296875, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": 0.4861142039299011, "rewards/margins": 10.85464859008789, "rewards/rejected": -10.36853313446045, "step": 686 }, { "epoch": 0.23, "learning_rate": 1.9603799920328506e-06, "logits/chosen": -0.975054144859314, "logits/rejected": -0.9492841362953186, "logps/chosen": -205.17034912109375, "logps/rejected": -305.6719055175781, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": -1.4045274257659912, "rewards/margins": 7.616394996643066, "rewards/rejected": -9.020922660827637, "step": 687 }, { "epoch": 0.23, "learning_rate": 1.96022580900132e-06, "logits/chosen": -0.884172260761261, "logits/rejected": -0.8517642021179199, "logps/chosen": -170.8601531982422, "logps/rejected": -243.27789306640625, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -0.881413459777832, "rewards/margins": 8.570541381835938, "rewards/rejected": -9.45195484161377, "step": 688 }, { "epoch": 0.24, "learning_rate": 1.9600713326340557e-06, "logits/chosen": -1.044819712638855, "logits/rejected": -1.0151269435882568, "logps/chosen": -232.1649932861328, "logps/rejected": -318.8421630859375, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": -1.142850637435913, "rewards/margins": 10.578606605529785, "rewards/rejected": -11.721458435058594, "step": 689 }, { "epoch": 0.24, "learning_rate": 1.959916562978249e-06, "logits/chosen": -1.0829670429229736, "logits/rejected": -1.0590476989746094, "logps/chosen": -154.43345642089844, "logps/rejected": -220.9655303955078, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.273014098405838, "rewards/margins": 7.67057991027832, "rewards/rejected": -7.943594455718994, "step": 690 }, { "epoch": 0.24, "learning_rate": 1.9597615000811796e-06, "logits/chosen": -0.9264284372329712, "logits/rejected": -0.9012675285339355, "logps/chosen": -168.75376892089844, "logps/rejected": -267.5354919433594, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -1.661386251449585, "rewards/margins": 6.7507548332214355, "rewards/rejected": -8.412140846252441, "step": 691 }, { "epoch": 0.24, "learning_rate": 1.959606143990217e-06, "logits/chosen": -1.0347621440887451, "logits/rejected": -1.011075735092163, "logps/chosen": -182.36871337890625, "logps/rejected": -280.89727783203125, "loss": 0.0706, "rewards/accuracies": 1.0, "rewards/chosen": -1.7931784391403198, "rewards/margins": 9.383065223693848, "rewards/rejected": -11.176244735717773, "step": 692 }, { "epoch": 0.24, "learning_rate": 1.95945049475282e-06, "logits/chosen": -1.0016303062438965, "logits/rejected": -0.968299150466919, "logps/chosen": -230.35548400878906, "logps/rejected": -308.3970947265625, "loss": 0.0669, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4266706705093384, "rewards/margins": 8.979804992675781, "rewards/rejected": -10.406475067138672, "step": 693 }, { "epoch": 0.24, "learning_rate": 1.959294552416537e-06, "logits/chosen": -1.0945240259170532, "logits/rejected": -1.0629767179489136, "logps/chosen": -146.7774200439453, "logps/rejected": -224.3486785888672, "loss": 0.0532, "rewards/accuracies": 1.0, "rewards/chosen": -0.9177032709121704, "rewards/margins": 9.099295616149902, "rewards/rejected": -10.016999244689941, "step": 694 }, { "epoch": 0.24, "learning_rate": 1.9591383170290075e-06, "logits/chosen": -0.8917986154556274, "logits/rejected": -0.8662605285644531, "logps/chosen": -237.54916381835938, "logps/rejected": -291.08294677734375, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -2.2291207313537598, "rewards/margins": 7.803454875946045, "rewards/rejected": -10.032575607299805, "step": 695 }, { "epoch": 0.24, "learning_rate": 1.9589817886379583e-06, "logits/chosen": -0.9251880645751953, "logits/rejected": -0.9072622656822205, "logps/chosen": -173.06036376953125, "logps/rejected": -308.0235900878906, "loss": 0.0358, "rewards/accuracies": 0.9375, "rewards/chosen": -0.99141526222229, "rewards/margins": 9.563018798828125, "rewards/rejected": -10.554434776306152, "step": 696 }, { "epoch": 0.24, "learning_rate": 1.958824967291207e-06, "logits/chosen": -0.8885049819946289, "logits/rejected": -0.8730230927467346, "logps/chosen": -214.6798095703125, "logps/rejected": -346.612060546875, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": -1.187886357307434, "rewards/margins": 11.078457832336426, "rewards/rejected": -12.266343116760254, "step": 697 }, { "epoch": 0.24, "learning_rate": 1.9586678530366606e-06, "logits/chosen": -1.0464407205581665, "logits/rejected": -1.0262647867202759, "logps/chosen": -204.7367401123047, "logps/rejected": -347.6671142578125, "loss": 0.0431, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0720852613449097, "rewards/margins": 10.457124710083008, "rewards/rejected": -11.529210090637207, "step": 698 }, { "epoch": 0.24, "learning_rate": 1.9585104459223143e-06, "logits/chosen": -0.999494731426239, "logits/rejected": -0.9699658155441284, "logps/chosen": -209.35873413085938, "logps/rejected": -312.3683776855469, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -0.7561853528022766, "rewards/margins": 11.623835563659668, "rewards/rejected": -12.380021095275879, "step": 699 }, { "epoch": 0.24, "learning_rate": 1.9583527459962553e-06, "logits/chosen": -1.137784481048584, "logits/rejected": -1.0995234251022339, "logps/chosen": -218.2805938720703, "logps/rejected": -279.6593017578125, "loss": 0.0674, "rewards/accuracies": 1.0, "rewards/chosen": -1.0487946271896362, "rewards/margins": 8.788318634033203, "rewards/rejected": -9.837113380432129, "step": 700 }, { "epoch": 0.24, "learning_rate": 1.9581947533066573e-06, "logits/chosen": -0.997795581817627, "logits/rejected": -0.993580162525177, "logps/chosen": -209.7467041015625, "logps/rejected": -329.090087890625, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -1.164149284362793, "rewards/margins": 9.844196319580078, "rewards/rejected": -11.008344650268555, "step": 701 }, { "epoch": 0.24, "learning_rate": 1.958036467901786e-06, "logits/chosen": -1.0852370262145996, "logits/rejected": -1.0549380779266357, "logps/chosen": -208.29159545898438, "logps/rejected": -291.6758117675781, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": -0.05915844440460205, "rewards/margins": 10.88127326965332, "rewards/rejected": -10.9404296875, "step": 702 }, { "epoch": 0.24, "learning_rate": 1.957877889829995e-06, "logits/chosen": -1.016475796699524, "logits/rejected": -0.977678656578064, "logps/chosen": -316.48345947265625, "logps/rejected": -334.42047119140625, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": 0.30749940872192383, "rewards/margins": 9.882406234741211, "rewards/rejected": -9.574907302856445, "step": 703 }, { "epoch": 0.24, "learning_rate": 1.957719019139727e-06, "logits/chosen": -1.0847878456115723, "logits/rejected": -1.0530849695205688, "logps/chosen": -203.47789001464844, "logps/rejected": -280.8157653808594, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -1.518596887588501, "rewards/margins": 8.059813499450684, "rewards/rejected": -9.578410148620605, "step": 704 }, { "epoch": 0.24, "learning_rate": 1.957559855879516e-06, "logits/chosen": -1.0473880767822266, "logits/rejected": -1.0160950422286987, "logps/chosen": -213.18255615234375, "logps/rejected": -323.6092529296875, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -0.3700045347213745, "rewards/margins": 10.755389213562012, "rewards/rejected": -11.125394821166992, "step": 705 }, { "epoch": 0.24, "learning_rate": 1.9574004000979834e-06, "logits/chosen": -1.0680112838745117, "logits/rejected": -1.0436965227127075, "logps/chosen": -266.01141357421875, "logps/rejected": -352.98162841796875, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.0900388956069946, "rewards/margins": 9.24240779876709, "rewards/rejected": -10.332447052001953, "step": 706 }, { "epoch": 0.24, "learning_rate": 1.957240651843841e-06, "logits/chosen": -1.0687328577041626, "logits/rejected": -1.0416321754455566, "logps/chosen": -178.66775512695312, "logps/rejected": -303.393798828125, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -0.22450807690620422, "rewards/margins": 10.725939750671387, "rewards/rejected": -10.950447082519531, "step": 707 }, { "epoch": 0.24, "learning_rate": 1.9570806111658896e-06, "logits/chosen": -1.0664212703704834, "logits/rejected": -1.048960566520691, "logps/chosen": -206.128173828125, "logps/rejected": -262.605224609375, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -0.40237438678741455, "rewards/margins": 8.180137634277344, "rewards/rejected": -8.582511901855469, "step": 708 }, { "epoch": 0.24, "learning_rate": 1.95692027811302e-06, "logits/chosen": -1.012845754623413, "logits/rejected": -0.9774873852729797, "logps/chosen": -206.1182098388672, "logps/rejected": -329.833740234375, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -0.5738999247550964, "rewards/margins": 9.740826606750488, "rewards/rejected": -10.314725875854492, "step": 709 }, { "epoch": 0.24, "learning_rate": 1.956759652734211e-06, "logits/chosen": -0.9832122325897217, "logits/rejected": -0.9557477831840515, "logps/chosen": -169.1760711669922, "logps/rejected": -261.9610290527344, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": -0.967536985874176, "rewards/margins": 9.769007682800293, "rewards/rejected": -10.736543655395508, "step": 710 }, { "epoch": 0.24, "learning_rate": 1.956598735078531e-06, "logits/chosen": -0.9534260034561157, "logits/rejected": -0.8971152901649475, "logps/chosen": -227.65139770507812, "logps/rejected": -306.9148864746094, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -1.004835605621338, "rewards/margins": 10.119352340698242, "rewards/rejected": -11.124188423156738, "step": 711 }, { "epoch": 0.24, "learning_rate": 1.9564375251951395e-06, "logits/chosen": -1.1063364744186401, "logits/rejected": -1.0579935312271118, "logps/chosen": -237.13865661621094, "logps/rejected": -282.0549011230469, "loss": 0.0372, "rewards/accuracies": 0.9375, "rewards/chosen": -0.02318219467997551, "rewards/margins": 7.802489757537842, "rewards/rejected": -7.825672149658203, "step": 712 }, { "epoch": 0.24, "learning_rate": 1.956276023133283e-06, "logits/chosen": -0.9892926812171936, "logits/rejected": -0.9955443143844604, "logps/chosen": -141.3991241455078, "logps/rejected": -264.668212890625, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -0.21233777701854706, "rewards/margins": 8.91733455657959, "rewards/rejected": -9.129672050476074, "step": 713 }, { "epoch": 0.24, "learning_rate": 1.9561142289422985e-06, "logits/chosen": -1.1023445129394531, "logits/rejected": -1.0851497650146484, "logps/chosen": -103.25350189208984, "logps/rejected": -201.5673370361328, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -1.5244200229644775, "rewards/margins": 7.702692985534668, "rewards/rejected": -9.227112770080566, "step": 714 }, { "epoch": 0.24, "learning_rate": 1.955952142671612e-06, "logits/chosen": -0.8950226902961731, "logits/rejected": -0.8446094393730164, "logps/chosen": -195.66973876953125, "logps/rejected": -250.1131591796875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.2158262729644775, "rewards/margins": 10.037874221801758, "rewards/rejected": -11.25369930267334, "step": 715 }, { "epoch": 0.24, "learning_rate": 1.9557897643707375e-06, "logits/chosen": -1.0221203565597534, "logits/rejected": -1.0165373086929321, "logps/chosen": -205.94442749023438, "logps/rejected": -300.8836364746094, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": -0.6596049666404724, "rewards/margins": 8.960823059082031, "rewards/rejected": -9.620427131652832, "step": 716 }, { "epoch": 0.24, "learning_rate": 1.9556270940892804e-06, "logits/chosen": -0.9954067468643188, "logits/rejected": -0.9665210843086243, "logps/chosen": -223.65243530273438, "logps/rejected": -332.84619140625, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -0.9030921459197998, "rewards/margins": 9.528157234191895, "rewards/rejected": -10.431249618530273, "step": 717 }, { "epoch": 0.25, "learning_rate": 1.9554641318769342e-06, "logits/chosen": -1.0762343406677246, "logits/rejected": -1.0581655502319336, "logps/chosen": -141.22132873535156, "logps/rejected": -211.4622344970703, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -1.4130803346633911, "rewards/margins": 6.424720287322998, "rewards/rejected": -7.837800025939941, "step": 718 }, { "epoch": 0.25, "learning_rate": 1.9553008777834816e-06, "logits/chosen": -0.9353131651878357, "logits/rejected": -0.8998590111732483, "logps/chosen": -191.09535217285156, "logps/rejected": -244.47267150878906, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -0.7703301906585693, "rewards/margins": 7.876553058624268, "rewards/rejected": -8.646883010864258, "step": 719 }, { "epoch": 0.25, "learning_rate": 1.9551373318587937e-06, "logits/chosen": -0.9928648471832275, "logits/rejected": -0.9664286971092224, "logps/chosen": -216.87490844726562, "logps/rejected": -314.8211669921875, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -1.1999949216842651, "rewards/margins": 9.677196502685547, "rewards/rejected": -10.877191543579102, "step": 720 }, { "epoch": 0.25, "learning_rate": 1.954973494152833e-06, "logits/chosen": -1.0229790210723877, "logits/rejected": -1.021851658821106, "logps/chosen": -163.89401245117188, "logps/rejected": -260.3175048828125, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -2.3407669067382812, "rewards/margins": 7.722709655761719, "rewards/rejected": -10.063475608825684, "step": 721 }, { "epoch": 0.25, "learning_rate": 1.954809364715648e-06, "logits/chosen": -0.9998190402984619, "logits/rejected": -0.9633854627609253, "logps/chosen": -263.5771484375, "logps/rejected": -372.28790283203125, "loss": 0.0377, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7564330101013184, "rewards/margins": 12.851944923400879, "rewards/rejected": -13.608379364013672, "step": 722 }, { "epoch": 0.25, "learning_rate": 1.954644943597379e-06, "logits/chosen": -0.9176173210144043, "logits/rejected": -0.8739544749259949, "logps/chosen": -247.86239624023438, "logps/rejected": -287.1619873046875, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.9617117047309875, "rewards/margins": 10.32783317565918, "rewards/rejected": -11.289546966552734, "step": 723 }, { "epoch": 0.25, "learning_rate": 1.954480230848254e-06, "logits/chosen": -1.090673565864563, "logits/rejected": -1.0670137405395508, "logps/chosen": -208.96876525878906, "logps/rejected": -365.140380859375, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -1.3911654949188232, "rewards/margins": 12.849706649780273, "rewards/rejected": -14.24087142944336, "step": 724 }, { "epoch": 0.25, "learning_rate": 1.9543152265185905e-06, "logits/chosen": -1.0440469980239868, "logits/rejected": -1.0159906148910522, "logps/chosen": -192.85443115234375, "logps/rejected": -242.52159118652344, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": -1.6569701433181763, "rewards/margins": 9.571624755859375, "rewards/rejected": -11.228595733642578, "step": 725 }, { "epoch": 0.25, "learning_rate": 1.9541499306587952e-06, "logits/chosen": -0.9712510704994202, "logits/rejected": -0.9522892236709595, "logps/chosen": -131.22828674316406, "logps/rejected": -244.82388305664062, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -1.1699011325836182, "rewards/margins": 9.348078727722168, "rewards/rejected": -10.517979621887207, "step": 726 }, { "epoch": 0.25, "learning_rate": 1.9539843433193635e-06, "logits/chosen": -1.0823265314102173, "logits/rejected": -1.046478271484375, "logps/chosen": -240.0398406982422, "logps/rejected": -279.86260986328125, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -1.1789504289627075, "rewards/margins": 10.211504936218262, "rewards/rejected": -11.39045524597168, "step": 727 }, { "epoch": 0.25, "learning_rate": 1.953818464550881e-06, "logits/chosen": -1.025438904762268, "logits/rejected": -0.9921610951423645, "logps/chosen": -201.55787658691406, "logps/rejected": -258.1631164550781, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -1.1022695302963257, "rewards/margins": 7.782971382141113, "rewards/rejected": -8.88524055480957, "step": 728 }, { "epoch": 0.25, "learning_rate": 1.95365229440402e-06, "logits/chosen": -1.029396653175354, "logits/rejected": -1.014448642730713, "logps/chosen": -167.8575439453125, "logps/rejected": -318.30474853515625, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -0.5523455142974854, "rewards/margins": 10.9169340133667, "rewards/rejected": -11.469279289245605, "step": 729 }, { "epoch": 0.25, "learning_rate": 1.9534858329295436e-06, "logits/chosen": -0.9820784330368042, "logits/rejected": -0.9529905319213867, "logps/chosen": -158.54441833496094, "logps/rejected": -224.41085815429688, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -1.3319497108459473, "rewards/margins": 8.846298217773438, "rewards/rejected": -10.17824649810791, "step": 730 }, { "epoch": 0.25, "learning_rate": 1.9533190801783036e-06, "logits/chosen": -1.092114806175232, "logits/rejected": -1.0628927946090698, "logps/chosen": -175.5540771484375, "logps/rejected": -230.63792419433594, "loss": 0.0398, "rewards/accuracies": 0.9375, "rewards/chosen": -0.43649822473526, "rewards/margins": 7.742192268371582, "rewards/rejected": -8.178689956665039, "step": 731 }, { "epoch": 0.25, "learning_rate": 1.953152036201241e-06, "logits/chosen": -1.037958025932312, "logits/rejected": -1.0132925510406494, "logps/chosen": -172.29307556152344, "logps/rejected": -236.33262634277344, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": -1.4862734079360962, "rewards/margins": 7.219815254211426, "rewards/rejected": -8.70608901977539, "step": 732 }, { "epoch": 0.25, "learning_rate": 1.952984701049385e-06, "logits/chosen": -1.0571666955947876, "logits/rejected": -1.0586482286453247, "logps/chosen": -156.11614990234375, "logps/rejected": -302.1845703125, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.7313134670257568, "rewards/margins": 9.760934829711914, "rewards/rejected": -10.49224853515625, "step": 733 }, { "epoch": 0.25, "learning_rate": 1.952817074773854e-06, "logits/chosen": -1.0101306438446045, "logits/rejected": -0.9884994626045227, "logps/chosen": -219.40472412109375, "logps/rejected": -284.7286682128906, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -0.9342027902603149, "rewards/margins": 8.647594451904297, "rewards/rejected": -9.58179759979248, "step": 734 }, { "epoch": 0.25, "learning_rate": 1.9526491574258565e-06, "logits/chosen": -1.0458420515060425, "logits/rejected": -1.0214357376098633, "logps/chosen": -223.212646484375, "logps/rejected": -306.7422180175781, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -1.568542718887329, "rewards/margins": 9.681532859802246, "rewards/rejected": -11.250076293945312, "step": 735 }, { "epoch": 0.25, "learning_rate": 1.9524809490566874e-06, "logits/chosen": -0.9083909392356873, "logits/rejected": -0.8652333617210388, "logps/chosen": -245.046630859375, "logps/rejected": -293.3311767578125, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -0.033893242478370667, "rewards/margins": 9.764790534973145, "rewards/rejected": -9.798683166503906, "step": 736 }, { "epoch": 0.25, "learning_rate": 1.952312449717734e-06, "logits/chosen": -1.0090930461883545, "logits/rejected": -0.98747318983078, "logps/chosen": -192.131103515625, "logps/rejected": -307.031494140625, "loss": 0.0304, "rewards/accuracies": 1.0, "rewards/chosen": -1.0229976177215576, "rewards/margins": 10.84591007232666, "rewards/rejected": -11.86890697479248, "step": 737 }, { "epoch": 0.25, "learning_rate": 1.9521436594604683e-06, "logits/chosen": -1.0537662506103516, "logits/rejected": -1.0187963247299194, "logps/chosen": -180.94674682617188, "logps/rejected": -239.36280822753906, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": -2.068143844604492, "rewards/margins": 8.320053100585938, "rewards/rejected": -10.38819694519043, "step": 738 }, { "epoch": 0.25, "learning_rate": 1.951974578336455e-06, "logits/chosen": -1.0306092500686646, "logits/rejected": -0.9958199262619019, "logps/chosen": -218.8263397216797, "logps/rejected": -265.4399719238281, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.44374769926071167, "rewards/margins": 10.240249633789062, "rewards/rejected": -10.683998107910156, "step": 739 }, { "epoch": 0.25, "learning_rate": 1.9518052063973454e-06, "logits/chosen": -1.0876193046569824, "logits/rejected": -1.0705645084381104, "logps/chosen": -238.48695373535156, "logps/rejected": -346.08258056640625, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -1.7318416833877563, "rewards/margins": 10.046793937683105, "rewards/rejected": -11.778635025024414, "step": 740 }, { "epoch": 0.25, "learning_rate": 1.951635543694881e-06, "logits/chosen": -1.0538681745529175, "logits/rejected": -1.0420958995819092, "logps/chosen": -224.4286651611328, "logps/rejected": -364.5915832519531, "loss": 0.0283, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3351020812988281, "rewards/margins": 11.157179832458496, "rewards/rejected": -12.49228286743164, "step": 741 }, { "epoch": 0.25, "learning_rate": 1.9514655902808903e-06, "logits/chosen": -1.0360904932022095, "logits/rejected": -0.9968668222427368, "logps/chosen": -227.98138427734375, "logps/rejected": -283.56634521484375, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -0.7474817037582397, "rewards/margins": 11.15524673461914, "rewards/rejected": -11.902728080749512, "step": 742 }, { "epoch": 0.25, "learning_rate": 1.951295346207292e-06, "logits/chosen": -0.9591007232666016, "logits/rejected": -0.9284868836402893, "logps/chosen": -124.56591796875, "logps/rejected": -164.18258666992188, "loss": 0.0758, "rewards/accuracies": 0.9375, "rewards/chosen": -1.439741611480713, "rewards/margins": 6.308020114898682, "rewards/rejected": -7.747760772705078, "step": 743 }, { "epoch": 0.25, "learning_rate": 1.9511248115260944e-06, "logits/chosen": -0.9359673261642456, "logits/rejected": -0.933637261390686, "logps/chosen": -175.418212890625, "logps/rejected": -319.31854248046875, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 0.015595316886901855, "rewards/margins": 10.340923309326172, "rewards/rejected": -10.32532787322998, "step": 744 }, { "epoch": 0.25, "learning_rate": 1.950953986289392e-06, "logits/chosen": -1.0296440124511719, "logits/rejected": -1.0070277452468872, "logps/chosen": -220.123046875, "logps/rejected": -321.7061462402344, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -2.633110523223877, "rewards/margins": 10.794480323791504, "rewards/rejected": -13.427591323852539, "step": 745 }, { "epoch": 0.25, "learning_rate": 1.9507828705493707e-06, "logits/chosen": -0.9862819314002991, "logits/rejected": -0.9720954895019531, "logps/chosen": -156.6352081298828, "logps/rejected": -249.4968719482422, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": -1.5006235837936401, "rewards/margins": 9.585672378540039, "rewards/rejected": -11.086296081542969, "step": 746 }, { "epoch": 0.25, "learning_rate": 1.950611464358303e-06, "logits/chosen": -1.0492608547210693, "logits/rejected": -1.0266231298446655, "logps/chosen": -138.26776123046875, "logps/rejected": -256.4071350097656, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -0.8758758306503296, "rewards/margins": 8.53162956237793, "rewards/rejected": -9.407505989074707, "step": 747 }, { "epoch": 0.26, "learning_rate": 1.9504397677685523e-06, "logits/chosen": -1.1633754968643188, "logits/rejected": -1.144386649131775, "logps/chosen": -206.99920654296875, "logps/rejected": -298.22015380859375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.8632767200469971, "rewards/margins": 9.051511764526367, "rewards/rejected": -9.914789199829102, "step": 748 }, { "epoch": 0.26, "learning_rate": 1.9502677808325684e-06, "logits/chosen": -0.9828904867172241, "logits/rejected": -0.9327083230018616, "logps/chosen": -177.26971435546875, "logps/rejected": -168.60443115234375, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -0.5405882000923157, "rewards/margins": 7.022565841674805, "rewards/rejected": -7.5631537437438965, "step": 749 }, { "epoch": 0.26, "learning_rate": 1.950095503602892e-06, "logits/chosen": -0.9495849609375, "logits/rejected": -0.9323869347572327, "logps/chosen": -166.576416015625, "logps/rejected": -290.8043212890625, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -0.18948689103126526, "rewards/margins": 10.445101737976074, "rewards/rejected": -10.634588241577148, "step": 750 }, { "epoch": 0.26, "learning_rate": 1.94992293613215e-06, "logits/chosen": -1.047509789466858, "logits/rejected": -1.019056797027588, "logps/chosen": -166.70558166503906, "logps/rejected": -196.9326171875, "loss": 0.0673, "rewards/accuracies": 1.0, "rewards/chosen": 0.15020820498466492, "rewards/margins": 7.005769729614258, "rewards/rejected": -6.855562210083008, "step": 751 }, { "epoch": 0.26, "learning_rate": 1.949750078473061e-06, "logits/chosen": -1.0370441675186157, "logits/rejected": -1.004909634590149, "logps/chosen": -216.52410888671875, "logps/rejected": -263.7304382324219, "loss": 0.0806, "rewards/accuracies": 0.9375, "rewards/chosen": -0.07218784093856812, "rewards/margins": 10.644207000732422, "rewards/rejected": -10.716395378112793, "step": 752 }, { "epoch": 0.26, "learning_rate": 1.9495769306784297e-06, "logits/chosen": -1.0171712636947632, "logits/rejected": -0.9991689324378967, "logps/chosen": -285.1582336425781, "logps/rejected": -380.609619140625, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -1.4287153482437134, "rewards/margins": 12.627045631408691, "rewards/rejected": -14.05575942993164, "step": 753 }, { "epoch": 0.26, "learning_rate": 1.9494034928011507e-06, "logits/chosen": -1.018312931060791, "logits/rejected": -0.9938763380050659, "logps/chosen": -247.7843017578125, "logps/rejected": -377.1280517578125, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.12460573017597198, "rewards/margins": 11.427064895629883, "rewards/rejected": -11.551671981811523, "step": 754 }, { "epoch": 0.26, "learning_rate": 1.9492297648942066e-06, "logits/chosen": -1.0536798238754272, "logits/rejected": -1.0342413187026978, "logps/chosen": -170.64947509765625, "logps/rejected": -263.96539306640625, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -1.255079984664917, "rewards/margins": 9.020431518554688, "rewards/rejected": -10.275511741638184, "step": 755 }, { "epoch": 0.26, "learning_rate": 1.9490557470106686e-06, "logits/chosen": -1.039737582206726, "logits/rejected": -1.0160952806472778, "logps/chosen": -236.89364624023438, "logps/rejected": -352.935546875, "loss": 0.0412, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9015805721282959, "rewards/margins": 11.505945205688477, "rewards/rejected": -12.407526016235352, "step": 756 }, { "epoch": 0.26, "learning_rate": 1.9488814392036972e-06, "logits/chosen": -1.0088794231414795, "logits/rejected": -1.002709150314331, "logps/chosen": -214.56251525878906, "logps/rejected": -303.3090515136719, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/chosen": -0.1975732296705246, "rewards/margins": 8.257833480834961, "rewards/rejected": -8.455406188964844, "step": 757 }, { "epoch": 0.26, "learning_rate": 1.9487068415265413e-06, "logits/chosen": -1.105947732925415, "logits/rejected": -1.082002878189087, "logps/chosen": -195.3529815673828, "logps/rejected": -289.21197509765625, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -1.5590152740478516, "rewards/margins": 9.948806762695312, "rewards/rejected": -11.50782299041748, "step": 758 }, { "epoch": 0.26, "learning_rate": 1.9485319540325376e-06, "logits/chosen": -0.9894053936004639, "logits/rejected": -0.9891807436943054, "logps/chosen": -151.44021606445312, "logps/rejected": -225.41586303710938, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -2.3758976459503174, "rewards/margins": 6.379160404205322, "rewards/rejected": -8.755057334899902, "step": 759 }, { "epoch": 0.26, "learning_rate": 1.948356776775112e-06, "logits/chosen": -1.072550654411316, "logits/rejected": -1.0408027172088623, "logps/chosen": -236.94461059570312, "logps/rejected": -304.6432800292969, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -0.7275383472442627, "rewards/margins": 10.037242889404297, "rewards/rejected": -10.76478099822998, "step": 760 }, { "epoch": 0.26, "learning_rate": 1.948181309807779e-06, "logits/chosen": -1.0023822784423828, "logits/rejected": -0.956725001335144, "logps/chosen": -170.64166259765625, "logps/rejected": -199.2783203125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.0980122089385986, "rewards/margins": 6.840446949005127, "rewards/rejected": -7.938459396362305, "step": 761 }, { "epoch": 0.26, "learning_rate": 1.9480055531841403e-06, "logits/chosen": -1.0310347080230713, "logits/rejected": -0.983008086681366, "logps/chosen": -204.30674743652344, "logps/rejected": -253.14822387695312, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.5671306848526001, "rewards/margins": 8.871343612670898, "rewards/rejected": -9.438474655151367, "step": 762 }, { "epoch": 0.26, "learning_rate": 1.9478295069578887e-06, "logits/chosen": -1.043747901916504, "logits/rejected": -0.9916350841522217, "logps/chosen": -239.8550567626953, "logps/rejected": -277.6185302734375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.0332937240600586, "rewards/margins": 10.74476432800293, "rewards/rejected": -11.778057098388672, "step": 763 }, { "epoch": 0.26, "learning_rate": 1.9476531711828025e-06, "logits/chosen": -0.9518734812736511, "logits/rejected": -0.9156076312065125, "logps/chosen": -200.33807373046875, "logps/rejected": -324.8974914550781, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.06077189743518829, "rewards/margins": 11.360508918762207, "rewards/rejected": -11.421280860900879, "step": 764 }, { "epoch": 0.26, "learning_rate": 1.9474765459127503e-06, "logits/chosen": -1.1238903999328613, "logits/rejected": -1.09746253490448, "logps/chosen": -213.335205078125, "logps/rejected": -304.16162109375, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -0.7386670112609863, "rewards/margins": 10.11711597442627, "rewards/rejected": -10.855782508850098, "step": 765 }, { "epoch": 0.26, "learning_rate": 1.947299631201689e-06, "logits/chosen": -1.0617741346359253, "logits/rejected": -1.0389200448989868, "logps/chosen": -176.23475646972656, "logps/rejected": -222.38299560546875, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -1.731882929801941, "rewards/margins": 6.324265480041504, "rewards/rejected": -8.056148529052734, "step": 766 }, { "epoch": 0.26, "learning_rate": 1.947122427103664e-06, "logits/chosen": -1.1520075798034668, "logits/rejected": -1.1184120178222656, "logps/chosen": -197.86245727539062, "logps/rejected": -251.44081115722656, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": -0.3100743293762207, "rewards/margins": 9.912652015686035, "rewards/rejected": -10.222725868225098, "step": 767 }, { "epoch": 0.26, "learning_rate": 1.9469449336728074e-06, "logits/chosen": -1.05258309841156, "logits/rejected": -1.0413191318511963, "logps/chosen": -207.61082458496094, "logps/rejected": -355.1391906738281, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -1.8743066787719727, "rewards/margins": 11.29149341583252, "rewards/rejected": -13.165800094604492, "step": 768 }, { "epoch": 0.26, "learning_rate": 1.9467671509633418e-06, "logits/chosen": -1.0812410116195679, "logits/rejected": -1.0539765357971191, "logps/chosen": -155.68411254882812, "logps/rejected": -228.27587890625, "loss": 0.062, "rewards/accuracies": 0.9375, "rewards/chosen": -0.34960079193115234, "rewards/margins": 8.858339309692383, "rewards/rejected": -9.207940101623535, "step": 769 }, { "epoch": 0.26, "learning_rate": 1.946589079029577e-06, "logits/chosen": -1.0615862607955933, "logits/rejected": -1.0282047986984253, "logps/chosen": -231.00767517089844, "logps/rejected": -365.0166320800781, "loss": 0.112, "rewards/accuracies": 1.0, "rewards/chosen": 0.26154831051826477, "rewards/margins": 11.904617309570312, "rewards/rejected": -11.64306926727295, "step": 770 }, { "epoch": 0.26, "learning_rate": 1.946410717925912e-06, "logits/chosen": -1.0794121026992798, "logits/rejected": -1.0423554182052612, "logps/chosen": -191.58251953125, "logps/rejected": -280.5470275878906, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.27765408158302307, "rewards/margins": 9.988017082214355, "rewards/rejected": -10.26567268371582, "step": 771 }, { "epoch": 0.26, "learning_rate": 1.9462320677068334e-06, "logits/chosen": -0.9957703351974487, "logits/rejected": -0.9684576392173767, "logps/chosen": -305.0855407714844, "logps/rejected": -412.73541259765625, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 0.4998443126678467, "rewards/margins": 12.942200660705566, "rewards/rejected": -12.44235610961914, "step": 772 }, { "epoch": 0.26, "learning_rate": 1.9460531284269166e-06, "logits/chosen": -1.0726808309555054, "logits/rejected": -1.053087830543518, "logps/chosen": -128.5971221923828, "logps/rejected": -201.69561767578125, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -0.7949416637420654, "rewards/margins": 8.036491394042969, "rewards/rejected": -8.831433296203613, "step": 773 }, { "epoch": 0.26, "learning_rate": 1.945873900140825e-06, "logits/chosen": -1.0289958715438843, "logits/rejected": -0.9965192675590515, "logps/chosen": -180.53880310058594, "logps/rejected": -256.6683654785156, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -1.1225323677062988, "rewards/margins": 7.733715534210205, "rewards/rejected": -8.85624885559082, "step": 774 }, { "epoch": 0.26, "learning_rate": 1.94569438290331e-06, "logits/chosen": -1.0982741117477417, "logits/rejected": -1.0601651668548584, "logps/chosen": -206.06533813476562, "logps/rejected": -280.96722412109375, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.2762742042541504, "rewards/margins": 10.917946815490723, "rewards/rejected": -11.194221496582031, "step": 775 }, { "epoch": 0.26, "learning_rate": 1.9455145767692116e-06, "logits/chosen": -0.9900319576263428, "logits/rejected": -0.9551365971565247, "logps/chosen": -206.55262756347656, "logps/rejected": -308.3834228515625, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -0.9811997413635254, "rewards/margins": 10.758684158325195, "rewards/rejected": -11.739885330200195, "step": 776 }, { "epoch": 0.27, "learning_rate": 1.945334481793459e-06, "logits/chosen": -1.0437582731246948, "logits/rejected": -1.0031243562698364, "logps/chosen": -209.15399169921875, "logps/rejected": -229.55581665039062, "loss": 0.027, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3188239336013794, "rewards/margins": 7.040033340454102, "rewards/rejected": -8.358858108520508, "step": 777 }, { "epoch": 0.27, "learning_rate": 1.9451540980310676e-06, "logits/chosen": -1.1074892282485962, "logits/rejected": -1.0817362070083618, "logps/chosen": -197.02809143066406, "logps/rejected": -225.67381286621094, "loss": 0.0588, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4717766046524048, "rewards/margins": 6.591529846191406, "rewards/rejected": -7.063307762145996, "step": 778 }, { "epoch": 0.27, "learning_rate": 1.9449734255371426e-06, "logits/chosen": -1.0973799228668213, "logits/rejected": -1.0536668300628662, "logps/chosen": -152.090576171875, "logps/rejected": -248.44854736328125, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -0.26239851117134094, "rewards/margins": 9.845352172851562, "rewards/rejected": -10.107751846313477, "step": 779 }, { "epoch": 0.27, "learning_rate": 1.9447924643668773e-06, "logits/chosen": -0.9489179849624634, "logits/rejected": -0.9437491297721863, "logps/chosen": -130.003173828125, "logps/rejected": -242.88116455078125, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -0.9272334575653076, "rewards/margins": 9.189410209655762, "rewards/rejected": -10.116643905639648, "step": 780 }, { "epoch": 0.27, "learning_rate": 1.9446112145755523e-06, "logits/chosen": -1.0469890832901, "logits/rejected": -1.0300461053848267, "logps/chosen": -231.4445037841797, "logps/rejected": -341.1346435546875, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -0.6944682002067566, "rewards/margins": 9.50288200378418, "rewards/rejected": -10.197351455688477, "step": 781 }, { "epoch": 0.27, "learning_rate": 1.9444296762185375e-06, "logits/chosen": -1.1110026836395264, "logits/rejected": -1.07088041305542, "logps/chosen": -276.91094970703125, "logps/rejected": -380.3683166503906, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": -0.6262519359588623, "rewards/margins": 13.008890151977539, "rewards/rejected": -13.635143280029297, "step": 782 }, { "epoch": 0.27, "learning_rate": 1.94424784935129e-06, "logits/chosen": -1.041142225265503, "logits/rejected": -1.0036901235580444, "logps/chosen": -233.6277313232422, "logps/rejected": -324.5497741699219, "loss": 0.0679, "rewards/accuracies": 0.9375, "rewards/chosen": -0.19557681679725647, "rewards/margins": 11.295278549194336, "rewards/rejected": -11.490854263305664, "step": 783 }, { "epoch": 0.27, "learning_rate": 1.9440657340293558e-06, "logits/chosen": -1.0826420783996582, "logits/rejected": -1.0578852891921997, "logps/chosen": -202.11407470703125, "logps/rejected": -309.2381286621094, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": 0.224388986825943, "rewards/margins": 9.314530372619629, "rewards/rejected": -9.090140342712402, "step": 784 }, { "epoch": 0.27, "learning_rate": 1.9438833303083674e-06, "logits/chosen": -1.0524299144744873, "logits/rejected": -1.0240613222122192, "logps/chosen": -222.60166931152344, "logps/rejected": -290.33795166015625, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": -0.7428888082504272, "rewards/margins": 10.891409873962402, "rewards/rejected": -11.634296417236328, "step": 785 }, { "epoch": 0.27, "learning_rate": 1.943700638244048e-06, "logits/chosen": -0.9789007306098938, "logits/rejected": -0.9562532305717468, "logps/chosen": -190.0133514404297, "logps/rejected": -277.18743896484375, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.4050355851650238, "rewards/margins": 10.693689346313477, "rewards/rejected": -11.098724365234375, "step": 786 }, { "epoch": 0.27, "learning_rate": 1.9435176578922066e-06, "logits/chosen": -0.9706823825836182, "logits/rejected": -0.9216602444648743, "logps/chosen": -217.6117401123047, "logps/rejected": -204.2722930908203, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -1.5783863067626953, "rewards/margins": 6.0648040771484375, "rewards/rejected": -7.643189907073975, "step": 787 }, { "epoch": 0.27, "learning_rate": 1.943334389308742e-06, "logits/chosen": -0.9812697768211365, "logits/rejected": -0.9477900266647339, "logps/chosen": -219.23675537109375, "logps/rejected": -337.77947998046875, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": -0.016209319233894348, "rewards/margins": 11.505671501159668, "rewards/rejected": -11.521881103515625, "step": 788 }, { "epoch": 0.27, "learning_rate": 1.9431508325496395e-06, "logits/chosen": -1.0744049549102783, "logits/rejected": -1.0395362377166748, "logps/chosen": -137.0393829345703, "logps/rejected": -220.01246643066406, "loss": 0.0312, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3522194027900696, "rewards/margins": 9.973899841308594, "rewards/rejected": -10.326119422912598, "step": 789 }, { "epoch": 0.27, "learning_rate": 1.9429669876709734e-06, "logits/chosen": -0.9924090504646301, "logits/rejected": -0.9688755869865417, "logps/chosen": -220.31455993652344, "logps/rejected": -260.44757080078125, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": 0.22567623853683472, "rewards/margins": 7.227943420410156, "rewards/rejected": -7.002267837524414, "step": 790 }, { "epoch": 0.27, "learning_rate": 1.942782854728906e-06, "logits/chosen": -1.0381958484649658, "logits/rejected": -1.0012682676315308, "logps/chosen": -235.77685546875, "logps/rejected": -253.52090454101562, "loss": 0.0377, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7419018745422363, "rewards/margins": 8.101896286010742, "rewards/rejected": -8.843798637390137, "step": 791 }, { "epoch": 0.27, "learning_rate": 1.942598433779687e-06, "logits/chosen": -1.0780380964279175, "logits/rejected": -1.0550390481948853, "logps/chosen": -265.5757751464844, "logps/rejected": -312.2782287597656, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.6374695301055908, "rewards/margins": 9.637901306152344, "rewards/rejected": -10.275372505187988, "step": 792 }, { "epoch": 0.27, "learning_rate": 1.9424137248796548e-06, "logits/chosen": -1.1324288845062256, "logits/rejected": -1.0742599964141846, "logps/chosen": -260.30413818359375, "logps/rejected": -327.2004089355469, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -1.0027879476547241, "rewards/margins": 11.605653762817383, "rewards/rejected": -12.608442306518555, "step": 793 }, { "epoch": 0.27, "learning_rate": 1.9422287280852346e-06, "logits/chosen": -1.0827745199203491, "logits/rejected": -1.0662200450897217, "logps/chosen": -172.47344970703125, "logps/rejected": -277.8626403808594, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.36757877469062805, "rewards/margins": 10.64747142791748, "rewards/rejected": -11.015049934387207, "step": 794 }, { "epoch": 0.27, "learning_rate": 1.942043443452942e-06, "logits/chosen": -1.112928032875061, "logits/rejected": -1.081753134727478, "logps/chosen": -242.286865234375, "logps/rejected": -307.32696533203125, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": -0.09244191646575928, "rewards/margins": 9.616131782531738, "rewards/rejected": -9.708573341369629, "step": 795 }, { "epoch": 0.27, "learning_rate": 1.941857871039377e-06, "logits/chosen": -0.9639772772789001, "logits/rejected": -0.9621104598045349, "logps/chosen": -144.23629760742188, "logps/rejected": -249.07496643066406, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -1.179353952407837, "rewards/margins": 8.032196998596191, "rewards/rejected": -9.211549758911133, "step": 796 }, { "epoch": 0.27, "learning_rate": 1.941672010901231e-06, "logits/chosen": -1.069243311882019, "logits/rejected": -1.0400861501693726, "logps/chosen": -261.6617431640625, "logps/rejected": -307.3459167480469, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 0.08820490539073944, "rewards/margins": 9.972457885742188, "rewards/rejected": -9.884252548217773, "step": 797 }, { "epoch": 0.27, "learning_rate": 1.9414858630952806e-06, "logits/chosen": -1.091757893562317, "logits/rejected": -1.0474680662155151, "logps/chosen": -194.3960418701172, "logps/rejected": -237.06552124023438, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": -0.8743416666984558, "rewards/margins": 7.766237258911133, "rewards/rejected": -8.640579223632812, "step": 798 }, { "epoch": 0.27, "learning_rate": 1.941299427678392e-06, "logits/chosen": -1.116590976715088, "logits/rejected": -1.0838079452514648, "logps/chosen": -199.7047882080078, "logps/rejected": -292.9176940917969, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -1.3206162452697754, "rewards/margins": 8.895312309265137, "rewards/rejected": -10.215927124023438, "step": 799 }, { "epoch": 0.27, "learning_rate": 1.9411127047075184e-06, "logits/chosen": -1.1249170303344727, "logits/rejected": -1.0976489782333374, "logps/chosen": -207.34591674804688, "logps/rejected": -367.2522888183594, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -1.6005287170410156, "rewards/margins": 12.673116683959961, "rewards/rejected": -14.273646354675293, "step": 800 }, { "epoch": 0.27, "learning_rate": 1.940925694239701e-06, "logits/chosen": -1.094280481338501, "logits/rejected": -1.0723917484283447, "logps/chosen": -198.71707153320312, "logps/rejected": -298.79864501953125, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -1.1200875043869019, "rewards/margins": 10.695478439331055, "rewards/rejected": -11.815566062927246, "step": 801 }, { "epoch": 0.27, "learning_rate": 1.94073839633207e-06, "logits/chosen": -1.0042552947998047, "logits/rejected": -0.9812450408935547, "logps/chosen": -170.00009155273438, "logps/rejected": -285.9131774902344, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -1.3885165452957153, "rewards/margins": 10.228734970092773, "rewards/rejected": -11.6172513961792, "step": 802 }, { "epoch": 0.27, "learning_rate": 1.94055081104184e-06, "logits/chosen": -0.9712838530540466, "logits/rejected": -0.944934606552124, "logps/chosen": -235.44984436035156, "logps/rejected": -401.6217041015625, "loss": 0.0352, "rewards/accuracies": 0.9375, "rewards/chosen": -1.587630033493042, "rewards/margins": 14.00521183013916, "rewards/rejected": -15.592841148376465, "step": 803 }, { "epoch": 0.27, "learning_rate": 1.940362938426318e-06, "logits/chosen": -0.9982869029045105, "logits/rejected": -0.9735551476478577, "logps/chosen": -239.21578979492188, "logps/rejected": -334.84423828125, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -1.452852725982666, "rewards/margins": 10.338920593261719, "rewards/rejected": -11.791772842407227, "step": 804 }, { "epoch": 0.27, "learning_rate": 1.940174778542895e-06, "logits/chosen": -1.0793722867965698, "logits/rejected": -1.0508841276168823, "logps/chosen": -214.2778778076172, "logps/rejected": -308.7041931152344, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -0.27328312397003174, "rewards/margins": 12.46519660949707, "rewards/rejected": -12.738480567932129, "step": 805 }, { "epoch": 0.28, "learning_rate": 1.9399863314490525e-06, "logits/chosen": -0.97553551197052, "logits/rejected": -0.9484115839004517, "logps/chosen": -186.97476196289062, "logps/rejected": -228.43702697753906, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -1.8561476469039917, "rewards/margins": 6.584370136260986, "rewards/rejected": -8.44051742553711, "step": 806 }, { "epoch": 0.28, "learning_rate": 1.9397975972023576e-06, "logits/chosen": -1.1090940237045288, "logits/rejected": -1.0803478956222534, "logps/chosen": -244.12844848632812, "logps/rejected": -346.9898986816406, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -1.0681326389312744, "rewards/margins": 9.747342109680176, "rewards/rejected": -10.815475463867188, "step": 807 }, { "epoch": 0.28, "learning_rate": 1.939608575860466e-06, "logits/chosen": -1.124434232711792, "logits/rejected": -1.0955393314361572, "logps/chosen": -236.3013153076172, "logps/rejected": -344.81488037109375, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": -1.0008869171142578, "rewards/margins": 10.438840866088867, "rewards/rejected": -11.439726829528809, "step": 808 }, { "epoch": 0.28, "learning_rate": 1.9394192674811216e-06, "logits/chosen": -0.9972414374351501, "logits/rejected": -0.9735836982727051, "logps/chosen": -198.3253173828125, "logps/rejected": -299.41986083984375, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.8136978149414062, "rewards/margins": 10.467976570129395, "rewards/rejected": -11.2816743850708, "step": 809 }, { "epoch": 0.28, "learning_rate": 1.9392296721221553e-06, "logits/chosen": -1.01274573802948, "logits/rejected": -0.9723597764968872, "logps/chosen": -262.8524169921875, "logps/rejected": -295.4405212402344, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.6371099948883057, "rewards/margins": 8.588518142700195, "rewards/rejected": -10.225626945495605, "step": 810 }, { "epoch": 0.28, "learning_rate": 1.9390397898414855e-06, "logits/chosen": -1.037533164024353, "logits/rejected": -1.008596658706665, "logps/chosen": -148.0140380859375, "logps/rejected": -231.796875, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": -1.2464326620101929, "rewards/margins": 8.832186698913574, "rewards/rejected": -10.078619956970215, "step": 811 }, { "epoch": 0.28, "learning_rate": 1.9388496206971195e-06, "logits/chosen": -1.0507943630218506, "logits/rejected": -1.0249708890914917, "logps/chosen": -226.02456665039062, "logps/rejected": -312.41058349609375, "loss": 0.0327, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6024166941642761, "rewards/margins": 10.111900329589844, "rewards/rejected": -10.714317321777344, "step": 812 }, { "epoch": 0.28, "learning_rate": 1.9386591647471502e-06, "logits/chosen": -0.9627578854560852, "logits/rejected": -0.9454383850097656, "logps/chosen": -140.5768585205078, "logps/rejected": -229.63912963867188, "loss": 0.0436, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0919628143310547, "rewards/margins": 8.480905532836914, "rewards/rejected": -9.572868347167969, "step": 813 }, { "epoch": 0.28, "learning_rate": 1.9384684220497604e-06, "logits/chosen": -1.029641032218933, "logits/rejected": -1.0173234939575195, "logps/chosen": -184.85137939453125, "logps/rejected": -277.3277587890625, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/chosen": -0.3663935363292694, "rewards/margins": 10.786227226257324, "rewards/rejected": -11.152620315551758, "step": 814 }, { "epoch": 0.28, "learning_rate": 1.9382773926632186e-06, "logits/chosen": -0.9894671440124512, "logits/rejected": -0.9559578895568848, "logps/chosen": -185.5834503173828, "logps/rejected": -280.6511535644531, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -1.1892471313476562, "rewards/margins": 10.679450035095215, "rewards/rejected": -11.868698120117188, "step": 815 }, { "epoch": 0.28, "learning_rate": 1.9380860766458817e-06, "logits/chosen": -0.9319298267364502, "logits/rejected": -0.9045693278312683, "logps/chosen": -223.9222412109375, "logps/rejected": -303.6947021484375, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -1.4615099430084229, "rewards/margins": 10.400415420532227, "rewards/rejected": -11.86192512512207, "step": 816 }, { "epoch": 0.28, "learning_rate": 1.937894474056194e-06, "logits/chosen": -0.9685192704200745, "logits/rejected": -0.9607897996902466, "logps/chosen": -203.247802734375, "logps/rejected": -318.7576904296875, "loss": 0.1126, "rewards/accuracies": 1.0, "rewards/chosen": -1.5190023183822632, "rewards/margins": 9.01166820526123, "rewards/rejected": -10.530670166015625, "step": 817 }, { "epoch": 0.28, "learning_rate": 1.937702584952688e-06, "logits/chosen": -0.9891261458396912, "logits/rejected": -0.9571197628974915, "logps/chosen": -201.5905303955078, "logps/rejected": -276.39056396484375, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.09169231355190277, "rewards/margins": 9.418036460876465, "rewards/rejected": -9.509729385375977, "step": 818 }, { "epoch": 0.28, "learning_rate": 1.937510409393983e-06, "logits/chosen": -1.0214319229125977, "logits/rejected": -0.9974942207336426, "logps/chosen": -243.3999481201172, "logps/rejected": -324.42926025390625, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.033982664346694946, "rewards/margins": 11.881404876708984, "rewards/rejected": -11.915386199951172, "step": 819 }, { "epoch": 0.28, "learning_rate": 1.9373179474387856e-06, "logits/chosen": -1.0121833086013794, "logits/rejected": -0.9731397032737732, "logps/chosen": -186.61416625976562, "logps/rejected": -272.29296875, "loss": 0.062, "rewards/accuracies": 0.875, "rewards/chosen": -0.6799789667129517, "rewards/margins": 9.51190185546875, "rewards/rejected": -10.19188117980957, "step": 820 }, { "epoch": 0.28, "learning_rate": 1.937125199145891e-06, "logits/chosen": -1.0452147722244263, "logits/rejected": -1.0482648611068726, "logps/chosen": -208.87969970703125, "logps/rejected": -347.78106689453125, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -0.7318013310432434, "rewards/margins": 11.76827621459961, "rewards/rejected": -12.500078201293945, "step": 821 }, { "epoch": 0.28, "learning_rate": 1.93693216457418e-06, "logits/chosen": -1.0615090131759644, "logits/rejected": -1.02176833152771, "logps/chosen": -178.60972595214844, "logps/rejected": -245.4754638671875, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": -1.0029078722000122, "rewards/margins": 9.44143009185791, "rewards/rejected": -10.44433879852295, "step": 822 }, { "epoch": 0.28, "learning_rate": 1.9367388437826233e-06, "logits/chosen": -1.0012873411178589, "logits/rejected": -0.9793981313705444, "logps/chosen": -189.72427368164062, "logps/rejected": -314.29766845703125, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -1.029220700263977, "rewards/margins": 11.311951637268066, "rewards/rejected": -12.34117317199707, "step": 823 }, { "epoch": 0.28, "learning_rate": 1.9365452368302767e-06, "logits/chosen": -1.0683366060256958, "logits/rejected": -1.0436452627182007, "logps/chosen": -226.94430541992188, "logps/rejected": -312.728271484375, "loss": 0.0902, "rewards/accuracies": 1.0, "rewards/chosen": -0.9351109266281128, "rewards/margins": 10.791228294372559, "rewards/rejected": -11.726339340209961, "step": 824 }, { "epoch": 0.28, "learning_rate": 1.9363513437762854e-06, "logits/chosen": -1.0160810947418213, "logits/rejected": -1.003857970237732, "logps/chosen": -233.7023162841797, "logps/rejected": -306.4039001464844, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -1.5164244174957275, "rewards/margins": 11.943695068359375, "rewards/rejected": -13.460119247436523, "step": 825 }, { "epoch": 0.28, "learning_rate": 1.93615716467988e-06, "logits/chosen": -1.0099185705184937, "logits/rejected": -0.9893419742584229, "logps/chosen": -214.89178466796875, "logps/rejected": -289.8096923828125, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -0.5502689480781555, "rewards/margins": 10.096604347229004, "rewards/rejected": -10.646873474121094, "step": 826 }, { "epoch": 0.28, "learning_rate": 1.93596269960038e-06, "logits/chosen": -0.9502048492431641, "logits/rejected": -0.9281173944473267, "logps/chosen": -212.51290893554688, "logps/rejected": -297.61224365234375, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -0.4775673449039459, "rewards/margins": 9.179919242858887, "rewards/rejected": -9.657485961914062, "step": 827 }, { "epoch": 0.28, "learning_rate": 1.935767948597192e-06, "logits/chosen": -1.0550843477249146, "logits/rejected": -1.0401519536972046, "logps/chosen": -189.6421661376953, "logps/rejected": -247.73605346679688, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": -0.9134374260902405, "rewards/margins": 8.329848289489746, "rewards/rejected": -9.2432861328125, "step": 828 }, { "epoch": 0.28, "learning_rate": 1.9355729117298093e-06, "logits/chosen": -0.8738510012626648, "logits/rejected": -0.8448121547698975, "logps/chosen": -180.7628936767578, "logps/rejected": -198.8745880126953, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -0.6031420826911926, "rewards/margins": 7.148312091827393, "rewards/rejected": -7.7514543533325195, "step": 829 }, { "epoch": 0.28, "learning_rate": 1.935377589057814e-06, "logits/chosen": -1.0552623271942139, "logits/rejected": -1.0340001583099365, "logps/chosen": -270.9123840332031, "logps/rejected": -348.7767333984375, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -1.3520234823226929, "rewards/margins": 10.523346900939941, "rewards/rejected": -11.875370979309082, "step": 830 }, { "epoch": 0.28, "learning_rate": 1.9351819806408727e-06, "logits/chosen": -0.9018321633338928, "logits/rejected": -0.890975296497345, "logps/chosen": -182.37115478515625, "logps/rejected": -313.24591064453125, "loss": 0.0481, "rewards/accuracies": 1.0, "rewards/chosen": -1.6044912338256836, "rewards/margins": 10.09244441986084, "rewards/rejected": -11.69693660736084, "step": 831 }, { "epoch": 0.28, "learning_rate": 1.934986086538743e-06, "logits/chosen": -0.9137836694717407, "logits/rejected": -0.8944330811500549, "logps/chosen": -140.55935668945312, "logps/rejected": -210.5548858642578, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -0.571205735206604, "rewards/margins": 9.163553237915039, "rewards/rejected": -9.734759330749512, "step": 832 }, { "epoch": 0.28, "learning_rate": 1.9347899068112667e-06, "logits/chosen": -0.9740812182426453, "logits/rejected": -0.9502595067024231, "logps/chosen": -233.7429962158203, "logps/rejected": -332.0394287109375, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -0.3543546199798584, "rewards/margins": 12.223028182983398, "rewards/rejected": -12.577383041381836, "step": 833 }, { "epoch": 0.28, "learning_rate": 1.934593441518374e-06, "logits/chosen": -1.0290555953979492, "logits/rejected": -1.019731879234314, "logps/chosen": -197.7492218017578, "logps/rejected": -327.99957275390625, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.7007480263710022, "rewards/margins": 10.744217872619629, "rewards/rejected": -11.444965362548828, "step": 834 }, { "epoch": 0.28, "learning_rate": 1.9343966907200827e-06, "logits/chosen": -1.0376453399658203, "logits/rejected": -1.0274654626846313, "logps/chosen": -174.8538818359375, "logps/rejected": -266.9571228027344, "loss": 0.1025, "rewards/accuracies": 1.0, "rewards/chosen": -1.5783758163452148, "rewards/margins": 8.237871170043945, "rewards/rejected": -9.816246032714844, "step": 835 }, { "epoch": 0.29, "learning_rate": 1.9341996544764974e-06, "logits/chosen": -1.098604679107666, "logits/rejected": -1.0722063779830933, "logps/chosen": -199.3244171142578, "logps/rejected": -305.7760314941406, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -0.5870643854141235, "rewards/margins": 11.379727363586426, "rewards/rejected": -11.966791152954102, "step": 836 }, { "epoch": 0.29, "learning_rate": 1.9340023328478097e-06, "logits/chosen": -0.9918259978294373, "logits/rejected": -0.969953715801239, "logps/chosen": -254.6311798095703, "logps/rejected": -365.63140869140625, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": -1.1973134279251099, "rewards/margins": 11.511149406433105, "rewards/rejected": -12.70846176147461, "step": 837 }, { "epoch": 0.29, "learning_rate": 1.933804725894299e-06, "logits/chosen": -1.049131155014038, "logits/rejected": -1.0346219539642334, "logps/chosen": -229.33828735351562, "logps/rejected": -332.3741455078125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.1846335232257843, "rewards/margins": 12.240479469299316, "rewards/rejected": -12.425113677978516, "step": 838 }, { "epoch": 0.29, "learning_rate": 1.9336068336763318e-06, "logits/chosen": -1.026975393295288, "logits/rejected": -0.9959419965744019, "logps/chosen": -197.0389862060547, "logps/rejected": -338.9658203125, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -0.7697099447250366, "rewards/margins": 11.732500076293945, "rewards/rejected": -12.50221061706543, "step": 839 }, { "epoch": 0.29, "learning_rate": 1.9334086562543602e-06, "logits/chosen": -1.0407975912094116, "logits/rejected": -1.0265623331069946, "logps/chosen": -171.8576202392578, "logps/rejected": -248.04481506347656, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -0.21537230908870697, "rewards/margins": 9.94395923614502, "rewards/rejected": -10.159332275390625, "step": 840 }, { "epoch": 0.29, "learning_rate": 1.9332101936889258e-06, "logits/chosen": -0.9515625834465027, "logits/rejected": -0.94370436668396, "logps/chosen": -154.11354064941406, "logps/rejected": -208.8072052001953, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -1.726530909538269, "rewards/margins": 4.808549880981445, "rewards/rejected": -6.535080432891846, "step": 841 }, { "epoch": 0.29, "learning_rate": 1.9330114460406556e-06, "logits/chosen": -0.9389239549636841, "logits/rejected": -0.920370876789093, "logps/chosen": -278.41717529296875, "logps/rejected": -296.2261962890625, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": -0.2906825542449951, "rewards/margins": 9.825318336486816, "rewards/rejected": -10.116000175476074, "step": 842 }, { "epoch": 0.29, "learning_rate": 1.9328124133702648e-06, "logits/chosen": -0.9671790599822998, "logits/rejected": -0.9515557885169983, "logps/chosen": -207.95510864257812, "logps/rejected": -295.43505859375, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.1057404279708862, "rewards/margins": 9.209014892578125, "rewards/rejected": -10.314757347106934, "step": 843 }, { "epoch": 0.29, "learning_rate": 1.932613095738555e-06, "logits/chosen": -1.0056430101394653, "logits/rejected": -0.9986352324485779, "logps/chosen": -211.2509765625, "logps/rejected": -277.33990478515625, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/chosen": -1.2114144563674927, "rewards/margins": 8.969715118408203, "rewards/rejected": -10.181131362915039, "step": 844 }, { "epoch": 0.29, "learning_rate": 1.932413493206414e-06, "logits/chosen": -1.0371410846710205, "logits/rejected": -1.0200828313827515, "logps/chosen": -204.2586212158203, "logps/rejected": -320.56854248046875, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.6090240478515625, "rewards/margins": 9.134452819824219, "rewards/rejected": -9.743477821350098, "step": 845 }, { "epoch": 0.29, "learning_rate": 1.9322136058348187e-06, "logits/chosen": -1.0931299924850464, "logits/rejected": -1.0781145095825195, "logps/chosen": -199.21385192871094, "logps/rejected": -283.4803466796875, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -0.6601632237434387, "rewards/margins": 8.99896240234375, "rewards/rejected": -9.659125328063965, "step": 846 }, { "epoch": 0.29, "learning_rate": 1.932013433684832e-06, "logits/chosen": -0.9497886300086975, "logits/rejected": -0.925977885723114, "logps/chosen": -239.35284423828125, "logps/rejected": -356.592041015625, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -0.9562163352966309, "rewards/margins": 10.482278823852539, "rewards/rejected": -11.438494682312012, "step": 847 }, { "epoch": 0.29, "learning_rate": 1.931812976817603e-06, "logits/chosen": -1.005487322807312, "logits/rejected": -0.9805780053138733, "logps/chosen": -180.95748901367188, "logps/rejected": -274.4371032714844, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5062355399131775, "rewards/margins": 10.030515670776367, "rewards/rejected": -10.536751747131348, "step": 848 }, { "epoch": 0.29, "learning_rate": 1.9316122352943692e-06, "logits/chosen": -0.9793025851249695, "logits/rejected": -0.9757740497589111, "logps/chosen": -238.69090270996094, "logps/rejected": -356.55499267578125, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -1.9484186172485352, "rewards/margins": 10.4981689453125, "rewards/rejected": -12.446586608886719, "step": 849 }, { "epoch": 0.29, "learning_rate": 1.9314112091764536e-06, "logits/chosen": -0.9952584505081177, "logits/rejected": -0.9631220102310181, "logps/chosen": -143.0758819580078, "logps/rejected": -226.27467346191406, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": -0.7930772304534912, "rewards/margins": 10.453518867492676, "rewards/rejected": -11.246596336364746, "step": 850 }, { "epoch": 0.29, "learning_rate": 1.931209898525268e-06, "logits/chosen": -1.0100815296173096, "logits/rejected": -0.998589038848877, "logps/chosen": -202.17950439453125, "logps/rejected": -300.5162658691406, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -0.6906222105026245, "rewards/margins": 8.22895622253418, "rewards/rejected": -8.91957950592041, "step": 851 }, { "epoch": 0.29, "learning_rate": 1.931008303402309e-06, "logits/chosen": -0.9756053686141968, "logits/rejected": -0.9424832463264465, "logps/chosen": -215.59703063964844, "logps/rejected": -254.32046508789062, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": 0.5638027191162109, "rewards/margins": 9.750289916992188, "rewards/rejected": -9.186487197875977, "step": 852 }, { "epoch": 0.29, "learning_rate": 1.9308064238691616e-06, "logits/chosen": -0.9907859563827515, "logits/rejected": -0.9672561287879944, "logps/chosen": -181.5191650390625, "logps/rejected": -280.10711669921875, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -0.8824477195739746, "rewards/margins": 10.356745719909668, "rewards/rejected": -11.239192008972168, "step": 853 }, { "epoch": 0.29, "learning_rate": 1.930604259987497e-06, "logits/chosen": -0.9483991861343384, "logits/rejected": -0.9326090216636658, "logps/chosen": -241.6248016357422, "logps/rejected": -273.9657287597656, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -0.5025368928909302, "rewards/margins": 8.482318878173828, "rewards/rejected": -8.984856605529785, "step": 854 }, { "epoch": 0.29, "learning_rate": 1.9304018118190733e-06, "logits/chosen": -1.0709445476531982, "logits/rejected": -1.0606677532196045, "logps/chosen": -225.2361602783203, "logps/rejected": -304.9345703125, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": -1.0755462646484375, "rewards/margins": 8.74710750579834, "rewards/rejected": -9.822652816772461, "step": 855 }, { "epoch": 0.29, "learning_rate": 1.9301990794257364e-06, "logits/chosen": -0.9010380506515503, "logits/rejected": -0.8789607882499695, "logps/chosen": -163.17742919921875, "logps/rejected": -229.34999084472656, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -0.8768799304962158, "rewards/margins": 8.415107727050781, "rewards/rejected": -9.291988372802734, "step": 856 }, { "epoch": 0.29, "learning_rate": 1.9299960628694175e-06, "logits/chosen": -0.9359311461448669, "logits/rejected": -0.8930796980857849, "logps/chosen": -205.7366485595703, "logps/rejected": -262.7641906738281, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -1.0011588335037231, "rewards/margins": 9.538636207580566, "rewards/rejected": -10.539794921875, "step": 857 }, { "epoch": 0.29, "learning_rate": 1.929792762212136e-06, "logits/chosen": -1.000213623046875, "logits/rejected": -0.985423743724823, "logps/chosen": -183.5542755126953, "logps/rejected": -265.9300842285156, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -1.739134669303894, "rewards/margins": 9.34042739868164, "rewards/rejected": -11.079562187194824, "step": 858 }, { "epoch": 0.29, "learning_rate": 1.9295891775159965e-06, "logits/chosen": -0.9343293309211731, "logits/rejected": -0.9057914018630981, "logps/chosen": -179.0500030517578, "logps/rejected": -252.59152221679688, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.8686935305595398, "rewards/margins": 9.523134231567383, "rewards/rejected": -10.391828536987305, "step": 859 }, { "epoch": 0.29, "learning_rate": 1.9293853088431923e-06, "logits/chosen": -0.9513857364654541, "logits/rejected": -0.9126663208007812, "logps/chosen": -218.50631713867188, "logps/rejected": -281.409423828125, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": -0.7572420239448547, "rewards/margins": 10.337820053100586, "rewards/rejected": -11.095062255859375, "step": 860 }, { "epoch": 0.29, "learning_rate": 1.929181156256002e-06, "logits/chosen": -0.9884921312332153, "logits/rejected": -0.9959762096405029, "logps/chosen": -217.20472717285156, "logps/rejected": -353.6378173828125, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -1.7583057880401611, "rewards/margins": 9.907675743103027, "rewards/rejected": -11.66598129272461, "step": 861 }, { "epoch": 0.29, "learning_rate": 1.9289767198167913e-06, "logits/chosen": -1.0324842929840088, "logits/rejected": -1.003731608390808, "logps/chosen": -187.63587951660156, "logps/rejected": -220.1991729736328, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.000202864408493042, "rewards/margins": 8.472026824951172, "rewards/rejected": -8.472229957580566, "step": 862 }, { "epoch": 0.29, "learning_rate": 1.928771999588013e-06, "logits/chosen": -0.9872896671295166, "logits/rejected": -0.9467839598655701, "logps/chosen": -247.7784423828125, "logps/rejected": -293.4754943847656, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 0.264417827129364, "rewards/margins": 10.949191093444824, "rewards/rejected": -10.684772491455078, "step": 863 }, { "epoch": 0.29, "learning_rate": 1.9285669956322058e-06, "logits/chosen": -0.9700373411178589, "logits/rejected": -0.9201055765151978, "logps/chosen": -242.62876892089844, "logps/rejected": -254.18142700195312, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -0.2527252733707428, "rewards/margins": 9.119209289550781, "rewards/rejected": -9.37193489074707, "step": 864 }, { "epoch": 0.3, "learning_rate": 1.9283617080119966e-06, "logits/chosen": -1.0108857154846191, "logits/rejected": -0.9713346362113953, "logps/chosen": -204.45675659179688, "logps/rejected": -270.5009460449219, "loss": 0.0256, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9167131185531616, "rewards/margins": 11.244760513305664, "rewards/rejected": -12.161474227905273, "step": 865 }, { "epoch": 0.3, "learning_rate": 1.928156136790097e-06, "logits/chosen": -0.9196022152900696, "logits/rejected": -0.9169475436210632, "logps/chosen": -160.88034057617188, "logps/rejected": -309.2760009765625, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -1.2545666694641113, "rewards/margins": 9.778338432312012, "rewards/rejected": -11.032903671264648, "step": 866 }, { "epoch": 0.3, "learning_rate": 1.927950282029306e-06, "logits/chosen": -1.019033432006836, "logits/rejected": -0.9941359162330627, "logps/chosen": -98.05113220214844, "logps/rejected": -209.09637451171875, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -1.0838029384613037, "rewards/margins": 8.688437461853027, "rewards/rejected": -9.77224063873291, "step": 867 }, { "epoch": 0.3, "learning_rate": 1.92774414379251e-06, "logits/chosen": -0.9767724871635437, "logits/rejected": -0.9536234140396118, "logps/chosen": -253.9425048828125, "logps/rejected": -319.9609375, "loss": 0.1154, "rewards/accuracies": 1.0, "rewards/chosen": -1.2797060012817383, "rewards/margins": 9.541102409362793, "rewards/rejected": -10.820807456970215, "step": 868 }, { "epoch": 0.3, "learning_rate": 1.9275377221426814e-06, "logits/chosen": -0.9480306506156921, "logits/rejected": -0.9256911277770996, "logps/chosen": -226.4746551513672, "logps/rejected": -311.0408935546875, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -0.34282562136650085, "rewards/margins": 11.573043823242188, "rewards/rejected": -11.915868759155273, "step": 869 }, { "epoch": 0.3, "learning_rate": 1.927331017142879e-06, "logits/chosen": -0.9317431449890137, "logits/rejected": -0.8861099481582642, "logps/chosen": -172.78697204589844, "logps/rejected": -241.6155548095703, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": -1.2752711772918701, "rewards/margins": 9.113483428955078, "rewards/rejected": -10.388754844665527, "step": 870 }, { "epoch": 0.3, "learning_rate": 1.9271240288562483e-06, "logits/chosen": -0.9996293187141418, "logits/rejected": -0.9845326542854309, "logps/chosen": -219.51611328125, "logps/rejected": -321.89093017578125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.0378696918487549, "rewards/margins": 10.745166778564453, "rewards/rejected": -11.783036231994629, "step": 871 }, { "epoch": 0.3, "learning_rate": 1.9269167573460217e-06, "logits/chosen": -1.056863784790039, "logits/rejected": -1.0375187397003174, "logps/chosen": -189.23402404785156, "logps/rejected": -277.53240966796875, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": -1.4303001165390015, "rewards/margins": 9.036324501037598, "rewards/rejected": -10.466625213623047, "step": 872 }, { "epoch": 0.3, "learning_rate": 1.926709202675517e-06, "logits/chosen": -0.970872163772583, "logits/rejected": -0.9709228277206421, "logps/chosen": -154.18568420410156, "logps/rejected": -284.95703125, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.9610879421234131, "rewards/margins": 9.431053161621094, "rewards/rejected": -10.392141342163086, "step": 873 }, { "epoch": 0.3, "learning_rate": 1.92650136490814e-06, "logits/chosen": -0.9496920704841614, "logits/rejected": -0.9357080459594727, "logps/chosen": -221.81248474121094, "logps/rejected": -357.27484130859375, "loss": 0.0816, "rewards/accuracies": 1.0, "rewards/chosen": -0.7936794757843018, "rewards/margins": 12.264055252075195, "rewards/rejected": -13.057735443115234, "step": 874 }, { "epoch": 0.3, "learning_rate": 1.926293244107382e-06, "logits/chosen": -0.9552091956138611, "logits/rejected": -0.9128001928329468, "logps/chosen": -199.87350463867188, "logps/rejected": -199.07275390625, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -0.4380735754966736, "rewards/margins": 7.043936252593994, "rewards/rejected": -7.482008934020996, "step": 875 }, { "epoch": 0.3, "learning_rate": 1.9260848403368207e-06, "logits/chosen": -1.0729538202285767, "logits/rejected": -1.0382226705551147, "logps/chosen": -204.41708374023438, "logps/rejected": -287.2272033691406, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.21695896983146667, "rewards/margins": 9.609122276306152, "rewards/rejected": -9.826082229614258, "step": 876 }, { "epoch": 0.3, "learning_rate": 1.9258761536601214e-06, "logits/chosen": -1.0167522430419922, "logits/rejected": -1.0083142518997192, "logps/chosen": -120.85055541992188, "logps/rejected": -273.4198303222656, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -1.7880127429962158, "rewards/margins": 10.94351577758789, "rewards/rejected": -12.731529235839844, "step": 877 }, { "epoch": 0.3, "learning_rate": 1.925667184141034e-06, "logits/chosen": -0.9195133447647095, "logits/rejected": -0.8926723599433899, "logps/chosen": -197.8523406982422, "logps/rejected": -271.81207275390625, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.7482637763023376, "rewards/margins": 9.081665992736816, "rewards/rejected": -9.829930305480957, "step": 878 }, { "epoch": 0.3, "learning_rate": 1.9254579318433966e-06, "logits/chosen": -0.9689615964889526, "logits/rejected": -0.9331439733505249, "logps/chosen": -226.23402404785156, "logps/rejected": -343.4499206542969, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": 0.22645927965641022, "rewards/margins": 12.354127883911133, "rewards/rejected": -12.127668380737305, "step": 879 }, { "epoch": 0.3, "learning_rate": 1.925248396831133e-06, "logits/chosen": -1.04086434841156, "logits/rejected": -1.0187668800354004, "logps/chosen": -206.44943237304688, "logps/rejected": -266.4681396484375, "loss": 0.1044, "rewards/accuracies": 1.0, "rewards/chosen": 0.11551889777183533, "rewards/margins": 9.231032371520996, "rewards/rejected": -9.11551284790039, "step": 880 }, { "epoch": 0.3, "learning_rate": 1.9250385791682526e-06, "logits/chosen": -1.0470556020736694, "logits/rejected": -1.0098057985305786, "logps/chosen": -216.66773986816406, "logps/rejected": -281.8210144042969, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": -1.2095208168029785, "rewards/margins": 9.42506217956543, "rewards/rejected": -10.634583473205566, "step": 881 }, { "epoch": 0.3, "learning_rate": 1.924828478918852e-06, "logits/chosen": -1.0119304656982422, "logits/rejected": -0.9779084920883179, "logps/chosen": -203.27200317382812, "logps/rejected": -301.66522216796875, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.1556420773267746, "rewards/margins": 10.45581340789795, "rewards/rejected": -10.611454963684082, "step": 882 }, { "epoch": 0.3, "learning_rate": 1.9246180961471137e-06, "logits/chosen": -1.0588300228118896, "logits/rejected": -1.026898980140686, "logps/chosen": -299.52996826171875, "logps/rejected": -372.1836853027344, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": 0.3490158021450043, "rewards/margins": 10.73047924041748, "rewards/rejected": -10.381464004516602, "step": 883 }, { "epoch": 0.3, "learning_rate": 1.9244074309173077e-06, "logits/chosen": -1.0070170164108276, "logits/rejected": -0.9901548027992249, "logps/chosen": -139.5128173828125, "logps/rejected": -264.7311706542969, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -1.4511034488677979, "rewards/margins": 9.656290054321289, "rewards/rejected": -11.107393264770508, "step": 884 }, { "epoch": 0.3, "learning_rate": 1.924196483293788e-06, "logits/chosen": -1.0551837682724, "logits/rejected": -1.0228503942489624, "logps/chosen": -262.3289489746094, "logps/rejected": -325.1467590332031, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -0.7488937973976135, "rewards/margins": 10.272261619567871, "rewards/rejected": -11.021156311035156, "step": 885 }, { "epoch": 0.3, "learning_rate": 1.9239852533409976e-06, "logits/chosen": -0.9506317377090454, "logits/rejected": -0.939907431602478, "logps/chosen": -178.3810272216797, "logps/rejected": -269.8082580566406, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -0.7728605270385742, "rewards/margins": 9.290365219116211, "rewards/rejected": -10.063223838806152, "step": 886 }, { "epoch": 0.3, "learning_rate": 1.9237737411234627e-06, "logits/chosen": -1.1212348937988281, "logits/rejected": -1.098868727684021, "logps/chosen": -247.46742248535156, "logps/rejected": -400.06451416015625, "loss": 0.0177, "rewards/accuracies": 0.9375, "rewards/chosen": -0.33888256549835205, "rewards/margins": 13.610142707824707, "rewards/rejected": -13.94902515411377, "step": 887 }, { "epoch": 0.3, "learning_rate": 1.923561946705799e-06, "logits/chosen": -1.0327966213226318, "logits/rejected": -1.0100573301315308, "logps/chosen": -208.7245635986328, "logps/rejected": -263.0655517578125, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.6520805954933167, "rewards/margins": 9.052071571350098, "rewards/rejected": -9.704153060913086, "step": 888 }, { "epoch": 0.3, "learning_rate": 1.9233498701527054e-06, "logits/chosen": -1.0868380069732666, "logits/rejected": -1.0534141063690186, "logps/chosen": -182.2889862060547, "logps/rejected": -314.77490234375, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -0.7744584679603577, "rewards/margins": 11.121999740600586, "rewards/rejected": -11.89645767211914, "step": 889 }, { "epoch": 0.3, "learning_rate": 1.9231375115289696e-06, "logits/chosen": -0.9761578440666199, "logits/rejected": -0.9428455829620361, "logps/chosen": -239.27879333496094, "logps/rejected": -316.8129577636719, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -0.25320547819137573, "rewards/margins": 11.66235065460205, "rewards/rejected": -11.915556907653809, "step": 890 }, { "epoch": 0.3, "learning_rate": 1.922924870899463e-06, "logits/chosen": -0.9887001514434814, "logits/rejected": -0.9737218022346497, "logps/chosen": -167.52069091796875, "logps/rejected": -287.7154235839844, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.3677861988544464, "rewards/margins": 11.822511672973633, "rewards/rejected": -12.190298080444336, "step": 891 }, { "epoch": 0.3, "learning_rate": 1.9227119483291455e-06, "logits/chosen": -0.8830989599227905, "logits/rejected": -0.8818553686141968, "logps/chosen": -181.70436096191406, "logps/rejected": -304.6748352050781, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -1.7130815982818604, "rewards/margins": 7.790170669555664, "rewards/rejected": -9.503252029418945, "step": 892 }, { "epoch": 0.3, "learning_rate": 1.922498743883061e-06, "logits/chosen": -1.0382850170135498, "logits/rejected": -1.0135786533355713, "logps/chosen": -206.17681884765625, "logps/rejected": -298.6508483886719, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.02900300920009613, "rewards/margins": 10.696380615234375, "rewards/rejected": -10.725385665893555, "step": 893 }, { "epoch": 0.31, "learning_rate": 1.9222852576263415e-06, "logits/chosen": -0.9788870811462402, "logits/rejected": -0.9588122367858887, "logps/chosen": -188.08026123046875, "logps/rejected": -262.8671569824219, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -0.3329918384552002, "rewards/margins": 8.023165702819824, "rewards/rejected": -8.356158256530762, "step": 894 }, { "epoch": 0.31, "learning_rate": 1.922071489624203e-06, "logits/chosen": -0.9499731659889221, "logits/rejected": -0.917434811592102, "logps/chosen": -217.3775634765625, "logps/rejected": -290.0505065917969, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -1.1647487878799438, "rewards/margins": 11.109044075012207, "rewards/rejected": -12.27379322052002, "step": 895 }, { "epoch": 0.31, "learning_rate": 1.92185743994195e-06, "logits/chosen": -0.9314233064651489, "logits/rejected": -0.9140791893005371, "logps/chosen": -209.19296264648438, "logps/rejected": -322.63092041015625, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -0.2628892660140991, "rewards/margins": 12.831792831420898, "rewards/rejected": -13.094681739807129, "step": 896 }, { "epoch": 0.31, "learning_rate": 1.9216431086449703e-06, "logits/chosen": -1.0679861307144165, "logits/rejected": -1.0383715629577637, "logps/chosen": -206.03460693359375, "logps/rejected": -296.583251953125, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -1.4778159856796265, "rewards/margins": 9.771464347839355, "rewards/rejected": -11.249281883239746, "step": 897 }, { "epoch": 0.31, "learning_rate": 1.9214284957987403e-06, "logits/chosen": -0.9370530247688293, "logits/rejected": -0.9180455207824707, "logps/chosen": -174.78518676757812, "logps/rejected": -256.046142578125, "loss": 0.0657, "rewards/accuracies": 1.0, "rewards/chosen": -0.5939656496047974, "rewards/margins": 9.297115325927734, "rewards/rejected": -9.891081809997559, "step": 898 }, { "epoch": 0.31, "learning_rate": 1.921213601468821e-06, "logits/chosen": -1.0149056911468506, "logits/rejected": -1.0032150745391846, "logps/chosen": -181.22203063964844, "logps/rejected": -240.79202270507812, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.7981427311897278, "rewards/margins": 8.208638191223145, "rewards/rejected": -9.006781578063965, "step": 899 }, { "epoch": 0.31, "learning_rate": 1.920998425720859e-06, "logits/chosen": -0.9638606309890747, "logits/rejected": -0.9438490867614746, "logps/chosen": -159.76966857910156, "logps/rejected": -274.265380859375, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -1.4388668537139893, "rewards/margins": 8.846514701843262, "rewards/rejected": -10.285380363464355, "step": 900 }, { "epoch": 0.31, "learning_rate": 1.920782968620588e-06, "logits/chosen": -0.9648231863975525, "logits/rejected": -0.9478709101676941, "logps/chosen": -171.95045471191406, "logps/rejected": -271.4820251464844, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -2.144432306289673, "rewards/margins": 8.128002166748047, "rewards/rejected": -10.27243423461914, "step": 901 }, { "epoch": 0.31, "learning_rate": 1.9205672302338275e-06, "logits/chosen": -1.0113376379013062, "logits/rejected": -0.9842404723167419, "logps/chosen": -181.64486694335938, "logps/rejected": -303.2397155761719, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": 0.04008558392524719, "rewards/margins": 12.658492088317871, "rewards/rejected": -12.618406295776367, "step": 902 }, { "epoch": 0.31, "learning_rate": 1.920351210626482e-06, "logits/chosen": -0.8383169770240784, "logits/rejected": -0.803309440612793, "logps/chosen": -183.3402862548828, "logps/rejected": -251.636962890625, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -0.9816311001777649, "rewards/margins": 10.886285781860352, "rewards/rejected": -11.867916107177734, "step": 903 }, { "epoch": 0.31, "learning_rate": 1.920134909864543e-06, "logits/chosen": -1.0063624382019043, "logits/rejected": -0.9724878072738647, "logps/chosen": -240.57582092285156, "logps/rejected": -328.2682800292969, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -0.8162817358970642, "rewards/margins": 10.40487289428711, "rewards/rejected": -11.22115421295166, "step": 904 }, { "epoch": 0.31, "learning_rate": 1.9199183280140875e-06, "logits/chosen": -1.0386606454849243, "logits/rejected": -1.017433762550354, "logps/chosen": -255.98892211914062, "logps/rejected": -379.20831298828125, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.05698026716709137, "rewards/margins": 12.1066255569458, "rewards/rejected": -12.163606643676758, "step": 905 }, { "epoch": 0.31, "learning_rate": 1.919701465141277e-06, "logits/chosen": -1.0405254364013672, "logits/rejected": -1.003524899482727, "logps/chosen": -189.0708465576172, "logps/rejected": -255.14320373535156, "loss": 0.0844, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8930978775024414, "rewards/margins": 8.53710651397705, "rewards/rejected": -10.430204391479492, "step": 906 }, { "epoch": 0.31, "learning_rate": 1.919484321312362e-06, "logits/chosen": -1.0678473711013794, "logits/rejected": -1.0554851293563843, "logps/chosen": -146.97508239746094, "logps/rejected": -255.22506713867188, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -1.7149866819381714, "rewards/margins": 8.31725788116455, "rewards/rejected": -10.032245635986328, "step": 907 }, { "epoch": 0.31, "learning_rate": 1.9192668965936754e-06, "logits/chosen": -1.0010106563568115, "logits/rejected": -0.9590476155281067, "logps/chosen": -237.95870971679688, "logps/rejected": -321.723876953125, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -0.5754654407501221, "rewards/margins": 10.635127067565918, "rewards/rejected": -11.210592269897461, "step": 908 }, { "epoch": 0.31, "learning_rate": 1.919049191051638e-06, "logits/chosen": -0.9291940331459045, "logits/rejected": -0.8904851675033569, "logps/chosen": -170.2935028076172, "logps/rejected": -236.4227294921875, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.07545630633831024, "rewards/margins": 9.709904670715332, "rewards/rejected": -9.785360336303711, "step": 909 }, { "epoch": 0.31, "learning_rate": 1.9188312047527563e-06, "logits/chosen": -1.0034081935882568, "logits/rejected": -0.9806145429611206, "logps/chosen": -204.9378204345703, "logps/rejected": -337.0037536621094, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -2.241516590118408, "rewards/margins": 12.381343841552734, "rewards/rejected": -14.6228609085083, "step": 910 }, { "epoch": 0.31, "learning_rate": 1.9186129377636218e-06, "logits/chosen": -0.9007360935211182, "logits/rejected": -0.8619586229324341, "logps/chosen": -210.4112091064453, "logps/rejected": -310.4862976074219, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -1.5310927629470825, "rewards/margins": 11.70595932006836, "rewards/rejected": -13.237051963806152, "step": 911 }, { "epoch": 0.31, "learning_rate": 1.9183943901509115e-06, "logits/chosen": -0.9858536720275879, "logits/rejected": -0.9408054947853088, "logps/chosen": -258.5379638671875, "logps/rejected": -342.5256652832031, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.43189147114753723, "rewards/margins": 12.46748161315918, "rewards/rejected": -12.899374008178711, "step": 912 }, { "epoch": 0.31, "learning_rate": 1.9181755619813896e-06, "logits/chosen": -0.985974133014679, "logits/rejected": -0.9710265398025513, "logps/chosen": -138.75936889648438, "logps/rejected": -207.49464416503906, "loss": 0.0537, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9256821870803833, "rewards/margins": 7.62981653213501, "rewards/rejected": -8.555499076843262, "step": 913 }, { "epoch": 0.31, "learning_rate": 1.9179564533219043e-06, "logits/chosen": -0.9319443702697754, "logits/rejected": -0.8948405981063843, "logps/chosen": -187.15087890625, "logps/rejected": -277.1686706542969, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": -1.3420939445495605, "rewards/margins": 10.542379379272461, "rewards/rejected": -11.88447380065918, "step": 914 }, { "epoch": 0.31, "learning_rate": 1.91773706423939e-06, "logits/chosen": -1.0462950468063354, "logits/rejected": -1.0265835523605347, "logps/chosen": -176.95599365234375, "logps/rejected": -233.6110076904297, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -1.682664394378662, "rewards/margins": 7.830191612243652, "rewards/rejected": -9.512855529785156, "step": 915 }, { "epoch": 0.31, "learning_rate": 1.9175173948008684e-06, "logits/chosen": -0.9385451674461365, "logits/rejected": -0.9007163643836975, "logps/chosen": -190.29461669921875, "logps/rejected": -266.7979431152344, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -0.8182297348976135, "rewards/margins": 8.870798110961914, "rewards/rejected": -9.689027786254883, "step": 916 }, { "epoch": 0.31, "learning_rate": 1.9172974450734447e-06, "logits/chosen": -1.0517386198043823, "logits/rejected": -1.0274087190628052, "logps/chosen": -237.43714904785156, "logps/rejected": -331.57391357421875, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -0.9540663361549377, "rewards/margins": 11.59998607635498, "rewards/rejected": -12.554052352905273, "step": 917 }, { "epoch": 0.31, "learning_rate": 1.9170772151243104e-06, "logits/chosen": -0.9288458824157715, "logits/rejected": -0.9095070958137512, "logps/chosen": -204.65594482421875, "logps/rejected": -293.8514099121094, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -0.8357724547386169, "rewards/margins": 8.176678657531738, "rewards/rejected": -9.012451171875, "step": 918 }, { "epoch": 0.31, "learning_rate": 1.9168567050207428e-06, "logits/chosen": -0.9246836304664612, "logits/rejected": -0.9082679748535156, "logps/chosen": -166.60525512695312, "logps/rejected": -263.1827392578125, "loss": 0.0311, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7994118928909302, "rewards/margins": 9.743209838867188, "rewards/rejected": -11.542620658874512, "step": 919 }, { "epoch": 0.31, "learning_rate": 1.9166359148301046e-06, "logits/chosen": -0.8460103869438171, "logits/rejected": -0.8051750659942627, "logps/chosen": -239.6687774658203, "logps/rejected": -312.5994873046875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 0.1092221736907959, "rewards/margins": 11.536444664001465, "rewards/rejected": -11.427223205566406, "step": 920 }, { "epoch": 0.31, "learning_rate": 1.9164148446198448e-06, "logits/chosen": -0.9899542927742004, "logits/rejected": -0.9744304418563843, "logps/chosen": -212.98846435546875, "logps/rejected": -312.51995849609375, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -1.1866440773010254, "rewards/margins": 10.599363327026367, "rewards/rejected": -11.786006927490234, "step": 921 }, { "epoch": 0.31, "learning_rate": 1.916193494457496e-06, "logits/chosen": -0.991828441619873, "logits/rejected": -0.9674802422523499, "logps/chosen": -222.2827911376953, "logps/rejected": -287.70849609375, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": -1.8585443496704102, "rewards/margins": 9.435676574707031, "rewards/rejected": -11.294220924377441, "step": 922 }, { "epoch": 0.32, "learning_rate": 1.915971864410679e-06, "logits/chosen": -0.8726867437362671, "logits/rejected": -0.8640937805175781, "logps/chosen": -155.61341857910156, "logps/rejected": -277.84222412109375, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.6455355882644653, "rewards/margins": 8.399343490600586, "rewards/rejected": -9.044878005981445, "step": 923 }, { "epoch": 0.32, "learning_rate": 1.9157499545470976e-06, "logits/chosen": -0.9228135347366333, "logits/rejected": -0.9101450443267822, "logps/chosen": -204.217529296875, "logps/rejected": -341.10791015625, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -0.8985668420791626, "rewards/margins": 11.254565238952637, "rewards/rejected": -12.153131484985352, "step": 924 }, { "epoch": 0.32, "learning_rate": 1.915527764934543e-06, "logits/chosen": -0.9514289498329163, "logits/rejected": -0.9113046526908875, "logps/chosen": -157.18695068359375, "logps/rejected": -231.98580932617188, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.1285045146942139, "rewards/margins": 9.96347713470459, "rewards/rejected": -11.091980934143066, "step": 925 }, { "epoch": 0.32, "learning_rate": 1.915305295640891e-06, "logits/chosen": -0.9638538956642151, "logits/rejected": -0.9493209719657898, "logps/chosen": -143.54702758789062, "logps/rejected": -246.13368225097656, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": -1.2378160953521729, "rewards/margins": 8.389054298400879, "rewards/rejected": -9.626871109008789, "step": 926 }, { "epoch": 0.32, "learning_rate": 1.915082546734102e-06, "logits/chosen": -0.9842336177825928, "logits/rejected": -0.985732913017273, "logps/chosen": -136.8850555419922, "logps/rejected": -250.7018585205078, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.3872389793395996, "rewards/margins": 9.56828498840332, "rewards/rejected": -10.955524444580078, "step": 927 }, { "epoch": 0.32, "learning_rate": 1.9148595182822237e-06, "logits/chosen": -0.9284588694572449, "logits/rejected": -0.9063735008239746, "logps/chosen": -196.7731170654297, "logps/rejected": -283.8121337890625, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -1.3453905582427979, "rewards/margins": 9.664158821105957, "rewards/rejected": -11.009549140930176, "step": 928 }, { "epoch": 0.32, "learning_rate": 1.914636210353388e-06, "logits/chosen": -1.0525212287902832, "logits/rejected": -1.0352362394332886, "logps/chosen": -204.6571044921875, "logps/rejected": -288.6630859375, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": -1.2961328029632568, "rewards/margins": 9.36767292022705, "rewards/rejected": -10.663805961608887, "step": 929 }, { "epoch": 0.32, "learning_rate": 1.9144126230158124e-06, "logits/chosen": -1.0687634944915771, "logits/rejected": -1.0508664846420288, "logps/chosen": -205.14810180664062, "logps/rejected": -312.9345397949219, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.448275089263916, "rewards/margins": 9.932577133178711, "rewards/rejected": -11.380851745605469, "step": 930 }, { "epoch": 0.32, "learning_rate": 1.9141887563378e-06, "logits/chosen": -0.8848387598991394, "logits/rejected": -0.8451995253562927, "logps/chosen": -178.1589813232422, "logps/rejected": -251.45260620117188, "loss": 0.0188, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9102139472961426, "rewards/margins": 8.446892738342285, "rewards/rejected": -10.35710620880127, "step": 931 }, { "epoch": 0.32, "learning_rate": 1.9139646103877378e-06, "logits/chosen": -1.003466010093689, "logits/rejected": -0.9860981106758118, "logps/chosen": -173.5375213623047, "logps/rejected": -241.2232208251953, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -1.964918613433838, "rewards/margins": 9.47314167022705, "rewards/rejected": -11.43805980682373, "step": 932 }, { "epoch": 0.32, "learning_rate": 1.9137401852341004e-06, "logits/chosen": -0.926373302936554, "logits/rejected": -0.9120243787765503, "logps/chosen": -241.27276611328125, "logps/rejected": -414.3103942871094, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -0.8696706891059875, "rewards/margins": 14.831707954406738, "rewards/rejected": -15.701375961303711, "step": 933 }, { "epoch": 0.32, "learning_rate": 1.9135154809454465e-06, "logits/chosen": -0.9828436374664307, "logits/rejected": -0.9652118682861328, "logps/chosen": -179.15061950683594, "logps/rejected": -321.3584289550781, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -0.7965556383132935, "rewards/margins": 11.35003662109375, "rewards/rejected": -12.146592140197754, "step": 934 }, { "epoch": 0.32, "learning_rate": 1.9132904975904193e-06, "logits/chosen": -0.9664428234100342, "logits/rejected": -0.9277318716049194, "logps/chosen": -203.56845092773438, "logps/rejected": -263.4629211425781, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -0.8864503502845764, "rewards/margins": 9.176847457885742, "rewards/rejected": -10.063298225402832, "step": 935 }, { "epoch": 0.32, "learning_rate": 1.9130652352377496e-06, "logits/chosen": -1.015346884727478, "logits/rejected": -1.0028387308120728, "logps/chosen": -212.08197021484375, "logps/rejected": -300.7069091796875, "loss": 0.039, "rewards/accuracies": 1.0, "rewards/chosen": -0.8196456432342529, "rewards/margins": 11.079062461853027, "rewards/rejected": -11.898709297180176, "step": 936 }, { "epoch": 0.32, "learning_rate": 1.9128396939562507e-06, "logits/chosen": -0.9440268874168396, "logits/rejected": -0.9031566381454468, "logps/chosen": -249.27540588378906, "logps/rejected": -345.4793701171875, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.5017374753952026, "rewards/margins": 11.978986740112305, "rewards/rejected": -12.480724334716797, "step": 937 }, { "epoch": 0.32, "learning_rate": 1.9126138738148228e-06, "logits/chosen": -0.9607807397842407, "logits/rejected": -0.9431307315826416, "logps/chosen": -139.20590209960938, "logps/rejected": -242.36239624023438, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": -1.691784143447876, "rewards/margins": 9.948213577270508, "rewards/rejected": -11.639998435974121, "step": 938 }, { "epoch": 0.32, "learning_rate": 1.912387774882451e-06, "logits/chosen": -1.0401756763458252, "logits/rejected": -0.9996753931045532, "logps/chosen": -263.8715515136719, "logps/rejected": -273.9540100097656, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 0.8387221097946167, "rewards/margins": 10.938554763793945, "rewards/rejected": -10.099832534790039, "step": 939 }, { "epoch": 0.32, "learning_rate": 1.912161397228205e-06, "logits/chosen": -1.0029301643371582, "logits/rejected": -1.0001903772354126, "logps/chosen": -187.72479248046875, "logps/rejected": -315.88397216796875, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.9750896692276001, "rewards/margins": 10.050256729125977, "rewards/rejected": -11.025346755981445, "step": 940 }, { "epoch": 0.32, "learning_rate": 1.9119347409212403e-06, "logits/chosen": -1.005875825881958, "logits/rejected": -0.994394838809967, "logps/chosen": -200.4732666015625, "logps/rejected": -269.8752746582031, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.5679789781570435, "rewards/margins": 8.71692180633545, "rewards/rejected": -9.284902572631836, "step": 941 }, { "epoch": 0.32, "learning_rate": 1.9117078060307973e-06, "logits/chosen": -0.9779708385467529, "logits/rejected": -0.9654921293258667, "logps/chosen": -214.98736572265625, "logps/rejected": -296.5003967285156, "loss": 0.0237, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3105460405349731, "rewards/margins": 9.133844375610352, "rewards/rejected": -10.444390296936035, "step": 942 }, { "epoch": 0.32, "learning_rate": 1.911480592626201e-06, "logits/chosen": -0.897840678691864, "logits/rejected": -0.8595592379570007, "logps/chosen": -239.3634033203125, "logps/rejected": -301.7545471191406, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": 0.09460528939962387, "rewards/margins": 9.23670482635498, "rewards/rejected": -9.142099380493164, "step": 943 }, { "epoch": 0.32, "learning_rate": 1.911253100776863e-06, "logits/chosen": -0.9925597310066223, "logits/rejected": -0.9608437418937683, "logps/chosen": -213.1226806640625, "logps/rejected": -308.0242004394531, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.6897441744804382, "rewards/margins": 11.391425132751465, "rewards/rejected": -12.081169128417969, "step": 944 }, { "epoch": 0.32, "learning_rate": 1.911025330552278e-06, "logits/chosen": -0.9050557017326355, "logits/rejected": -0.857651948928833, "logps/chosen": -228.698974609375, "logps/rejected": -287.5367431640625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.8152945041656494, "rewards/margins": 11.744011878967285, "rewards/rejected": -12.559306144714355, "step": 945 }, { "epoch": 0.32, "learning_rate": 1.9107972820220267e-06, "logits/chosen": -1.0276339054107666, "logits/rejected": -1.0100599527359009, "logps/chosen": -172.64845275878906, "logps/rejected": -263.1897277832031, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -0.5658351182937622, "rewards/margins": 9.013579368591309, "rewards/rejected": -9.579414367675781, "step": 946 }, { "epoch": 0.32, "learning_rate": 1.9105689552557753e-06, "logits/chosen": -1.006119966506958, "logits/rejected": -0.9793890118598938, "logps/chosen": -203.32557678222656, "logps/rejected": -291.76910400390625, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.6061601638793945, "rewards/margins": 9.815878868103027, "rewards/rejected": -11.422039031982422, "step": 947 }, { "epoch": 0.32, "learning_rate": 1.910340350323274e-06, "logits/chosen": -0.9708040356636047, "logits/rejected": -0.9461195468902588, "logps/chosen": -147.88856506347656, "logps/rejected": -210.56167602539062, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.1574804782867432, "rewards/margins": 8.962447166442871, "rewards/rejected": -10.119927406311035, "step": 948 }, { "epoch": 0.32, "learning_rate": 1.9101114672943587e-06, "logits/chosen": -1.0058287382125854, "logits/rejected": -0.9747049808502197, "logps/chosen": -207.21131896972656, "logps/rejected": -330.47723388671875, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -2.2143919467926025, "rewards/margins": 10.623927116394043, "rewards/rejected": -12.838319778442383, "step": 949 }, { "epoch": 0.32, "learning_rate": 1.9098823062389502e-06, "logits/chosen": -0.9901570677757263, "logits/rejected": -0.9702141284942627, "logps/chosen": -168.07049560546875, "logps/rejected": -230.05967712402344, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": -1.0598372220993042, "rewards/margins": 8.299736976623535, "rewards/rejected": -9.359574317932129, "step": 950 }, { "epoch": 0.32, "learning_rate": 1.9096528672270535e-06, "logits/chosen": -0.9508213400840759, "logits/rejected": -0.942499577999115, "logps/chosen": -212.44522094726562, "logps/rejected": -342.89727783203125, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -1.6836891174316406, "rewards/margins": 10.687613487243652, "rewards/rejected": -12.37130355834961, "step": 951 }, { "epoch": 0.32, "learning_rate": 1.9094231503287595e-06, "logits/chosen": -1.0120704174041748, "logits/rejected": -0.9928672313690186, "logps/chosen": -229.3857879638672, "logps/rejected": -312.55322265625, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.3617621958255768, "rewards/margins": 9.95269775390625, "rewards/rejected": -10.314459800720215, "step": 952 }, { "epoch": 0.33, "learning_rate": 1.9091931556142434e-06, "logits/chosen": -0.9244806170463562, "logits/rejected": -0.9098807573318481, "logps/chosen": -152.26905822753906, "logps/rejected": -266.36962890625, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -1.6513431072235107, "rewards/margins": 9.17760181427002, "rewards/rejected": -10.82894515991211, "step": 953 }, { "epoch": 0.33, "learning_rate": 1.9089628831537653e-06, "logits/chosen": -0.9815248250961304, "logits/rejected": -0.9673017859458923, "logps/chosen": -246.76466369628906, "logps/rejected": -373.76605224609375, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -0.7263674139976501, "rewards/margins": 13.079439163208008, "rewards/rejected": -13.805807113647461, "step": 954 }, { "epoch": 0.33, "learning_rate": 1.9087323330176703e-06, "logits/chosen": -0.9748388528823853, "logits/rejected": -0.9560434818267822, "logps/chosen": -201.40863037109375, "logps/rejected": -288.73406982421875, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.2528609037399292, "rewards/margins": 11.716543197631836, "rewards/rejected": -11.969404220581055, "step": 955 }, { "epoch": 0.33, "learning_rate": 1.9085015052763884e-06, "logits/chosen": -1.0029605627059937, "logits/rejected": -0.9804708361625671, "logps/chosen": -243.55264282226562, "logps/rejected": -402.0469970703125, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 0.030953019857406616, "rewards/margins": 14.087580680847168, "rewards/rejected": -14.056628227233887, "step": 956 }, { "epoch": 0.33, "learning_rate": 1.9082704000004343e-06, "logits/chosen": -1.0245897769927979, "logits/rejected": -1.0108623504638672, "logps/chosen": -197.3173828125, "logps/rejected": -326.6927795410156, "loss": 0.0377, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4745593070983887, "rewards/margins": 11.00910758972168, "rewards/rejected": -13.483668327331543, "step": 957 }, { "epoch": 0.33, "learning_rate": 1.9080390172604073e-06, "logits/chosen": -0.9408648014068604, "logits/rejected": -0.9286066889762878, "logps/chosen": -183.24929809570312, "logps/rejected": -284.5751037597656, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.4136245846748352, "rewards/margins": 9.92241382598877, "rewards/rejected": -10.336039543151855, "step": 958 }, { "epoch": 0.33, "learning_rate": 1.907807357126992e-06, "logits/chosen": -0.924299955368042, "logits/rejected": -0.8932464718818665, "logps/chosen": -163.46792602539062, "logps/rejected": -219.00550842285156, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -0.11132752895355225, "rewards/margins": 7.4218926429748535, "rewards/rejected": -7.533220291137695, "step": 959 }, { "epoch": 0.33, "learning_rate": 1.907575419670957e-06, "logits/chosen": -0.9712944626808167, "logits/rejected": -0.9453842043876648, "logps/chosen": -181.14462280273438, "logps/rejected": -287.0485534667969, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -2.28959059715271, "rewards/margins": 8.94781494140625, "rewards/rejected": -11.237403869628906, "step": 960 }, { "epoch": 0.33, "learning_rate": 1.9073432049631562e-06, "logits/chosen": -0.9972956776618958, "logits/rejected": -0.9799597859382629, "logps/chosen": -189.56443786621094, "logps/rejected": -282.1059265136719, "loss": 0.1031, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2639909982681274, "rewards/margins": 9.102874755859375, "rewards/rejected": -10.366866111755371, "step": 961 }, { "epoch": 0.33, "learning_rate": 1.9071107130745276e-06, "logits/chosen": -0.9431208968162537, "logits/rejected": -0.9211626648902893, "logps/chosen": -136.2276611328125, "logps/rejected": -243.7233123779297, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.5966943502426147, "rewards/margins": 10.169431686401367, "rewards/rejected": -10.766125679016113, "step": 962 }, { "epoch": 0.33, "learning_rate": 1.9068779440760946e-06, "logits/chosen": -1.0034573078155518, "logits/rejected": -0.9818302392959595, "logps/chosen": -180.94683837890625, "logps/rejected": -252.15628051757812, "loss": 0.0229, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9624806642532349, "rewards/margins": 7.6243791580200195, "rewards/rejected": -8.586859703063965, "step": 963 }, { "epoch": 0.33, "learning_rate": 1.906644898038965e-06, "logits/chosen": -0.9577499628067017, "logits/rejected": -0.9421329498291016, "logps/chosen": -239.5866241455078, "logps/rejected": -318.89495849609375, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.7642319202423096, "rewards/margins": 11.949042320251465, "rewards/rejected": -12.713274955749512, "step": 964 }, { "epoch": 0.33, "learning_rate": 1.9064115750343311e-06, "logits/chosen": -0.8526036143302917, "logits/rejected": -0.8278849124908447, "logps/chosen": -153.1604461669922, "logps/rejected": -252.84278869628906, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -0.77762770652771, "rewards/margins": 8.892404556274414, "rewards/rejected": -9.670032501220703, "step": 965 }, { "epoch": 0.33, "learning_rate": 1.9061779751334694e-06, "logits/chosen": -0.9134493470191956, "logits/rejected": -0.882565438747406, "logps/chosen": -165.9158477783203, "logps/rejected": -239.04212951660156, "loss": 0.0298, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8889360427856445, "rewards/margins": 9.65004825592041, "rewards/rejected": -10.538984298706055, "step": 966 }, { "epoch": 0.33, "learning_rate": 1.905944098407742e-06, "logits/chosen": -0.9227221012115479, "logits/rejected": -0.9201210141181946, "logps/chosen": -127.67543029785156, "logps/rejected": -177.46063232421875, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -0.5662477016448975, "rewards/margins": 6.477964401245117, "rewards/rejected": -7.0442118644714355, "step": 967 }, { "epoch": 0.33, "learning_rate": 1.9057099449285951e-06, "logits/chosen": -0.9671862721443176, "logits/rejected": -0.9460793137550354, "logps/chosen": -222.23287963867188, "logps/rejected": -338.02801513671875, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -1.6926952600479126, "rewards/margins": 10.685896873474121, "rewards/rejected": -12.378591537475586, "step": 968 }, { "epoch": 0.33, "learning_rate": 1.9054755147675591e-06, "logits/chosen": -1.0211539268493652, "logits/rejected": -0.9964058995246887, "logps/chosen": -180.012451171875, "logps/rejected": -238.29388427734375, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -2.1878113746643066, "rewards/margins": 8.803071022033691, "rewards/rejected": -10.990883827209473, "step": 969 }, { "epoch": 0.33, "learning_rate": 1.905240807996249e-06, "logits/chosen": -0.8508103489875793, "logits/rejected": -0.8312565684318542, "logps/chosen": -188.98858642578125, "logps/rejected": -310.5719299316406, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": -0.5489206910133362, "rewards/margins": 9.610466003417969, "rewards/rejected": -10.159385681152344, "step": 970 }, { "epoch": 0.33, "learning_rate": 1.9050058246863645e-06, "logits/chosen": -0.9614779949188232, "logits/rejected": -0.9298476576805115, "logps/chosen": -157.05653381347656, "logps/rejected": -286.1781921386719, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -1.3090143203735352, "rewards/margins": 11.142900466918945, "rewards/rejected": -12.45191478729248, "step": 971 }, { "epoch": 0.33, "learning_rate": 1.9047705649096902e-06, "logits/chosen": -0.8736904263496399, "logits/rejected": -0.8536800146102905, "logps/chosen": -184.54611206054688, "logps/rejected": -285.1759948730469, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -0.7219163179397583, "rewards/margins": 9.100651741027832, "rewards/rejected": -9.822568893432617, "step": 972 }, { "epoch": 0.33, "learning_rate": 1.904535028738094e-06, "logits/chosen": -0.9885146021842957, "logits/rejected": -0.9586727023124695, "logps/chosen": -216.58392333984375, "logps/rejected": -316.6125793457031, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -0.9907943606376648, "rewards/margins": 11.794946670532227, "rewards/rejected": -12.785740852355957, "step": 973 }, { "epoch": 0.33, "learning_rate": 1.90429921624353e-06, "logits/chosen": -0.979737401008606, "logits/rejected": -0.943859338760376, "logps/chosen": -173.771240234375, "logps/rejected": -308.6124572753906, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -2.0247249603271484, "rewards/margins": 12.362249374389648, "rewards/rejected": -14.386974334716797, "step": 974 }, { "epoch": 0.33, "learning_rate": 1.9040631274980345e-06, "logits/chosen": -0.9337289333343506, "logits/rejected": -0.9107937812805176, "logps/chosen": -227.02288818359375, "logps/rejected": -312.52490234375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.1946179866790771, "rewards/margins": 10.274761199951172, "rewards/rejected": -11.469380378723145, "step": 975 }, { "epoch": 0.33, "learning_rate": 1.9038267625737303e-06, "logits/chosen": -1.0051630735397339, "logits/rejected": -0.9782695770263672, "logps/chosen": -214.0450439453125, "logps/rejected": -303.03704833984375, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -1.2979607582092285, "rewards/margins": 9.76965618133545, "rewards/rejected": -11.06761646270752, "step": 976 }, { "epoch": 0.33, "learning_rate": 1.9035901215428229e-06, "logits/chosen": -0.95775306224823, "logits/rejected": -0.9332994818687439, "logps/chosen": -257.3456115722656, "logps/rejected": -361.62060546875, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.5130535364151001, "rewards/margins": 11.474081039428711, "rewards/rejected": -11.987133979797363, "step": 977 }, { "epoch": 0.33, "learning_rate": 1.9033532044776033e-06, "logits/chosen": -0.9980751276016235, "logits/rejected": -0.9615218639373779, "logps/chosen": -124.61361694335938, "logps/rejected": -242.55198669433594, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.199326753616333, "rewards/margins": 10.793171882629395, "rewards/rejected": -11.992498397827148, "step": 978 }, { "epoch": 0.33, "learning_rate": 1.9031160114504466e-06, "logits/chosen": -0.9769184589385986, "logits/rejected": -0.9534260034561157, "logps/chosen": -208.56576538085938, "logps/rejected": -313.40728759765625, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.23808874189853668, "rewards/margins": 9.154230117797852, "rewards/rejected": -9.392318725585938, "step": 979 }, { "epoch": 0.33, "learning_rate": 1.9028785425338113e-06, "logits/chosen": -1.1053440570831299, "logits/rejected": -1.061402678489685, "logps/chosen": -236.85501098632812, "logps/rejected": -300.2098388671875, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -1.3459820747375488, "rewards/margins": 9.667346954345703, "rewards/rejected": -11.013328552246094, "step": 980 }, { "epoch": 0.33, "learning_rate": 1.9026407978002412e-06, "logits/chosen": -1.0513370037078857, "logits/rejected": -1.008374571800232, "logps/chosen": -203.2095947265625, "logps/rejected": -262.45465087890625, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.789467453956604, "rewards/margins": 10.304426193237305, "rewards/rejected": -12.093894958496094, "step": 981 }, { "epoch": 0.34, "learning_rate": 1.902402777322364e-06, "logits/chosen": -0.9943570494651794, "logits/rejected": -0.9520961046218872, "logps/chosen": -168.3358154296875, "logps/rejected": -227.2205047607422, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -0.808150053024292, "rewards/margins": 8.92141342163086, "rewards/rejected": -9.729562759399414, "step": 982 }, { "epoch": 0.34, "learning_rate": 1.9021644811728921e-06, "logits/chosen": -0.9567182660102844, "logits/rejected": -0.9178169369697571, "logps/chosen": -174.26632690429688, "logps/rejected": -215.63211059570312, "loss": 0.0586, "rewards/accuracies": 0.9375, "rewards/chosen": -0.21422263979911804, "rewards/margins": 8.07281494140625, "rewards/rejected": -8.287036895751953, "step": 983 }, { "epoch": 0.34, "learning_rate": 1.9019259094246212e-06, "logits/chosen": -0.9710248708724976, "logits/rejected": -0.9557413458824158, "logps/chosen": -211.5923614501953, "logps/rejected": -316.79681396484375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.2414088100194931, "rewards/margins": 10.778017044067383, "rewards/rejected": -11.019426345825195, "step": 984 }, { "epoch": 0.34, "learning_rate": 1.9016870621504318e-06, "logits/chosen": -0.9471913576126099, "logits/rejected": -0.9187785983085632, "logps/chosen": -229.18850708007812, "logps/rejected": -360.27386474609375, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -0.38326364755630493, "rewards/margins": 14.290507316589355, "rewards/rejected": -14.673770904541016, "step": 985 }, { "epoch": 0.34, "learning_rate": 1.9014479394232885e-06, "logits/chosen": -0.958484411239624, "logits/rejected": -0.9460846781730652, "logps/chosen": -179.9703826904297, "logps/rejected": -281.95318603515625, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -1.4245822429656982, "rewards/margins": 11.604738235473633, "rewards/rejected": -13.02932071685791, "step": 986 }, { "epoch": 0.34, "learning_rate": 1.90120854131624e-06, "logits/chosen": -0.9205437302589417, "logits/rejected": -0.9142365455627441, "logps/chosen": -163.425048828125, "logps/rejected": -258.61859130859375, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.6787947416305542, "rewards/margins": 9.624797821044922, "rewards/rejected": -11.303592681884766, "step": 987 }, { "epoch": 0.34, "learning_rate": 1.9009688679024189e-06, "logits/chosen": -1.068882942199707, "logits/rejected": -1.0184193849563599, "logps/chosen": -258.4083251953125, "logps/rejected": -346.3065185546875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.7745919227600098, "rewards/margins": 12.31412124633789, "rewards/rejected": -13.088713645935059, "step": 988 }, { "epoch": 0.34, "learning_rate": 1.9007289192550427e-06, "logits/chosen": -0.9198625683784485, "logits/rejected": -0.9041613340377808, "logps/chosen": -160.99229431152344, "logps/rejected": -270.1304016113281, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -1.0303555727005005, "rewards/margins": 10.659402847290039, "rewards/rejected": -11.689757347106934, "step": 989 }, { "epoch": 0.34, "learning_rate": 1.9004886954474117e-06, "logits/chosen": -1.0265933275222778, "logits/rejected": -0.9963968396186829, "logps/chosen": -216.58035278320312, "logps/rejected": -294.5001220703125, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.5814926624298096, "rewards/margins": 10.060534477233887, "rewards/rejected": -11.64202880859375, "step": 990 }, { "epoch": 0.34, "learning_rate": 1.900248196552911e-06, "logits/chosen": -0.9765748381614685, "logits/rejected": -0.9642009139060974, "logps/chosen": -145.32595825195312, "logps/rejected": -238.92434692382812, "loss": 0.0472, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2063207626342773, "rewards/margins": 8.678190231323242, "rewards/rejected": -9.884509086608887, "step": 991 }, { "epoch": 0.34, "learning_rate": 1.9000074226450106e-06, "logits/chosen": -1.0165504217147827, "logits/rejected": -0.9814569354057312, "logps/chosen": -215.26443481445312, "logps/rejected": -303.32672119140625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -2.3238673210144043, "rewards/margins": 10.840825080871582, "rewards/rejected": -13.164691925048828, "step": 992 }, { "epoch": 0.34, "learning_rate": 1.8997663737972625e-06, "logits/chosen": -0.9532288908958435, "logits/rejected": -0.9352431893348694, "logps/chosen": -158.97767639160156, "logps/rejected": -267.3240661621094, "loss": 0.0476, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7773563861846924, "rewards/margins": 10.322407722473145, "rewards/rejected": -12.099764823913574, "step": 993 }, { "epoch": 0.34, "learning_rate": 1.8995250500833042e-06, "logits/chosen": -0.93934166431427, "logits/rejected": -0.8987959623336792, "logps/chosen": -190.93154907226562, "logps/rejected": -309.05255126953125, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": -0.7815633416175842, "rewards/margins": 13.47819709777832, "rewards/rejected": -14.259760856628418, "step": 994 }, { "epoch": 0.34, "learning_rate": 1.899283451576857e-06, "logits/chosen": -0.9449823498725891, "logits/rejected": -0.924785315990448, "logps/chosen": -238.6095428466797, "logps/rejected": -337.751220703125, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -2.0995047092437744, "rewards/margins": 10.440799713134766, "rewards/rejected": -12.540302276611328, "step": 995 }, { "epoch": 0.34, "learning_rate": 1.8990415783517256e-06, "logits/chosen": -0.8273655772209167, "logits/rejected": -0.7911558151245117, "logps/chosen": -196.1661834716797, "logps/rejected": -275.78765869140625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.8274835348129272, "rewards/margins": 10.128198623657227, "rewards/rejected": -10.955680847167969, "step": 996 }, { "epoch": 0.34, "learning_rate": 1.8987994304817993e-06, "logits/chosen": -0.984077513217926, "logits/rejected": -0.9682413935661316, "logps/chosen": -137.51901245117188, "logps/rejected": -230.3408203125, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 0.11997578293085098, "rewards/margins": 9.756185531616211, "rewards/rejected": -9.636209487915039, "step": 997 }, { "epoch": 0.34, "learning_rate": 1.8985570080410504e-06, "logits/chosen": -0.9759625792503357, "logits/rejected": -0.9815431833267212, "logps/chosen": -156.42263793945312, "logps/rejected": -307.1253356933594, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -0.7992799282073975, "rewards/margins": 11.805328369140625, "rewards/rejected": -12.604609489440918, "step": 998 }, { "epoch": 0.34, "learning_rate": 1.8983143111035363e-06, "logits/chosen": -0.9898931980133057, "logits/rejected": -0.9774844646453857, "logps/chosen": -179.0161895751953, "logps/rejected": -334.5423583984375, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -1.1013580560684204, "rewards/margins": 11.419405937194824, "rewards/rejected": -12.520764350891113, "step": 999 }, { "epoch": 0.34, "learning_rate": 1.8980713397433968e-06, "logits/chosen": -0.9392002820968628, "logits/rejected": -0.8923810124397278, "logps/chosen": -207.15643310546875, "logps/rejected": -333.8825378417969, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -0.2574843466281891, "rewards/margins": 12.491434097290039, "rewards/rejected": -12.748917579650879, "step": 1000 }, { "epoch": 0.34, "learning_rate": 1.8978280940348568e-06, "logits/chosen": -0.9950217008590698, "logits/rejected": -0.9726006388664246, "logps/chosen": -210.7113037109375, "logps/rejected": -315.82794189453125, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -0.5866385102272034, "rewards/margins": 10.547499656677246, "rewards/rejected": -11.134138107299805, "step": 1001 }, { "epoch": 0.34, "learning_rate": 1.8975845740522242e-06, "logits/chosen": -0.8369202017784119, "logits/rejected": -0.8265039920806885, "logps/chosen": -180.33987426757812, "logps/rejected": -271.8939208984375, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -0.9945195317268372, "rewards/margins": 8.813935279846191, "rewards/rejected": -9.808455467224121, "step": 1002 }, { "epoch": 0.34, "learning_rate": 1.8973407798698912e-06, "logits/chosen": -1.0367631912231445, "logits/rejected": -1.0343451499938965, "logps/chosen": -119.64551544189453, "logps/rejected": -247.91107177734375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.9770281314849854, "rewards/margins": 9.647076606750488, "rewards/rejected": -11.624104499816895, "step": 1003 }, { "epoch": 0.34, "learning_rate": 1.8970967115623338e-06, "logits/chosen": -0.9197165966033936, "logits/rejected": -0.8868984580039978, "logps/chosen": -233.43133544921875, "logps/rejected": -335.759765625, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -1.9547697305679321, "rewards/margins": 10.834183692932129, "rewards/rejected": -12.78895378112793, "step": 1004 }, { "epoch": 0.34, "learning_rate": 1.896852369204111e-06, "logits/chosen": -0.947978675365448, "logits/rejected": -0.9117785096168518, "logps/chosen": -203.8729248046875, "logps/rejected": -283.9520568847656, "loss": 0.0693, "rewards/accuracies": 1.0, "rewards/chosen": -0.9459078311920166, "rewards/margins": 9.952356338500977, "rewards/rejected": -10.898263931274414, "step": 1005 }, { "epoch": 0.34, "learning_rate": 1.8966077528698663e-06, "logits/chosen": -0.9341748952865601, "logits/rejected": -0.90003901720047, "logps/chosen": -163.8695068359375, "logps/rejected": -288.3387451171875, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -1.4374397993087769, "rewards/margins": 11.118088722229004, "rewards/rejected": -12.55552864074707, "step": 1006 }, { "epoch": 0.34, "learning_rate": 1.8963628626343264e-06, "logits/chosen": -0.9316287636756897, "logits/rejected": -0.8907393217086792, "logps/chosen": -173.607177734375, "logps/rejected": -262.2021789550781, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": -0.9402394890785217, "rewards/margins": 10.5623779296875, "rewards/rejected": -11.502617835998535, "step": 1007 }, { "epoch": 0.34, "learning_rate": 1.8961176985723024e-06, "logits/chosen": -0.9775078892707825, "logits/rejected": -0.9656680822372437, "logps/chosen": -229.140625, "logps/rejected": -356.34698486328125, "loss": 0.0318, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4925624132156372, "rewards/margins": 10.128469467163086, "rewards/rejected": -10.621031761169434, "step": 1008 }, { "epoch": 0.34, "learning_rate": 1.895872260758688e-06, "logits/chosen": -0.9914987087249756, "logits/rejected": -0.9626944661140442, "logps/chosen": -171.26052856445312, "logps/rejected": -304.64801025390625, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -1.6834005117416382, "rewards/margins": 10.903450965881348, "rewards/rejected": -12.586852073669434, "step": 1009 }, { "epoch": 0.34, "learning_rate": 1.8956265492684611e-06, "logits/chosen": -0.9173950552940369, "logits/rejected": -0.8546074032783508, "logps/chosen": -207.24327087402344, "logps/rejected": -243.4940185546875, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -0.9108967781066895, "rewards/margins": 8.90561580657959, "rewards/rejected": -9.816513061523438, "step": 1010 }, { "epoch": 0.35, "learning_rate": 1.8953805641766833e-06, "logits/chosen": -0.9065623879432678, "logits/rejected": -0.8871538639068604, "logps/chosen": -203.2542724609375, "logps/rejected": -262.015380859375, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -1.1648013591766357, "rewards/margins": 8.063825607299805, "rewards/rejected": -9.228628158569336, "step": 1011 }, { "epoch": 0.35, "learning_rate": 1.8951343055584998e-06, "logits/chosen": -0.8881002068519592, "logits/rejected": -0.8605793714523315, "logps/chosen": -292.08807373046875, "logps/rejected": -393.2732849121094, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -2.206414222717285, "rewards/margins": 11.581957817077637, "rewards/rejected": -13.788371086120605, "step": 1012 }, { "epoch": 0.35, "learning_rate": 1.8948877734891392e-06, "logits/chosen": -0.8708029389381409, "logits/rejected": -0.8319421410560608, "logps/chosen": -181.1309356689453, "logps/rejected": -223.38702392578125, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -0.633642315864563, "rewards/margins": 8.034040451049805, "rewards/rejected": -8.667683601379395, "step": 1013 }, { "epoch": 0.35, "learning_rate": 1.8946409680439134e-06, "logits/chosen": -0.980398416519165, "logits/rejected": -0.9569449424743652, "logps/chosen": -191.32373046875, "logps/rejected": -268.6583251953125, "loss": 0.0485, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3901262879371643, "rewards/margins": 10.980597496032715, "rewards/rejected": -11.370723724365234, "step": 1014 }, { "epoch": 0.35, "learning_rate": 1.894393889298218e-06, "logits/chosen": -1.1016441583633423, "logits/rejected": -1.0956450700759888, "logps/chosen": -219.71575927734375, "logps/rejected": -321.9915771484375, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": -0.9329180121421814, "rewards/margins": 10.786113739013672, "rewards/rejected": -11.719032287597656, "step": 1015 }, { "epoch": 0.35, "learning_rate": 1.8941465373275328e-06, "logits/chosen": -0.9987099170684814, "logits/rejected": -0.9800707101821899, "logps/chosen": -223.5232391357422, "logps/rejected": -331.12969970703125, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9148449897766113, "rewards/margins": 10.31572151184082, "rewards/rejected": -11.230567932128906, "step": 1016 }, { "epoch": 0.35, "learning_rate": 1.8938989122074194e-06, "logits/chosen": -0.9983557462692261, "logits/rejected": -0.9473581314086914, "logps/chosen": -217.3660430908203, "logps/rejected": -262.631591796875, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -0.658740222454071, "rewards/margins": 10.553121566772461, "rewards/rejected": -11.211862564086914, "step": 1017 }, { "epoch": 0.35, "learning_rate": 1.8936510140135247e-06, "logits/chosen": -0.9419716000556946, "logits/rejected": -0.9163904786109924, "logps/chosen": -163.08250427246094, "logps/rejected": -243.592041015625, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -0.8509275913238525, "rewards/margins": 10.247381210327148, "rewards/rejected": -11.098308563232422, "step": 1018 }, { "epoch": 0.35, "learning_rate": 1.8934028428215778e-06, "logits/chosen": -0.9714128971099854, "logits/rejected": -0.9391783475875854, "logps/chosen": -202.87808227539062, "logps/rejected": -256.45806884765625, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": -2.154409170150757, "rewards/margins": 8.85395622253418, "rewards/rejected": -11.008365631103516, "step": 1019 }, { "epoch": 0.35, "learning_rate": 1.8931543987073916e-06, "logits/chosen": -0.9894278049468994, "logits/rejected": -0.9675495624542236, "logps/chosen": -190.75991821289062, "logps/rejected": -301.3377990722656, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -1.5275275707244873, "rewards/margins": 11.286479949951172, "rewards/rejected": -12.814006805419922, "step": 1020 }, { "epoch": 0.35, "learning_rate": 1.8929056817468625e-06, "logits/chosen": -0.953957736492157, "logits/rejected": -0.9282034039497375, "logps/chosen": -258.0946960449219, "logps/rejected": -374.65008544921875, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -0.27757126092910767, "rewards/margins": 12.007040023803711, "rewards/rejected": -12.284611701965332, "step": 1021 }, { "epoch": 0.35, "learning_rate": 1.8926566920159699e-06, "logits/chosen": -0.9806132912635803, "logits/rejected": -0.9470405578613281, "logps/chosen": -199.58612060546875, "logps/rejected": -270.3366394042969, "loss": 0.0879, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4354788064956665, "rewards/margins": 9.582121849060059, "rewards/rejected": -10.017601013183594, "step": 1022 }, { "epoch": 0.35, "learning_rate": 1.8924074295907771e-06, "logits/chosen": -0.961088240146637, "logits/rejected": -0.9606385827064514, "logps/chosen": -213.55393981933594, "logps/rejected": -389.1734924316406, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -0.8993933200836182, "rewards/margins": 15.111604690551758, "rewards/rejected": -16.010997772216797, "step": 1023 }, { "epoch": 0.35, "learning_rate": 1.8921578945474296e-06, "logits/chosen": -0.9796563982963562, "logits/rejected": -0.975489616394043, "logps/chosen": -208.909423828125, "logps/rejected": -325.05029296875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.6344337463378906, "rewards/margins": 11.092733383178711, "rewards/rejected": -12.727169036865234, "step": 1024 }, { "epoch": 0.35, "learning_rate": 1.8919080869621576e-06, "logits/chosen": -0.9526314735412598, "logits/rejected": -0.9404052495956421, "logps/chosen": -221.6474151611328, "logps/rejected": -313.423583984375, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.1208560466766357, "rewards/margins": 10.173017501831055, "rewards/rejected": -11.293872833251953, "step": 1025 }, { "epoch": 0.35, "learning_rate": 1.8916580069112738e-06, "logits/chosen": -0.9855831861495972, "logits/rejected": -0.9719290137290955, "logps/chosen": -205.2432861328125, "logps/rejected": -323.6680603027344, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -2.429690361022949, "rewards/margins": 10.479394912719727, "rewards/rejected": -12.909085273742676, "step": 1026 }, { "epoch": 0.35, "learning_rate": 1.8914076544711738e-06, "logits/chosen": -1.012709617614746, "logits/rejected": -1.0003982782363892, "logps/chosen": -168.9386444091797, "logps/rejected": -276.49163818359375, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": -1.0920708179473877, "rewards/margins": 7.598060131072998, "rewards/rejected": -8.690131187438965, "step": 1027 }, { "epoch": 0.35, "learning_rate": 1.8911570297183367e-06, "logits/chosen": -0.8717463612556458, "logits/rejected": -0.7974902987480164, "logps/chosen": -204.893310546875, "logps/rejected": -289.1626892089844, "loss": 0.0376, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1282764673233032, "rewards/margins": 9.623751640319824, "rewards/rejected": -10.75202751159668, "step": 1028 }, { "epoch": 0.35, "learning_rate": 1.8909061327293258e-06, "logits/chosen": -1.016283392906189, "logits/rejected": -0.9840341210365295, "logps/chosen": -182.78936767578125, "logps/rejected": -242.9944305419922, "loss": 0.0387, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6748461127281189, "rewards/margins": 7.653197765350342, "rewards/rejected": -8.328043937683105, "step": 1029 }, { "epoch": 0.35, "learning_rate": 1.890654963580786e-06, "logits/chosen": -0.936005711555481, "logits/rejected": -0.9113436341285706, "logps/chosen": -230.7313232421875, "logps/rejected": -297.77587890625, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -1.2567342519760132, "rewards/margins": 9.6488676071167, "rewards/rejected": -10.905601501464844, "step": 1030 }, { "epoch": 0.35, "learning_rate": 1.8904035223494464e-06, "logits/chosen": -0.9152992963790894, "logits/rejected": -0.9061254262924194, "logps/chosen": -175.78916931152344, "logps/rejected": -268.2931823730469, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -1.0909231901168823, "rewards/margins": 7.312371730804443, "rewards/rejected": -8.403294563293457, "step": 1031 }, { "epoch": 0.35, "learning_rate": 1.890151809112118e-06, "logits/chosen": -0.8954646587371826, "logits/rejected": -0.8825819492340088, "logps/chosen": -169.77767944335938, "logps/rejected": -264.05352783203125, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -1.7036678791046143, "rewards/margins": 7.8686442375183105, "rewards/rejected": -9.572312355041504, "step": 1032 }, { "epoch": 0.35, "learning_rate": 1.8898998239456966e-06, "logits/chosen": -0.9243831634521484, "logits/rejected": -0.909847617149353, "logps/chosen": -243.01007080078125, "logps/rejected": -314.7390441894531, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": 0.2555014491081238, "rewards/margins": 11.829774856567383, "rewards/rejected": -11.574274063110352, "step": 1033 }, { "epoch": 0.35, "learning_rate": 1.8896475669271598e-06, "logits/chosen": -0.8545152544975281, "logits/rejected": -0.8450998067855835, "logps/chosen": -221.6149444580078, "logps/rejected": -331.002685546875, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.4080160856246948, "rewards/margins": 10.41257095336914, "rewards/rejected": -11.820587158203125, "step": 1034 }, { "epoch": 0.35, "learning_rate": 1.889395038133569e-06, "logits/chosen": -0.9750106930732727, "logits/rejected": -0.953927218914032, "logps/chosen": -199.59739685058594, "logps/rejected": -328.9720458984375, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -0.9413194060325623, "rewards/margins": 12.41104507446289, "rewards/rejected": -13.352363586425781, "step": 1035 }, { "epoch": 0.35, "learning_rate": 1.8891422376420673e-06, "logits/chosen": -1.0198239088058472, "logits/rejected": -0.9975336194038391, "logps/chosen": -136.82119750976562, "logps/rejected": -222.64956665039062, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -0.5442981123924255, "rewards/margins": 9.638779640197754, "rewards/rejected": -10.183076858520508, "step": 1036 }, { "epoch": 0.35, "learning_rate": 1.8888891655298829e-06, "logits/chosen": -0.8372156620025635, "logits/rejected": -0.7982891201972961, "logps/chosen": -197.286865234375, "logps/rejected": -273.00677490234375, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -0.47663578391075134, "rewards/margins": 8.820878982543945, "rewards/rejected": -9.297514915466309, "step": 1037 }, { "epoch": 0.35, "learning_rate": 1.8886358218743254e-06, "logits/chosen": -1.0603967905044556, "logits/rejected": -1.0441081523895264, "logps/chosen": -193.69427490234375, "logps/rejected": -318.94903564453125, "loss": 0.0436, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8711233735084534, "rewards/margins": 11.567066192626953, "rewards/rejected": -12.438189506530762, "step": 1038 }, { "epoch": 0.35, "learning_rate": 1.8883822067527876e-06, "logits/chosen": -0.9937619566917419, "logits/rejected": -0.9681477546691895, "logps/chosen": -231.7794189453125, "logps/rejected": -335.6346130371094, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -0.8482732176780701, "rewards/margins": 10.2245512008667, "rewards/rejected": -11.072824478149414, "step": 1039 }, { "epoch": 0.35, "learning_rate": 1.8881283202427457e-06, "logits/chosen": -0.8734621405601501, "logits/rejected": -0.8329160213470459, "logps/chosen": -211.90316772460938, "logps/rejected": -358.2674560546875, "loss": 0.0606, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6931301355361938, "rewards/margins": 11.903322219848633, "rewards/rejected": -12.596452713012695, "step": 1040 }, { "epoch": 0.36, "learning_rate": 1.8878741624217583e-06, "logits/chosen": -0.959591805934906, "logits/rejected": -0.9514382481575012, "logps/chosen": -193.48703002929688, "logps/rejected": -369.29290771484375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.975882649421692, "rewards/margins": 12.146718978881836, "rewards/rejected": -14.122602462768555, "step": 1041 }, { "epoch": 0.36, "learning_rate": 1.8876197333674675e-06, "logits/chosen": -0.9551475644111633, "logits/rejected": -0.906201183795929, "logps/chosen": -246.22024536132812, "logps/rejected": -292.79052734375, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.26518768072128296, "rewards/margins": 12.33393383026123, "rewards/rejected": -12.599122047424316, "step": 1042 }, { "epoch": 0.36, "learning_rate": 1.8873650331575973e-06, "logits/chosen": -0.9111723899841309, "logits/rejected": -0.8871491551399231, "logps/chosen": -238.30101013183594, "logps/rejected": -347.9361877441406, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -1.1721214056015015, "rewards/margins": 11.110912322998047, "rewards/rejected": -12.283034324645996, "step": 1043 }, { "epoch": 0.36, "learning_rate": 1.8871100618699552e-06, "logits/chosen": -0.9309352040290833, "logits/rejected": -0.9064829349517822, "logps/chosen": -249.06228637695312, "logps/rejected": -357.45806884765625, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625216960906982, "rewards/margins": 12.1436128616333, "rewards/rejected": -14.206134796142578, "step": 1044 }, { "epoch": 0.36, "learning_rate": 1.886854819582432e-06, "logits/chosen": -0.9032780528068542, "logits/rejected": -0.8895872235298157, "logps/chosen": -169.99923706054688, "logps/rejected": -237.67457580566406, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": -0.3892550468444824, "rewards/margins": 8.853856086730957, "rewards/rejected": -9.243110656738281, "step": 1045 }, { "epoch": 0.36, "learning_rate": 1.8865993063730002e-06, "logits/chosen": -1.0007669925689697, "logits/rejected": -0.9546563625335693, "logps/chosen": -231.01614379882812, "logps/rejected": -265.24945068359375, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -1.60890793800354, "rewards/margins": 8.964972496032715, "rewards/rejected": -10.573881149291992, "step": 1046 }, { "epoch": 0.36, "learning_rate": 1.8863435223197155e-06, "logits/chosen": -0.9051845073699951, "logits/rejected": -0.8819416165351868, "logps/chosen": -206.93826293945312, "logps/rejected": -312.2302551269531, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -1.1677764654159546, "rewards/margins": 13.100051879882812, "rewards/rejected": -14.267827987670898, "step": 1047 }, { "epoch": 0.36, "learning_rate": 1.8860874675007163e-06, "logits/chosen": -0.9353067278862, "logits/rejected": -0.884066641330719, "logps/chosen": -199.71527099609375, "logps/rejected": -217.70835876464844, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.20582105219364166, "rewards/margins": 9.14117431640625, "rewards/rejected": -9.34699535369873, "step": 1048 }, { "epoch": 0.36, "learning_rate": 1.8858311419942244e-06, "logits/chosen": -0.8871408104896545, "logits/rejected": -0.8593572378158569, "logps/chosen": -217.19522094726562, "logps/rejected": -314.66845703125, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.2784174680709839, "rewards/margins": 9.587679862976074, "rewards/rejected": -10.866096496582031, "step": 1049 }, { "epoch": 0.36, "learning_rate": 1.8855745458785433e-06, "logits/chosen": -0.9156633615493774, "logits/rejected": -0.8912405371665955, "logps/chosen": -170.65908813476562, "logps/rejected": -254.75184631347656, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.5490913391113281, "rewards/margins": 9.141571044921875, "rewards/rejected": -10.690661430358887, "step": 1050 }, { "epoch": 0.36, "learning_rate": 1.8853176792320595e-06, "logits/chosen": -0.965229332447052, "logits/rejected": -0.9342803359031677, "logps/chosen": -224.14244079589844, "logps/rejected": -361.1180114746094, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -1.6438546180725098, "rewards/margins": 12.581367492675781, "rewards/rejected": -14.225223541259766, "step": 1051 }, { "epoch": 0.36, "learning_rate": 1.8850605421332425e-06, "logits/chosen": -0.9924341440200806, "logits/rejected": -0.9708107113838196, "logps/chosen": -201.112060546875, "logps/rejected": -298.6730041503906, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -1.4673616886138916, "rewards/margins": 11.06396770477295, "rewards/rejected": -12.531328201293945, "step": 1052 }, { "epoch": 0.36, "learning_rate": 1.8848031346606439e-06, "logits/chosen": -0.8849722743034363, "logits/rejected": -0.856685996055603, "logps/chosen": -189.4636688232422, "logps/rejected": -292.31494140625, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.9360252618789673, "rewards/margins": 10.458555221557617, "rewards/rejected": -11.394579887390137, "step": 1053 }, { "epoch": 0.36, "learning_rate": 1.8845454568928981e-06, "logits/chosen": -1.0006942749023438, "logits/rejected": -0.9664852023124695, "logps/chosen": -191.56944274902344, "logps/rejected": -280.0005798339844, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": -1.4100395441055298, "rewards/margins": 10.479427337646484, "rewards/rejected": -11.88946533203125, "step": 1054 }, { "epoch": 0.36, "learning_rate": 1.8842875089087227e-06, "logits/chosen": -0.9870855808258057, "logits/rejected": -0.9324396848678589, "logps/chosen": -207.17605590820312, "logps/rejected": -326.4268798828125, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.5230789184570312, "rewards/margins": 14.870408058166504, "rewards/rejected": -15.393486976623535, "step": 1055 }, { "epoch": 0.36, "learning_rate": 1.884029290786916e-06, "logits/chosen": -1.005998969078064, "logits/rejected": -0.9873730540275574, "logps/chosen": -217.38888549804688, "logps/rejected": -316.1063232421875, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.5128475427627563, "rewards/margins": 11.203316688537598, "rewards/rejected": -12.716163635253906, "step": 1056 }, { "epoch": 0.36, "learning_rate": 1.8837708026063615e-06, "logits/chosen": -0.9813951849937439, "logits/rejected": -0.9526918530464172, "logps/chosen": -190.64279174804688, "logps/rejected": -281.8308410644531, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -1.3565235137939453, "rewards/margins": 9.957525253295898, "rewards/rejected": -11.314047813415527, "step": 1057 }, { "epoch": 0.36, "learning_rate": 1.8835120444460229e-06, "logits/chosen": -0.9859745502471924, "logits/rejected": -0.9635905623435974, "logps/chosen": -166.509033203125, "logps/rejected": -246.39370727539062, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -0.14847704768180847, "rewards/margins": 10.306112289428711, "rewards/rejected": -10.454590797424316, "step": 1058 }, { "epoch": 0.36, "learning_rate": 1.8832530163849473e-06, "logits/chosen": -0.894933819770813, "logits/rejected": -0.8606374263763428, "logps/chosen": -207.44808959960938, "logps/rejected": -309.5439453125, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -1.577739953994751, "rewards/margins": 10.433597564697266, "rewards/rejected": -12.011337280273438, "step": 1059 }, { "epoch": 0.36, "learning_rate": 1.8829937185022646e-06, "logits/chosen": -0.9309861063957214, "logits/rejected": -0.900118887424469, "logps/chosen": -179.6094207763672, "logps/rejected": -235.72482299804688, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -0.20335039496421814, "rewards/margins": 9.041611671447754, "rewards/rejected": -9.244961738586426, "step": 1060 }, { "epoch": 0.36, "learning_rate": 1.8827341508771863e-06, "logits/chosen": -0.910957396030426, "logits/rejected": -0.9058135747909546, "logps/chosen": -174.23773193359375, "logps/rejected": -337.3187255859375, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -0.688129723072052, "rewards/margins": 12.775405883789062, "rewards/rejected": -13.463536262512207, "step": 1061 }, { "epoch": 0.36, "learning_rate": 1.8824743135890069e-06, "logits/chosen": -0.8893176913261414, "logits/rejected": -0.8595686554908752, "logps/chosen": -219.4069366455078, "logps/rejected": -336.88043212890625, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -1.8536909818649292, "rewards/margins": 11.765670776367188, "rewards/rejected": -13.619359970092773, "step": 1062 }, { "epoch": 0.36, "learning_rate": 1.8822142067171032e-06, "logits/chosen": -0.9741157293319702, "logits/rejected": -0.9488915801048279, "logps/chosen": -164.2882080078125, "logps/rejected": -282.6109619140625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.364629864692688, "rewards/margins": 12.623302459716797, "rewards/rejected": -12.987933158874512, "step": 1063 }, { "epoch": 0.36, "learning_rate": 1.8819538303409341e-06, "logits/chosen": -0.9352109432220459, "logits/rejected": -0.9098097681999207, "logps/chosen": -201.4636993408203, "logps/rejected": -273.7213134765625, "loss": 0.0582, "rewards/accuracies": 0.875, "rewards/chosen": -0.9560637474060059, "rewards/margins": 9.033597946166992, "rewards/rejected": -9.989662170410156, "step": 1064 }, { "epoch": 0.36, "learning_rate": 1.8816931845400413e-06, "logits/chosen": -1.0328887701034546, "logits/rejected": -0.9737735986709595, "logps/chosen": -237.859130859375, "logps/rejected": -332.66082763671875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.005756683647632599, "rewards/margins": 14.920745849609375, "rewards/rejected": -14.926501274108887, "step": 1065 }, { "epoch": 0.36, "learning_rate": 1.881432269394048e-06, "logits/chosen": -0.9326927661895752, "logits/rejected": -0.9242473244667053, "logps/chosen": -218.96115112304688, "logps/rejected": -361.585693359375, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -1.3765497207641602, "rewards/margins": 9.841958999633789, "rewards/rejected": -11.21850872039795, "step": 1066 }, { "epoch": 0.36, "learning_rate": 1.8811710849826603e-06, "logits/chosen": -0.9719162583351135, "logits/rejected": -0.9457948803901672, "logps/chosen": -234.7952880859375, "logps/rejected": -325.8712158203125, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -1.4423398971557617, "rewards/margins": 9.312775611877441, "rewards/rejected": -10.755114555358887, "step": 1067 }, { "epoch": 0.36, "learning_rate": 1.8809096313856668e-06, "logits/chosen": -0.8868853449821472, "logits/rejected": -0.8698877692222595, "logps/chosen": -230.46751403808594, "logps/rejected": -331.077392578125, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.6051421165466309, "rewards/margins": 10.315271377563477, "rewards/rejected": -11.92041301727295, "step": 1068 }, { "epoch": 0.36, "learning_rate": 1.8806479086829377e-06, "logits/chosen": -0.9502058625221252, "logits/rejected": -0.9334902763366699, "logps/chosen": -172.31552124023438, "logps/rejected": -247.0098419189453, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -1.125835657119751, "rewards/margins": 11.017370223999023, "rewards/rejected": -12.143206596374512, "step": 1069 }, { "epoch": 0.37, "learning_rate": 1.8803859169544257e-06, "logits/chosen": -0.9462164044380188, "logits/rejected": -0.92381352186203, "logps/chosen": -232.014404296875, "logps/rejected": -340.8966369628906, "loss": 0.028, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8062876462936401, "rewards/margins": 12.165231704711914, "rewards/rejected": -12.97152042388916, "step": 1070 }, { "epoch": 0.37, "learning_rate": 1.8801236562801655e-06, "logits/chosen": -0.9553064703941345, "logits/rejected": -0.9271108508110046, "logps/chosen": -260.9956359863281, "logps/rejected": -343.98651123046875, "loss": 0.0514, "rewards/accuracies": 1.0, "rewards/chosen": -0.43035879731178284, "rewards/margins": 11.98579216003418, "rewards/rejected": -12.416149139404297, "step": 1071 }, { "epoch": 0.37, "learning_rate": 1.8798611267402744e-06, "logits/chosen": -0.9696205258369446, "logits/rejected": -0.9607225060462952, "logps/chosen": -233.35850524902344, "logps/rejected": -359.9779357910156, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -1.024810791015625, "rewards/margins": 13.362688064575195, "rewards/rejected": -14.38749885559082, "step": 1072 }, { "epoch": 0.37, "learning_rate": 1.8795983284149515e-06, "logits/chosen": -0.9784599542617798, "logits/rejected": -0.9661076068878174, "logps/chosen": -190.02297973632812, "logps/rejected": -310.42034912109375, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.9486210346221924, "rewards/margins": 11.059249877929688, "rewards/rejected": -12.007871627807617, "step": 1073 }, { "epoch": 0.37, "learning_rate": 1.879335261384478e-06, "logits/chosen": -0.828659176826477, "logits/rejected": -0.8100801110267639, "logps/chosen": -197.0509796142578, "logps/rejected": -307.40728759765625, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -1.034606695175171, "rewards/margins": 10.862811088562012, "rewards/rejected": -11.897418022155762, "step": 1074 }, { "epoch": 0.37, "learning_rate": 1.879071925729217e-06, "logits/chosen": -0.9520967602729797, "logits/rejected": -0.9326341152191162, "logps/chosen": -263.1643981933594, "logps/rejected": -358.9059143066406, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.4297804832458496, "rewards/margins": 11.005817413330078, "rewards/rejected": -12.43559741973877, "step": 1075 }, { "epoch": 0.37, "learning_rate": 1.8788083215296145e-06, "logits/chosen": -0.9699141383171082, "logits/rejected": -0.9547759890556335, "logps/chosen": -210.86354064941406, "logps/rejected": -333.0281677246094, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 0.018395524471998215, "rewards/margins": 13.005897521972656, "rewards/rejected": -12.987500190734863, "step": 1076 }, { "epoch": 0.37, "learning_rate": 1.8785444488661973e-06, "logits/chosen": -0.8877097964286804, "logits/rejected": -0.8507330417633057, "logps/chosen": -266.4693603515625, "logps/rejected": -386.4822082519531, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": -0.5057427287101746, "rewards/margins": 12.699779510498047, "rewards/rejected": -13.205522537231445, "step": 1077 }, { "epoch": 0.37, "learning_rate": 1.8782803078195752e-06, "logits/chosen": -0.9517576098442078, "logits/rejected": -0.9431162476539612, "logps/chosen": -234.71954345703125, "logps/rejected": -335.65386962890625, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": -0.519858181476593, "rewards/margins": 10.968938827514648, "rewards/rejected": -11.488798141479492, "step": 1078 }, { "epoch": 0.37, "learning_rate": 1.8780158984704397e-06, "logits/chosen": -0.9738491773605347, "logits/rejected": -0.9425783157348633, "logps/chosen": -220.14700317382812, "logps/rejected": -335.4359130859375, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -1.256330966949463, "rewards/margins": 12.484599113464355, "rewards/rejected": -13.740930557250977, "step": 1079 }, { "epoch": 0.37, "learning_rate": 1.8777512208995644e-06, "logits/chosen": -0.9869896173477173, "logits/rejected": -0.941840648651123, "logps/chosen": -229.00660705566406, "logps/rejected": -308.84619140625, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.5488306283950806, "rewards/margins": 12.381946563720703, "rewards/rejected": -13.930776596069336, "step": 1080 }, { "epoch": 0.37, "learning_rate": 1.8774862751878038e-06, "logits/chosen": -0.9068428874015808, "logits/rejected": -0.8661561012268066, "logps/chosen": -231.07281494140625, "logps/rejected": -317.5367736816406, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -1.7570078372955322, "rewards/margins": 10.886053085327148, "rewards/rejected": -12.643059730529785, "step": 1081 }, { "epoch": 0.37, "learning_rate": 1.8772210614160957e-06, "logits/chosen": -0.9099252223968506, "logits/rejected": -0.9066486954689026, "logps/chosen": -153.84677124023438, "logps/rejected": -291.3558349609375, "loss": 0.0243, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7893725633621216, "rewards/margins": 8.350689888000488, "rewards/rejected": -10.14006233215332, "step": 1082 }, { "epoch": 0.37, "learning_rate": 1.8769555796654597e-06, "logits/chosen": -0.9370359778404236, "logits/rejected": -0.9066627621650696, "logps/chosen": -176.87359619140625, "logps/rejected": -306.6036376953125, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.4881097078323364, "rewards/margins": 11.820338249206543, "rewards/rejected": -13.308448791503906, "step": 1083 }, { "epoch": 0.37, "learning_rate": 1.8766898300169963e-06, "logits/chosen": -0.9828417897224426, "logits/rejected": -0.9563087821006775, "logps/chosen": -148.48126220703125, "logps/rejected": -217.031494140625, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -1.347227931022644, "rewards/margins": 9.136096000671387, "rewards/rejected": -10.483322143554688, "step": 1084 }, { "epoch": 0.37, "learning_rate": 1.8764238125518878e-06, "logits/chosen": -0.9042534232139587, "logits/rejected": -0.8568911552429199, "logps/chosen": -199.61862182617188, "logps/rejected": -274.4550476074219, "loss": 0.0448, "rewards/accuracies": 0.875, "rewards/chosen": -0.28267979621887207, "rewards/margins": 10.291519165039062, "rewards/rejected": -10.574198722839355, "step": 1085 }, { "epoch": 0.37, "learning_rate": 1.8761575273514e-06, "logits/chosen": -0.9899535179138184, "logits/rejected": -0.970119297504425, "logps/chosen": -213.05613708496094, "logps/rejected": -309.677978515625, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.348008632659912, "rewards/margins": 9.906181335449219, "rewards/rejected": -11.254189491271973, "step": 1086 }, { "epoch": 0.37, "learning_rate": 1.8758909744968786e-06, "logits/chosen": -0.8352717161178589, "logits/rejected": -0.8246398568153381, "logps/chosen": -115.80062103271484, "logps/rejected": -213.2848663330078, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -0.8737888336181641, "rewards/margins": 8.356317520141602, "rewards/rejected": -9.230107307434082, "step": 1087 }, { "epoch": 0.37, "learning_rate": 1.8756241540697526e-06, "logits/chosen": -1.0366748571395874, "logits/rejected": -1.016118049621582, "logps/chosen": -195.17518615722656, "logps/rejected": -277.391357421875, "loss": 0.0782, "rewards/accuracies": 1.0, "rewards/chosen": -1.285847544670105, "rewards/margins": 10.050312995910645, "rewards/rejected": -11.336159706115723, "step": 1088 }, { "epoch": 0.37, "learning_rate": 1.875357066151531e-06, "logits/chosen": -0.8530562520027161, "logits/rejected": -0.8143896460533142, "logps/chosen": -224.7672119140625, "logps/rejected": -317.2368469238281, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -1.140974521636963, "rewards/margins": 10.796823501586914, "rewards/rejected": -11.937797546386719, "step": 1089 }, { "epoch": 0.37, "learning_rate": 1.875089710823806e-06, "logits/chosen": -0.9103756546974182, "logits/rejected": -0.8827741146087646, "logps/chosen": -184.57666015625, "logps/rejected": -262.733642578125, "loss": 0.0463, "rewards/accuracies": 0.9375, "rewards/chosen": -0.43197575211524963, "rewards/margins": 9.756312370300293, "rewards/rejected": -10.188287734985352, "step": 1090 }, { "epoch": 0.37, "learning_rate": 1.8748220881682505e-06, "logits/chosen": -0.897513747215271, "logits/rejected": -0.8963592648506165, "logps/chosen": -178.77511596679688, "logps/rejected": -278.1046447753906, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.028837740421295166, "rewards/margins": 10.9417724609375, "rewards/rejected": -10.912935256958008, "step": 1091 }, { "epoch": 0.37, "learning_rate": 1.8745541982666202e-06, "logits/chosen": -1.004793405532837, "logits/rejected": -0.9885936975479126, "logps/chosen": -123.73291015625, "logps/rejected": -275.66497802734375, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.9600325226783752, "rewards/margins": 10.90700626373291, "rewards/rejected": -11.867037773132324, "step": 1092 }, { "epoch": 0.37, "learning_rate": 1.8742860412007513e-06, "logits/chosen": -0.9827119708061218, "logits/rejected": -0.9513375759124756, "logps/chosen": -191.59988403320312, "logps/rejected": -275.446533203125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.746732473373413, "rewards/margins": 11.373015403747559, "rewards/rejected": -13.119747161865234, "step": 1093 }, { "epoch": 0.37, "learning_rate": 1.8740176170525622e-06, "logits/chosen": -0.9734506011009216, "logits/rejected": -0.957241952419281, "logps/chosen": -231.4215850830078, "logps/rejected": -391.6517333984375, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -1.364863634109497, "rewards/margins": 14.821107864379883, "rewards/rejected": -16.185970306396484, "step": 1094 }, { "epoch": 0.37, "learning_rate": 1.873748925904053e-06, "logits/chosen": -0.8781458735466003, "logits/rejected": -0.8493611812591553, "logps/chosen": -227.56396484375, "logps/rejected": -344.35614013671875, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.9571573734283447, "rewards/margins": 13.079167366027832, "rewards/rejected": -14.036323547363281, "step": 1095 }, { "epoch": 0.37, "learning_rate": 1.8734799678373048e-06, "logits/chosen": -0.9769335389137268, "logits/rejected": -0.9533987045288086, "logps/chosen": -202.32424926757812, "logps/rejected": -296.95477294921875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 0.3946768045425415, "rewards/margins": 10.93864917755127, "rewards/rejected": -10.543972969055176, "step": 1096 }, { "epoch": 0.37, "learning_rate": 1.8732107429344808e-06, "logits/chosen": -0.8960181474685669, "logits/rejected": -0.8819341063499451, "logps/chosen": -227.54498291015625, "logps/rejected": -306.0995178222656, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -1.2484710216522217, "rewards/margins": 7.896029472351074, "rewards/rejected": -9.144500732421875, "step": 1097 }, { "epoch": 0.37, "learning_rate": 1.872941251277825e-06, "logits/chosen": -0.9019114375114441, "logits/rejected": -0.8672541975975037, "logps/chosen": -201.5950927734375, "logps/rejected": -274.35540771484375, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": -1.2236038446426392, "rewards/margins": 9.607706069946289, "rewards/rejected": -10.831311225891113, "step": 1098 }, { "epoch": 0.38, "learning_rate": 1.8726714929496646e-06, "logits/chosen": -0.9512694478034973, "logits/rejected": -0.9544159770011902, "logps/chosen": -157.76473999023438, "logps/rejected": -318.28662109375, "loss": 0.1186, "rewards/accuracies": 1.0, "rewards/chosen": 0.11320126056671143, "rewards/margins": 12.48204517364502, "rewards/rejected": -12.368843078613281, "step": 1099 }, { "epoch": 0.38, "learning_rate": 1.8724014680324057e-06, "logits/chosen": -0.9075066447257996, "logits/rejected": -0.888346791267395, "logps/chosen": -205.34683227539062, "logps/rejected": -302.1856994628906, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.3540596961975098, "rewards/margins": 10.88559341430664, "rewards/rejected": -12.239653587341309, "step": 1100 }, { "epoch": 0.38, "learning_rate": 1.8721311766085375e-06, "logits/chosen": -0.9379795789718628, "logits/rejected": -0.9097322225570679, "logps/chosen": -161.75970458984375, "logps/rejected": -261.4141540527344, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.23311299085617065, "rewards/margins": 9.722332000732422, "rewards/rejected": -9.955446243286133, "step": 1101 }, { "epoch": 0.38, "learning_rate": 1.871860618760631e-06, "logits/chosen": -0.795966386795044, "logits/rejected": -0.741388201713562, "logps/chosen": -228.29794311523438, "logps/rejected": -291.6007385253906, "loss": 0.0239, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9151914119720459, "rewards/margins": 9.909635543823242, "rewards/rejected": -10.824828147888184, "step": 1102 }, { "epoch": 0.38, "learning_rate": 1.8715897945713373e-06, "logits/chosen": -0.8656696081161499, "logits/rejected": -0.8423115015029907, "logps/chosen": -187.5298614501953, "logps/rejected": -293.1224060058594, "loss": 0.0348, "rewards/accuracies": 1.0, "rewards/chosen": -0.6583720445632935, "rewards/margins": 10.524479866027832, "rewards/rejected": -11.18285083770752, "step": 1103 }, { "epoch": 0.38, "learning_rate": 1.8713187041233893e-06, "logits/chosen": -1.0006815195083618, "logits/rejected": -0.9741655588150024, "logps/chosen": -228.79766845703125, "logps/rejected": -308.9602355957031, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 0.35985127091407776, "rewards/margins": 12.79030990600586, "rewards/rejected": -12.430458068847656, "step": 1104 }, { "epoch": 0.38, "learning_rate": 1.8710473474996017e-06, "logits/chosen": -0.9034968614578247, "logits/rejected": -0.8673596382141113, "logps/chosen": -233.59417724609375, "logps/rejected": -286.0211181640625, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": -0.9195971488952637, "rewards/margins": 8.263625144958496, "rewards/rejected": -9.183221817016602, "step": 1105 }, { "epoch": 0.38, "learning_rate": 1.8707757247828704e-06, "logits/chosen": -0.9313420057296753, "logits/rejected": -0.9164752960205078, "logps/chosen": -185.2618408203125, "logps/rejected": -298.62921142578125, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -0.7355726361274719, "rewards/margins": 11.575891494750977, "rewards/rejected": -12.311464309692383, "step": 1106 }, { "epoch": 0.38, "learning_rate": 1.870503836056172e-06, "logits/chosen": -0.9129209518432617, "logits/rejected": -0.9141771197319031, "logps/chosen": -229.76095581054688, "logps/rejected": -353.18023681640625, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.391547679901123, "rewards/margins": 9.931417465209961, "rewards/rejected": -11.322965621948242, "step": 1107 }, { "epoch": 0.38, "learning_rate": 1.8702316814025648e-06, "logits/chosen": -0.915841817855835, "logits/rejected": -0.8947139382362366, "logps/chosen": -256.6459045410156, "logps/rejected": -355.05853271484375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.01827608048915863, "rewards/margins": 11.42374324798584, "rewards/rejected": -11.442020416259766, "step": 1108 }, { "epoch": 0.38, "learning_rate": 1.8699592609051887e-06, "logits/chosen": -0.8974980711936951, "logits/rejected": -0.8607132434844971, "logps/chosen": -199.71453857421875, "logps/rejected": -292.7204895019531, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.1012519598007202, "rewards/margins": 11.027009963989258, "rewards/rejected": -12.128262519836426, "step": 1109 }, { "epoch": 0.38, "learning_rate": 1.8696865746472637e-06, "logits/chosen": -0.9536197185516357, "logits/rejected": -0.9314932823181152, "logps/chosen": -185.29248046875, "logps/rejected": -282.88818359375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.9120955467224121, "rewards/margins": 10.554869651794434, "rewards/rejected": -11.466965675354004, "step": 1110 }, { "epoch": 0.38, "learning_rate": 1.8694136227120923e-06, "logits/chosen": -0.9825069904327393, "logits/rejected": -0.9720380306243896, "logps/chosen": -200.27383422851562, "logps/rejected": -297.90435791015625, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": 0.4270235598087311, "rewards/margins": 10.849587440490723, "rewards/rejected": -10.422563552856445, "step": 1111 }, { "epoch": 0.38, "learning_rate": 1.8691404051830575e-06, "logits/chosen": -0.975624144077301, "logits/rejected": -0.9689940810203552, "logps/chosen": -269.5000915527344, "logps/rejected": -415.4847717285156, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": 0.19821809232234955, "rewards/margins": 12.477652549743652, "rewards/rejected": -12.279433250427246, "step": 1112 }, { "epoch": 0.38, "learning_rate": 1.8688669221436228e-06, "logits/chosen": -0.9363716244697571, "logits/rejected": -0.9169753193855286, "logps/chosen": -197.593017578125, "logps/rejected": -253.36325073242188, "loss": 0.0398, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0305020809173584, "rewards/margins": 8.161850929260254, "rewards/rejected": -9.192353248596191, "step": 1113 }, { "epoch": 0.38, "learning_rate": 1.8685931736773347e-06, "logits/chosen": -0.9166965484619141, "logits/rejected": -0.9167832136154175, "logps/chosen": -194.42469787597656, "logps/rejected": -296.462890625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.29715126752853394, "rewards/margins": 10.233807563781738, "rewards/rejected": -10.53095817565918, "step": 1114 }, { "epoch": 0.38, "learning_rate": 1.8683191598678186e-06, "logits/chosen": -0.9033105969429016, "logits/rejected": -0.8770193457603455, "logps/chosen": -176.30393981933594, "logps/rejected": -217.13577270507812, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.8145727515220642, "rewards/margins": 8.337270736694336, "rewards/rejected": -9.151843070983887, "step": 1115 }, { "epoch": 0.38, "learning_rate": 1.868044880798782e-06, "logits/chosen": -0.8949413895606995, "logits/rejected": -0.8600044250488281, "logps/chosen": -222.4636993408203, "logps/rejected": -327.8298645019531, "loss": 0.0241, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2505592107772827, "rewards/margins": 12.72410774230957, "rewards/rejected": -12.974665641784668, "step": 1116 }, { "epoch": 0.38, "learning_rate": 1.8677703365540137e-06, "logits/chosen": -0.9005823731422424, "logits/rejected": -0.8647844195365906, "logps/chosen": -202.84967041015625, "logps/rejected": -278.4464111328125, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": 1.146060824394226, "rewards/margins": 11.666610717773438, "rewards/rejected": -10.520550727844238, "step": 1117 }, { "epoch": 0.38, "learning_rate": 1.867495527217383e-06, "logits/chosen": -0.9484938979148865, "logits/rejected": -0.9068089723587036, "logps/chosen": -279.19830322265625, "logps/rejected": -388.2373962402344, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": 0.5494562387466431, "rewards/margins": 11.888360977172852, "rewards/rejected": -11.338905334472656, "step": 1118 }, { "epoch": 0.38, "learning_rate": 1.8672204528728405e-06, "logits/chosen": -0.827565610408783, "logits/rejected": -0.7674148678779602, "logps/chosen": -235.58160400390625, "logps/rejected": -223.82102966308594, "loss": 0.0601, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5962632894515991, "rewards/margins": 8.916526794433594, "rewards/rejected": -8.320262908935547, "step": 1119 }, { "epoch": 0.38, "learning_rate": 1.8669451136044179e-06, "logits/chosen": -1.0253061056137085, "logits/rejected": -1.0071635246276855, "logps/chosen": -201.80982971191406, "logps/rejected": -340.6843566894531, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7194579839706421, "rewards/margins": 12.666301727294922, "rewards/rejected": -13.385760307312012, "step": 1120 }, { "epoch": 0.38, "learning_rate": 1.8666695094962267e-06, "logits/chosen": -0.9590908288955688, "logits/rejected": -0.948496401309967, "logps/chosen": -168.08924865722656, "logps/rejected": -303.42327880859375, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": -0.3205120265483856, "rewards/margins": 13.193056106567383, "rewards/rejected": -13.513567924499512, "step": 1121 }, { "epoch": 0.38, "learning_rate": 1.8663936406324611e-06, "logits/chosen": -0.9787008166313171, "logits/rejected": -0.9669800400733948, "logps/chosen": -162.48532104492188, "logps/rejected": -256.9572448730469, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": -0.8761616349220276, "rewards/margins": 9.135656356811523, "rewards/rejected": -10.01181697845459, "step": 1122 }, { "epoch": 0.38, "learning_rate": 1.8661175070973947e-06, "logits/chosen": -0.8542965650558472, "logits/rejected": -0.8416265249252319, "logps/chosen": -136.79981994628906, "logps/rejected": -244.78224182128906, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -1.3438589572906494, "rewards/margins": 8.588869094848633, "rewards/rejected": -9.932726860046387, "step": 1123 }, { "epoch": 0.38, "learning_rate": 1.865841108975382e-06, "logits/chosen": -0.9226089119911194, "logits/rejected": -0.8965253829956055, "logps/chosen": -204.97198486328125, "logps/rejected": -346.8670959472656, "loss": 0.1092, "rewards/accuracies": 1.0, "rewards/chosen": -0.4794670641422272, "rewards/margins": 13.068028450012207, "rewards/rejected": -13.547496795654297, "step": 1124 }, { "epoch": 0.38, "learning_rate": 1.86556444635086e-06, "logits/chosen": -0.8937702775001526, "logits/rejected": -0.8869764804840088, "logps/chosen": -145.36331176757812, "logps/rejected": -239.5681610107422, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -0.25846219062805176, "rewards/margins": 10.036860466003418, "rewards/rejected": -10.29532241821289, "step": 1125 }, { "epoch": 0.38, "learning_rate": 1.8652875193083447e-06, "logits/chosen": -0.8882995247840881, "logits/rejected": -0.8832316398620605, "logps/chosen": -107.28327178955078, "logps/rejected": -224.1479949951172, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -0.6160633563995361, "rewards/margins": 10.64472770690918, "rewards/rejected": -11.260791778564453, "step": 1126 }, { "epoch": 0.38, "learning_rate": 1.865010327932433e-06, "logits/chosen": -0.8357104659080505, "logits/rejected": -0.8202441930770874, "logps/chosen": -132.8583984375, "logps/rejected": -266.3085021972656, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -0.9376859664916992, "rewards/margins": 11.406655311584473, "rewards/rejected": -12.344341278076172, "step": 1127 }, { "epoch": 0.38, "learning_rate": 1.8647328723078037e-06, "logits/chosen": -0.8479474782943726, "logits/rejected": -0.849312424659729, "logps/chosen": -188.7648162841797, "logps/rejected": -341.99432373046875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.6526252031326294, "rewards/margins": 12.400588035583496, "rewards/rejected": -13.053211212158203, "step": 1128 }, { "epoch": 0.39, "learning_rate": 1.8644551525192158e-06, "logits/chosen": -0.900395393371582, "logits/rejected": -0.8863638043403625, "logps/chosen": -240.68264770507812, "logps/rejected": -365.81927490234375, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -0.7231094241142273, "rewards/margins": 10.377252578735352, "rewards/rejected": -11.100361824035645, "step": 1129 }, { "epoch": 0.39, "learning_rate": 1.8641771686515082e-06, "logits/chosen": -0.9304121136665344, "logits/rejected": -0.9076136946678162, "logps/chosen": -167.0400848388672, "logps/rejected": -289.9357604980469, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -1.4837151765823364, "rewards/margins": 11.077375411987305, "rewards/rejected": -12.561090469360352, "step": 1130 }, { "epoch": 0.39, "learning_rate": 1.8638989207896017e-06, "logits/chosen": -0.962558388710022, "logits/rejected": -0.9478775262832642, "logps/chosen": -240.16958618164062, "logps/rejected": -415.98712158203125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.524753987789154, "rewards/margins": 12.314446449279785, "rewards/rejected": -12.839200019836426, "step": 1131 }, { "epoch": 0.39, "learning_rate": 1.8636204090184967e-06, "logits/chosen": -0.9933288097381592, "logits/rejected": -0.9726072549819946, "logps/chosen": -192.92929077148438, "logps/rejected": -312.69952392578125, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": -0.10414808988571167, "rewards/margins": 12.025674819946289, "rewards/rejected": -12.129823684692383, "step": 1132 }, { "epoch": 0.39, "learning_rate": 1.8633416334232752e-06, "logits/chosen": -0.830796480178833, "logits/rejected": -0.8104715943336487, "logps/chosen": -126.58826446533203, "logps/rejected": -240.72544860839844, "loss": 0.0382, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9097707271575928, "rewards/margins": 8.97662353515625, "rewards/rejected": -10.886394500732422, "step": 1133 }, { "epoch": 0.39, "learning_rate": 1.863062594089099e-06, "logits/chosen": -0.855195939540863, "logits/rejected": -0.8371503949165344, "logps/chosen": -169.06695556640625, "logps/rejected": -336.0827941894531, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": 0.2954796850681305, "rewards/margins": 12.80073070526123, "rewards/rejected": -12.505250930786133, "step": 1134 }, { "epoch": 0.39, "learning_rate": 1.862783291101211e-06, "logits/chosen": -0.8761579990386963, "logits/rejected": -0.8327513933181763, "logps/chosen": -228.7827911376953, "logps/rejected": -308.8614196777344, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -0.038139909505844116, "rewards/margins": 11.349247932434082, "rewards/rejected": -11.387388229370117, "step": 1135 }, { "epoch": 0.39, "learning_rate": 1.8625037245449337e-06, "logits/chosen": -0.8793193101882935, "logits/rejected": -0.8718335628509521, "logps/chosen": -168.3289337158203, "logps/rejected": -303.4655456542969, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": -0.8494719862937927, "rewards/margins": 11.14328670501709, "rewards/rejected": -11.992756843566895, "step": 1136 }, { "epoch": 0.39, "learning_rate": 1.8622238945056719e-06, "logits/chosen": -0.999810516834259, "logits/rejected": -0.9879650473594666, "logps/chosen": -178.001953125, "logps/rejected": -289.44281005859375, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": 0.4900147020816803, "rewards/margins": 10.236001014709473, "rewards/rejected": -9.745985984802246, "step": 1137 }, { "epoch": 0.39, "learning_rate": 1.861943801068909e-06, "logits/chosen": -0.9326390027999878, "logits/rejected": -0.9114774465560913, "logps/chosen": -184.0287322998047, "logps/rejected": -304.41949462890625, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -0.4535476565361023, "rewards/margins": 11.110340118408203, "rewards/rejected": -11.563887596130371, "step": 1138 }, { "epoch": 0.39, "learning_rate": 1.8616634443202099e-06, "logits/chosen": -1.0413371324539185, "logits/rejected": -1.0134634971618652, "logps/chosen": -208.84896850585938, "logps/rejected": -291.66668701171875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 0.2705116868019104, "rewards/margins": 12.088695526123047, "rewards/rejected": -11.818184852600098, "step": 1139 }, { "epoch": 0.39, "learning_rate": 1.8613828243452204e-06, "logits/chosen": -1.0426889657974243, "logits/rejected": -1.0209375619888306, "logps/chosen": -251.74928283691406, "logps/rejected": -324.8038330078125, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -1.217635154724121, "rewards/margins": 10.854131698608398, "rewards/rejected": -12.071767807006836, "step": 1140 }, { "epoch": 0.39, "learning_rate": 1.8611019412296647e-06, "logits/chosen": -0.8818244338035583, "logits/rejected": -0.8603518605232239, "logps/chosen": -177.3685760498047, "logps/rejected": -282.1255187988281, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -1.349343180656433, "rewards/margins": 10.475494384765625, "rewards/rejected": -11.824836730957031, "step": 1141 }, { "epoch": 0.39, "learning_rate": 1.8608207950593497e-06, "logits/chosen": -0.9570750594139099, "logits/rejected": -0.9421958327293396, "logps/chosen": -221.7193145751953, "logps/rejected": -388.17022705078125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 0.5214690566062927, "rewards/margins": 13.072253227233887, "rewards/rejected": -12.55078411102295, "step": 1142 }, { "epoch": 0.39, "learning_rate": 1.8605393859201612e-06, "logits/chosen": -0.9302368760108948, "logits/rejected": -0.8547126650810242, "logps/chosen": -239.56321716308594, "logps/rejected": -258.14794921875, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": 0.4994371235370636, "rewards/margins": 9.437260627746582, "rewards/rejected": -8.937824249267578, "step": 1143 }, { "epoch": 0.39, "learning_rate": 1.8602577138980663e-06, "logits/chosen": -0.9607137441635132, "logits/rejected": -0.9471020102500916, "logps/chosen": -190.57733154296875, "logps/rejected": -276.38623046875, "loss": 0.0162, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4540258646011353, "rewards/margins": 10.711362838745117, "rewards/rejected": -12.165389060974121, "step": 1144 }, { "epoch": 0.39, "learning_rate": 1.8599757790791113e-06, "logits/chosen": -0.9360750317573547, "logits/rejected": -0.9284728169441223, "logps/chosen": -258.818115234375, "logps/rejected": -406.0367126464844, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.4180075228214264, "rewards/margins": 13.952696800231934, "rewards/rejected": -14.370702743530273, "step": 1145 }, { "epoch": 0.39, "learning_rate": 1.8596935815494242e-06, "logits/chosen": -1.0115809440612793, "logits/rejected": -0.9896063804626465, "logps/chosen": -263.86370849609375, "logps/rejected": -363.0028076171875, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": -1.4031367301940918, "rewards/margins": 11.553946495056152, "rewards/rejected": -12.957082748413086, "step": 1146 }, { "epoch": 0.39, "learning_rate": 1.8594111213952115e-06, "logits/chosen": -0.850604236125946, "logits/rejected": -0.824655294418335, "logps/chosen": -121.76390075683594, "logps/rejected": -248.958740234375, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -0.12075749039649963, "rewards/margins": 11.806915283203125, "rewards/rejected": -11.927672386169434, "step": 1147 }, { "epoch": 0.39, "learning_rate": 1.8591283987027614e-06, "logits/chosen": -0.956502377986908, "logits/rejected": -0.931961715221405, "logps/chosen": -225.67649841308594, "logps/rejected": -320.5288391113281, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8432906866073608, "rewards/margins": 11.827878952026367, "rewards/rejected": -12.67116928100586, "step": 1148 }, { "epoch": 0.39, "learning_rate": 1.858845413558442e-06, "logits/chosen": -0.9901552796363831, "logits/rejected": -0.9515652656555176, "logps/chosen": -175.6011962890625, "logps/rejected": -214.953369140625, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5481373071670532, "rewards/margins": 9.690764427185059, "rewards/rejected": -10.238901138305664, "step": 1149 }, { "epoch": 0.39, "learning_rate": 1.858562166048701e-06, "logits/chosen": -0.941990315914154, "logits/rejected": -0.9294713139533997, "logps/chosen": -166.5891571044922, "logps/rejected": -264.8780517578125, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -1.050054669380188, "rewards/margins": 8.624364852905273, "rewards/rejected": -9.674420356750488, "step": 1150 }, { "epoch": 0.39, "learning_rate": 1.8582786562600666e-06, "logits/chosen": -0.962614893913269, "logits/rejected": -0.9523874521255493, "logps/chosen": -178.51319885253906, "logps/rejected": -263.1114196777344, "loss": 0.0535, "rewards/accuracies": 1.0, "rewards/chosen": -1.5135844945907593, "rewards/margins": 9.44373893737793, "rewards/rejected": -10.957324981689453, "step": 1151 }, { "epoch": 0.39, "learning_rate": 1.857994884279147e-06, "logits/chosen": -1.0017614364624023, "logits/rejected": -0.9774379134178162, "logps/chosen": -229.28707885742188, "logps/rejected": -324.8057861328125, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -0.37433889508247375, "rewards/margins": 12.229974746704102, "rewards/rejected": -12.604313850402832, "step": 1152 }, { "epoch": 0.39, "learning_rate": 1.8577108501926314e-06, "logits/chosen": -0.9061387777328491, "logits/rejected": -0.8545132279396057, "logps/chosen": -180.36268615722656, "logps/rejected": -254.18601989746094, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": 0.2045917809009552, "rewards/margins": 11.900794982910156, "rewards/rejected": -11.696202278137207, "step": 1153 }, { "epoch": 0.39, "learning_rate": 1.8574265540872874e-06, "logits/chosen": -0.9439229965209961, "logits/rejected": -0.9093415141105652, "logps/chosen": -227.0207061767578, "logps/rejected": -309.1744079589844, "loss": 0.0831, "rewards/accuracies": 1.0, "rewards/chosen": -0.05105157941579819, "rewards/margins": 12.305950164794922, "rewards/rejected": -12.357002258300781, "step": 1154 }, { "epoch": 0.39, "learning_rate": 1.8571419960499638e-06, "logits/chosen": -0.9550948143005371, "logits/rejected": -0.9372240900993347, "logps/chosen": -184.65875244140625, "logps/rejected": -318.58221435546875, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": 0.5999679565429688, "rewards/margins": 13.073423385620117, "rewards/rejected": -12.473455429077148, "step": 1155 }, { "epoch": 0.39, "learning_rate": 1.8568571761675891e-06, "logits/chosen": -0.9289994239807129, "logits/rejected": -0.9114392995834351, "logps/chosen": -177.07020568847656, "logps/rejected": -220.96250915527344, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": -1.3358697891235352, "rewards/margins": 7.137768268585205, "rewards/rejected": -8.473637580871582, "step": 1156 }, { "epoch": 0.39, "learning_rate": 1.856572094527172e-06, "logits/chosen": -0.8086042404174805, "logits/rejected": -0.7699186205863953, "logps/chosen": -172.0417022705078, "logps/rejected": -216.12203979492188, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -1.0834877490997314, "rewards/margins": 7.460629463195801, "rewards/rejected": -8.54411792755127, "step": 1157 }, { "epoch": 0.4, "learning_rate": 1.8562867512158014e-06, "logits/chosen": -0.9180102348327637, "logits/rejected": -0.871285617351532, "logps/chosen": -203.355224609375, "logps/rejected": -261.2488098144531, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.5629511475563049, "rewards/margins": 11.826238632202148, "rewards/rejected": -12.389189720153809, "step": 1158 }, { "epoch": 0.4, "learning_rate": 1.856001146320645e-06, "logits/chosen": -0.8916929960250854, "logits/rejected": -0.8550229668617249, "logps/chosen": -195.823486328125, "logps/rejected": -251.22152709960938, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": 0.5468925833702087, "rewards/margins": 10.724153518676758, "rewards/rejected": -10.177261352539062, "step": 1159 }, { "epoch": 0.4, "learning_rate": 1.8557152799289515e-06, "logits/chosen": -0.9749802947044373, "logits/rejected": -0.9458526372909546, "logps/chosen": -215.43466186523438, "logps/rejected": -302.49371337890625, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -0.18691349029541016, "rewards/margins": 11.034812927246094, "rewards/rejected": -11.221726417541504, "step": 1160 }, { "epoch": 0.4, "learning_rate": 1.8554291521280493e-06, "logits/chosen": -0.9368480443954468, "logits/rejected": -0.9161314964294434, "logps/chosen": -200.52175903320312, "logps/rejected": -273.66351318359375, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -0.17398717999458313, "rewards/margins": 9.275588035583496, "rewards/rejected": -9.449575424194336, "step": 1161 }, { "epoch": 0.4, "learning_rate": 1.8551427630053463e-06, "logits/chosen": -0.9809973835945129, "logits/rejected": -0.934130847454071, "logps/chosen": -216.75631713867188, "logps/rejected": -296.69842529296875, "loss": 0.0615, "rewards/accuracies": 1.0, "rewards/chosen": -0.986099362373352, "rewards/margins": 12.009847640991211, "rewards/rejected": -12.99594783782959, "step": 1162 }, { "epoch": 0.4, "learning_rate": 1.8548561126483299e-06, "logits/chosen": -0.8788991570472717, "logits/rejected": -0.8847306370735168, "logps/chosen": -168.88491821289062, "logps/rejected": -249.52645874023438, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -0.7734879851341248, "rewards/margins": 8.308545112609863, "rewards/rejected": -9.082032203674316, "step": 1163 }, { "epoch": 0.4, "learning_rate": 1.854569201144569e-06, "logits/chosen": -0.8491623997688293, "logits/rejected": -0.8397685885429382, "logps/chosen": -191.28408813476562, "logps/rejected": -312.0413513183594, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": 0.2702587842941284, "rewards/margins": 12.37197494506836, "rewards/rejected": -12.101716995239258, "step": 1164 }, { "epoch": 0.4, "learning_rate": 1.8542820285817103e-06, "logits/chosen": -0.9666796326637268, "logits/rejected": -0.9576060771942139, "logps/chosen": -123.16874694824219, "logps/rejected": -280.0504150390625, "loss": 0.0653, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036093145608901978, "rewards/margins": 12.338913917541504, "rewards/rejected": -12.342522621154785, "step": 1165 }, { "epoch": 0.4, "learning_rate": 1.853994595047481e-06, "logits/chosen": -0.9535970687866211, "logits/rejected": -0.9370452761650085, "logps/chosen": -214.74700927734375, "logps/rejected": -352.1937561035156, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.20342859625816345, "rewards/margins": 12.11631965637207, "rewards/rejected": -12.319746971130371, "step": 1166 }, { "epoch": 0.4, "learning_rate": 1.8537069006296883e-06, "logits/chosen": -0.9145557880401611, "logits/rejected": -0.8926374912261963, "logps/chosen": -178.76300048828125, "logps/rejected": -283.2810363769531, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -1.096754789352417, "rewards/margins": 9.239952087402344, "rewards/rejected": -10.33670711517334, "step": 1167 }, { "epoch": 0.4, "learning_rate": 1.853418945416219e-06, "logits/chosen": -0.9624924063682556, "logits/rejected": -0.9295341372489929, "logps/chosen": -193.37478637695312, "logps/rejected": -311.745361328125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.7062174677848816, "rewards/margins": 11.243474960327148, "rewards/rejected": -11.949692726135254, "step": 1168 }, { "epoch": 0.4, "learning_rate": 1.8531307294950394e-06, "logits/chosen": -0.9688341617584229, "logits/rejected": -0.9466043710708618, "logps/chosen": -250.3734893798828, "logps/rejected": -342.0072937011719, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -0.6543537378311157, "rewards/margins": 11.585826873779297, "rewards/rejected": -12.240180969238281, "step": 1169 }, { "epoch": 0.4, "learning_rate": 1.8528422529541951e-06, "logits/chosen": -0.9431330561637878, "logits/rejected": -0.9330077767372131, "logps/chosen": -183.63192749023438, "logps/rejected": -268.8494873046875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.608900785446167, "rewards/margins": 10.90446949005127, "rewards/rejected": -11.513371467590332, "step": 1170 }, { "epoch": 0.4, "learning_rate": 1.8525535158818124e-06, "logits/chosen": -0.8710498213768005, "logits/rejected": -0.8582912683486938, "logps/chosen": -185.61585998535156, "logps/rejected": -319.5482177734375, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.46451815962791443, "rewards/margins": 12.04072093963623, "rewards/rejected": -12.505239486694336, "step": 1171 }, { "epoch": 0.4, "learning_rate": 1.852264518366096e-06, "logits/chosen": -0.935692548751831, "logits/rejected": -0.913705050945282, "logps/chosen": -152.29898071289062, "logps/rejected": -230.29226684570312, "loss": 0.0702, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9352970123291016, "rewards/margins": 7.435345649719238, "rewards/rejected": -8.370643615722656, "step": 1172 }, { "epoch": 0.4, "learning_rate": 1.8519752604953305e-06, "logits/chosen": -0.9575478434562683, "logits/rejected": -0.9316316246986389, "logps/chosen": -193.07241821289062, "logps/rejected": -288.9110107421875, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -0.2090485692024231, "rewards/margins": 11.648491859436035, "rewards/rejected": -11.85754108428955, "step": 1173 }, { "epoch": 0.4, "learning_rate": 1.8516857423578807e-06, "logits/chosen": -0.9062716364860535, "logits/rejected": -0.8654555082321167, "logps/chosen": -235.07662963867188, "logps/rejected": -329.24188232421875, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": 0.1965307593345642, "rewards/margins": 10.550155639648438, "rewards/rejected": -10.35362434387207, "step": 1174 }, { "epoch": 0.4, "learning_rate": 1.8513959640421898e-06, "logits/chosen": -0.9234707951545715, "logits/rejected": -0.8919554948806763, "logps/chosen": -177.7069549560547, "logps/rejected": -260.59002685546875, "loss": 0.0493, "rewards/accuracies": 0.9375, "rewards/chosen": -0.12681296467781067, "rewards/margins": 10.14986515045166, "rewards/rejected": -10.276679039001465, "step": 1175 }, { "epoch": 0.4, "learning_rate": 1.8511059256367817e-06, "logits/chosen": -0.8432455062866211, "logits/rejected": -0.8200737833976746, "logps/chosen": -202.35662841796875, "logps/rejected": -331.4190979003906, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.9335607290267944, "rewards/margins": 13.187784194946289, "rewards/rejected": -14.121345520019531, "step": 1176 }, { "epoch": 0.4, "learning_rate": 1.8508156272302589e-06, "logits/chosen": -0.8537355065345764, "logits/rejected": -0.8195548057556152, "logps/chosen": -224.78799438476562, "logps/rejected": -293.857421875, "loss": 0.0517, "rewards/accuracies": 1.0, "rewards/chosen": -0.22205938398838043, "rewards/margins": 9.726511001586914, "rewards/rejected": -9.948570251464844, "step": 1177 }, { "epoch": 0.4, "learning_rate": 1.8505250689113033e-06, "logits/chosen": -0.8788252472877502, "logits/rejected": -0.8384209871292114, "logps/chosen": -186.8572540283203, "logps/rejected": -214.53863525390625, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": 0.013992533087730408, "rewards/margins": 9.486186981201172, "rewards/rejected": -9.472195625305176, "step": 1178 }, { "epoch": 0.4, "learning_rate": 1.8502342507686767e-06, "logits/chosen": -0.9099043011665344, "logits/rejected": -0.8927048444747925, "logps/chosen": -217.59017944335938, "logps/rejected": -345.1374206542969, "loss": 0.0629, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9906019568443298, "rewards/margins": 10.799323081970215, "rewards/rejected": -11.789924621582031, "step": 1179 }, { "epoch": 0.4, "learning_rate": 1.84994317289122e-06, "logits/chosen": -0.946499764919281, "logits/rejected": -0.9442914724349976, "logps/chosen": -198.66546630859375, "logps/rejected": -308.041259765625, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": -0.053846850991249084, "rewards/margins": 11.084600448608398, "rewards/rejected": -11.138447761535645, "step": 1180 }, { "epoch": 0.4, "learning_rate": 1.8496518353678534e-06, "logits/chosen": -0.8214306235313416, "logits/rejected": -0.7941007614135742, "logps/chosen": -193.89944458007812, "logps/rejected": -305.5042724609375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.3783584535121918, "rewards/margins": 10.001301765441895, "rewards/rejected": -10.379660606384277, "step": 1181 }, { "epoch": 0.4, "learning_rate": 1.8493602382875766e-06, "logits/chosen": -0.95613694190979, "logits/rejected": -0.9124162793159485, "logps/chosen": -230.37559509277344, "logps/rejected": -322.0440673828125, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": -0.2741803824901581, "rewards/margins": 13.655525207519531, "rewards/rejected": -13.929705619812012, "step": 1182 }, { "epoch": 0.4, "learning_rate": 1.8490683817394684e-06, "logits/chosen": -0.9524217844009399, "logits/rejected": -0.918353259563446, "logps/chosen": -200.54632568359375, "logps/rejected": -272.7828369140625, "loss": 0.062, "rewards/accuracies": 1.0, "rewards/chosen": 0.45636752247810364, "rewards/margins": 11.027467727661133, "rewards/rejected": -10.571101188659668, "step": 1183 }, { "epoch": 0.4, "learning_rate": 1.8487762658126868e-06, "logits/chosen": -0.9283949732780457, "logits/rejected": -0.9135459661483765, "logps/chosen": -192.4732666015625, "logps/rejected": -367.87457275390625, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.735616147518158, "rewards/margins": 12.121176719665527, "rewards/rejected": -12.856793403625488, "step": 1184 }, { "epoch": 0.4, "learning_rate": 1.8484838905964698e-06, "logits/chosen": -0.8918415307998657, "logits/rejected": -0.8671899437904358, "logps/chosen": -184.1224822998047, "logps/rejected": -248.0922088623047, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -1.3471248149871826, "rewards/margins": 8.857388496398926, "rewards/rejected": -10.204513549804688, "step": 1185 }, { "epoch": 0.4, "learning_rate": 1.8481912561801334e-06, "logits/chosen": -0.9524540305137634, "logits/rejected": -0.9373567700386047, "logps/chosen": -180.18026733398438, "logps/rejected": -328.5830993652344, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -0.2644130289554596, "rewards/margins": 13.069246292114258, "rewards/rejected": -13.333661079406738, "step": 1186 }, { "epoch": 0.41, "learning_rate": 1.8478983626530731e-06, "logits/chosen": -0.855034351348877, "logits/rejected": -0.8484457731246948, "logps/chosen": -206.93096923828125, "logps/rejected": -355.1465759277344, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": 0.25192272663116455, "rewards/margins": 13.357049942016602, "rewards/rejected": -13.105127334594727, "step": 1187 }, { "epoch": 0.41, "learning_rate": 1.8476052101047648e-06, "logits/chosen": -0.9532539248466492, "logits/rejected": -0.9183475375175476, "logps/chosen": -233.81993103027344, "logps/rejected": -298.7433166503906, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 0.3955961763858795, "rewards/margins": 11.894033432006836, "rewards/rejected": -11.498437881469727, "step": 1188 }, { "epoch": 0.41, "learning_rate": 1.8473117986247617e-06, "logits/chosen": -0.8995837569236755, "logits/rejected": -0.8761619925498962, "logps/chosen": -154.28614807128906, "logps/rejected": -243.46939086914062, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -1.173901081085205, "rewards/margins": 10.312748908996582, "rewards/rejected": -11.486649513244629, "step": 1189 }, { "epoch": 0.41, "learning_rate": 1.8470181283026976e-06, "logits/chosen": -0.9246599674224854, "logits/rejected": -0.890984296798706, "logps/chosen": -229.67636108398438, "logps/rejected": -339.7564697265625, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -0.5723639130592346, "rewards/margins": 10.92641830444336, "rewards/rejected": -11.498781204223633, "step": 1190 }, { "epoch": 0.41, "learning_rate": 1.8467241992282841e-06, "logits/chosen": -0.9761306643486023, "logits/rejected": -0.954559326171875, "logps/chosen": -185.49195861816406, "logps/rejected": -318.5213317871094, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.4997970461845398, "rewards/margins": 14.04173755645752, "rewards/rejected": -14.541534423828125, "step": 1191 }, { "epoch": 0.41, "learning_rate": 1.846430011491313e-06, "logits/chosen": -0.8795255422592163, "logits/rejected": -0.8422953486442566, "logps/chosen": -232.28903198242188, "logps/rejected": -336.0039978027344, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 0.18575608730316162, "rewards/margins": 11.771488189697266, "rewards/rejected": -11.585732460021973, "step": 1192 }, { "epoch": 0.41, "learning_rate": 1.8461355651816542e-06, "logits/chosen": -0.7778792977333069, "logits/rejected": -0.738097071647644, "logps/chosen": -151.86500549316406, "logps/rejected": -217.06671142578125, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8280594944953918, "rewards/margins": 8.994599342346191, "rewards/rejected": -9.82265853881836, "step": 1193 }, { "epoch": 0.41, "learning_rate": 1.8458408603892573e-06, "logits/chosen": -0.9168811440467834, "logits/rejected": -0.8718460202217102, "logps/chosen": -221.44911193847656, "logps/rejected": -313.4185791015625, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": 0.27023711800575256, "rewards/margins": 11.955748558044434, "rewards/rejected": -11.685511589050293, "step": 1194 }, { "epoch": 0.41, "learning_rate": 1.8455458972041503e-06, "logits/chosen": -0.9506320357322693, "logits/rejected": -0.9369039535522461, "logps/chosen": -168.39024353027344, "logps/rejected": -263.7473449707031, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": 0.07952609658241272, "rewards/margins": 10.554555892944336, "rewards/rejected": -10.475029945373535, "step": 1195 }, { "epoch": 0.41, "learning_rate": 1.8452506757164406e-06, "logits/chosen": -0.9073523879051208, "logits/rejected": -0.8870944976806641, "logps/chosen": -207.300048828125, "logps/rejected": -256.4309997558594, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": 0.9043404459953308, "rewards/margins": 10.31213092803955, "rewards/rejected": -9.407791137695312, "step": 1196 }, { "epoch": 0.41, "learning_rate": 1.844955196016314e-06, "logits/chosen": -0.9226937294006348, "logits/rejected": -0.913645327091217, "logps/chosen": -209.9547576904297, "logps/rejected": -311.01324462890625, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -0.7951188087463379, "rewards/margins": 9.707659721374512, "rewards/rejected": -10.502778053283691, "step": 1197 }, { "epoch": 0.41, "learning_rate": 1.8446594581940358e-06, "logits/chosen": -0.93021160364151, "logits/rejected": -0.9131984114646912, "logps/chosen": -202.27700805664062, "logps/rejected": -291.5251159667969, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -0.9082846641540527, "rewards/margins": 8.455453872680664, "rewards/rejected": -9.363739013671875, "step": 1198 }, { "epoch": 0.41, "learning_rate": 1.8443634623399498e-06, "logits/chosen": -0.9017312526702881, "logits/rejected": -0.8521380424499512, "logps/chosen": -196.4995880126953, "logps/rejected": -249.52684020996094, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -0.7599536180496216, "rewards/margins": 9.198322296142578, "rewards/rejected": -9.958274841308594, "step": 1199 }, { "epoch": 0.41, "learning_rate": 1.8440672085444784e-06, "logits/chosen": -0.9372005462646484, "logits/rejected": -0.9125538468360901, "logps/chosen": -254.9647674560547, "logps/rejected": -379.1946105957031, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": 0.016350194811820984, "rewards/margins": 13.785135269165039, "rewards/rejected": -13.76878547668457, "step": 1200 }, { "epoch": 0.41, "learning_rate": 1.843770696898123e-06, "logits/chosen": -0.8971651196479797, "logits/rejected": -0.8787504434585571, "logps/chosen": -177.26776123046875, "logps/rejected": -306.1869201660156, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.6450831890106201, "rewards/margins": 12.54585075378418, "rewards/rejected": -14.190935134887695, "step": 1201 }, { "epoch": 0.41, "learning_rate": 1.8434739274914642e-06, "logits/chosen": -0.9420996904373169, "logits/rejected": -0.9359982013702393, "logps/chosen": -160.78469848632812, "logps/rejected": -283.1185302734375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.1257718801498413, "rewards/margins": 10.511163711547852, "rewards/rejected": -11.636935234069824, "step": 1202 }, { "epoch": 0.41, "learning_rate": 1.8431769004151608e-06, "logits/chosen": -0.9237620830535889, "logits/rejected": -0.8895265460014343, "logps/chosen": -178.931640625, "logps/rejected": -228.66026306152344, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.40796518325805664, "rewards/margins": 9.376240730285645, "rewards/rejected": -9.784204483032227, "step": 1203 }, { "epoch": 0.41, "learning_rate": 1.84287961575995e-06, "logits/chosen": -0.9239035248756409, "logits/rejected": -0.9180794954299927, "logps/chosen": -158.95130920410156, "logps/rejected": -280.9417724609375, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8259085416793823, "rewards/margins": 12.394193649291992, "rewards/rejected": -13.220101356506348, "step": 1204 }, { "epoch": 0.41, "learning_rate": 1.842582073616649e-06, "logits/chosen": -0.8795742988586426, "logits/rejected": -0.8635227680206299, "logps/chosen": -182.598388671875, "logps/rejected": -329.7925720214844, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.2225537300109863, "rewards/margins": 11.392541885375977, "rewards/rejected": -12.615095138549805, "step": 1205 }, { "epoch": 0.41, "learning_rate": 1.8422842740761521e-06, "logits/chosen": -0.7835774421691895, "logits/rejected": -0.7661995887756348, "logps/chosen": -195.6350860595703, "logps/rejected": -318.3856201171875, "loss": 0.0477, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3156194686889648, "rewards/margins": 9.616975784301758, "rewards/rejected": -10.932595252990723, "step": 1206 }, { "epoch": 0.41, "learning_rate": 1.8419862172294337e-06, "logits/chosen": -0.8430091738700867, "logits/rejected": -0.8163135647773743, "logps/chosen": -227.9850311279297, "logps/rejected": -330.65130615234375, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -0.8003532290458679, "rewards/margins": 11.856120109558105, "rewards/rejected": -12.656475067138672, "step": 1207 }, { "epoch": 0.41, "learning_rate": 1.8416879031675454e-06, "logits/chosen": -0.8865481615066528, "logits/rejected": -0.882220983505249, "logps/chosen": -241.79786682128906, "logps/rejected": -399.0641784667969, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.2933565378189087, "rewards/margins": 13.039444923400879, "rewards/rejected": -14.332801818847656, "step": 1208 }, { "epoch": 0.41, "learning_rate": 1.841389331981618e-06, "logits/chosen": -0.8845833539962769, "logits/rejected": -0.8513505458831787, "logps/chosen": -211.41238403320312, "logps/rejected": -356.64501953125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.421581506729126, "rewards/margins": 12.296412467956543, "rewards/rejected": -13.717992782592773, "step": 1209 }, { "epoch": 0.41, "learning_rate": 1.8410905037628612e-06, "logits/chosen": -0.8478583097457886, "logits/rejected": -0.8098798990249634, "logps/chosen": -151.86642456054688, "logps/rejected": -255.63787841796875, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -0.47453486919403076, "rewards/margins": 9.203025817871094, "rewards/rejected": -9.677560806274414, "step": 1210 }, { "epoch": 0.41, "learning_rate": 1.840791418602563e-06, "logits/chosen": -0.9300684332847595, "logits/rejected": -0.9001513123512268, "logps/chosen": -269.16143798828125, "logps/rejected": -395.0601501464844, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.8416905999183655, "rewards/margins": 11.186990737915039, "rewards/rejected": -12.028681755065918, "step": 1211 }, { "epoch": 0.41, "learning_rate": 1.8404920765920894e-06, "logits/chosen": -0.8828060030937195, "logits/rejected": -0.8626211881637573, "logps/chosen": -189.3662567138672, "logps/rejected": -292.6338806152344, "loss": 0.03, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3853459060192108, "rewards/margins": 9.957404136657715, "rewards/rejected": -10.342750549316406, "step": 1212 }, { "epoch": 0.41, "learning_rate": 1.8401924778228856e-06, "logits/chosen": -0.9105411767959595, "logits/rejected": -0.8884180188179016, "logps/chosen": -210.4108123779297, "logps/rejected": -339.3742370605469, "loss": 0.0651, "rewards/accuracies": 1.0, "rewards/chosen": 0.01772552728652954, "rewards/margins": 13.2119779586792, "rewards/rejected": -13.194252014160156, "step": 1213 }, { "epoch": 0.41, "learning_rate": 1.8398926223864745e-06, "logits/chosen": -1.0423672199249268, "logits/rejected": -1.0235719680786133, "logps/chosen": -214.1346893310547, "logps/rejected": -288.5589904785156, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.8085006475448608, "rewards/margins": 8.273516654968262, "rewards/rejected": -9.082016944885254, "step": 1214 }, { "epoch": 0.41, "learning_rate": 1.8395925103744583e-06, "logits/chosen": -0.8636314272880554, "logits/rejected": -0.8452805280685425, "logps/chosen": -198.98194885253906, "logps/rejected": -316.4815673828125, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.20560675859451294, "rewards/margins": 11.999167442321777, "rewards/rejected": -12.204773902893066, "step": 1215 }, { "epoch": 0.42, "learning_rate": 1.8392921418785168e-06, "logits/chosen": -0.9215483069419861, "logits/rejected": -0.8895004391670227, "logps/chosen": -172.74615478515625, "logps/rejected": -303.6104736328125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.6898343563079834, "rewards/margins": 12.269264221191406, "rewards/rejected": -12.959097862243652, "step": 1216 }, { "epoch": 0.42, "learning_rate": 1.8389915169904081e-06, "logits/chosen": -0.8978890776634216, "logits/rejected": -0.8398497700691223, "logps/chosen": -218.57577514648438, "logps/rejected": -268.2568359375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.12818783521652222, "rewards/margins": 12.100859642028809, "rewards/rejected": -12.229047775268555, "step": 1217 }, { "epoch": 0.42, "learning_rate": 1.8386906358019696e-06, "logits/chosen": -0.9215896725654602, "logits/rejected": -0.8848287463188171, "logps/chosen": -185.3516845703125, "logps/rejected": -270.45379638671875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.4566834568977356, "rewards/margins": 11.60063362121582, "rewards/rejected": -12.057317733764648, "step": 1218 }, { "epoch": 0.42, "learning_rate": 1.838389498405116e-06, "logits/chosen": -0.8534832000732422, "logits/rejected": -0.8228498697280884, "logps/chosen": -159.5823516845703, "logps/rejected": -227.54922485351562, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.5903714299201965, "rewards/margins": 9.444238662719727, "rewards/rejected": -10.034610748291016, "step": 1219 }, { "epoch": 0.42, "learning_rate": 1.8380881048918404e-06, "logits/chosen": -0.8883723616600037, "logits/rejected": -0.8773752450942993, "logps/chosen": -138.8048553466797, "logps/rejected": -283.97833251953125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.9916737675666809, "rewards/margins": 10.859061241149902, "rewards/rejected": -11.85073471069336, "step": 1220 }, { "epoch": 0.42, "learning_rate": 1.837786455354215e-06, "logits/chosen": -0.9823306798934937, "logits/rejected": -0.9259088039398193, "logps/chosen": -204.29229736328125, "logps/rejected": -219.47076416015625, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -0.5314148664474487, "rewards/margins": 8.32111644744873, "rewards/rejected": -8.852531433105469, "step": 1221 }, { "epoch": 0.42, "learning_rate": 1.837484549884389e-06, "logits/chosen": -0.8998379707336426, "logits/rejected": -0.8698931932449341, "logps/chosen": -158.36416625976562, "logps/rejected": -284.3309631347656, "loss": 0.079, "rewards/accuracies": 1.0, "rewards/chosen": -0.992706298828125, "rewards/margins": 12.078895568847656, "rewards/rejected": -13.071601867675781, "step": 1222 }, { "epoch": 0.42, "learning_rate": 1.8371823885745908e-06, "logits/chosen": -1.0197759866714478, "logits/rejected": -1.0004132986068726, "logps/chosen": -180.5672607421875, "logps/rejected": -297.8489074707031, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.3153620958328247, "rewards/margins": 12.43769645690918, "rewards/rejected": -13.753058433532715, "step": 1223 }, { "epoch": 0.42, "learning_rate": 1.836879971517126e-06, "logits/chosen": -0.9448198676109314, "logits/rejected": -0.9033676981925964, "logps/chosen": -191.8566131591797, "logps/rejected": -287.444091796875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.0528982877731323, "rewards/margins": 11.844392776489258, "rewards/rejected": -12.897290229797363, "step": 1224 }, { "epoch": 0.42, "learning_rate": 1.836577298804379e-06, "logits/chosen": -0.90293949842453, "logits/rejected": -0.8752638101577759, "logps/chosen": -126.77976989746094, "logps/rejected": -184.64898681640625, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -0.8791574239730835, "rewards/margins": 8.250081062316895, "rewards/rejected": -9.129239082336426, "step": 1225 }, { "epoch": 0.42, "learning_rate": 1.8362743705288125e-06, "logits/chosen": -0.8552306890487671, "logits/rejected": -0.8364116549491882, "logps/chosen": -197.95921325683594, "logps/rejected": -287.5038146972656, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.691746711730957, "rewards/margins": 11.452020645141602, "rewards/rejected": -12.143766403198242, "step": 1226 }, { "epoch": 0.42, "learning_rate": 1.8359711867829665e-06, "logits/chosen": -0.8472580313682556, "logits/rejected": -0.7996752858161926, "logps/chosen": -152.50938415527344, "logps/rejected": -200.81309509277344, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -0.4638622999191284, "rewards/margins": 8.775979042053223, "rewards/rejected": -9.23984146118164, "step": 1227 }, { "epoch": 0.42, "learning_rate": 1.8356677476594595e-06, "logits/chosen": -0.8290687799453735, "logits/rejected": -0.8017690777778625, "logps/chosen": -208.10855102539062, "logps/rejected": -288.1243896484375, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.187759280204773, "rewards/margins": 12.380924224853516, "rewards/rejected": -13.568683624267578, "step": 1228 }, { "epoch": 0.42, "learning_rate": 1.8353640532509881e-06, "logits/chosen": -0.8813461661338806, "logits/rejected": -0.8588094711303711, "logps/chosen": -193.13482666015625, "logps/rejected": -335.51171875, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.03136596456170082, "rewards/margins": 12.470354080200195, "rewards/rejected": -12.50171947479248, "step": 1229 }, { "epoch": 0.42, "learning_rate": 1.8350601036503274e-06, "logits/chosen": -0.8818246722221375, "logits/rejected": -0.8623347282409668, "logps/chosen": -128.0685272216797, "logps/rejected": -250.85594177246094, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -1.079522728919983, "rewards/margins": 11.137639999389648, "rewards/rejected": -12.217161178588867, "step": 1230 }, { "epoch": 0.42, "learning_rate": 1.8347558989503283e-06, "logits/chosen": -0.8214157223701477, "logits/rejected": -0.7930608987808228, "logps/chosen": -203.447265625, "logps/rejected": -289.0404052734375, "loss": 0.0333, "rewards/accuracies": 0.9375, "rewards/chosen": 0.4123125374317169, "rewards/margins": 11.778865814208984, "rewards/rejected": -11.366552352905273, "step": 1231 }, { "epoch": 0.42, "learning_rate": 1.834451439243922e-06, "logits/chosen": -0.8672925233840942, "logits/rejected": -0.8281866312026978, "logps/chosen": -221.487060546875, "logps/rejected": -342.4407043457031, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": -1.1410218477249146, "rewards/margins": 11.902717590332031, "rewards/rejected": -13.04373836517334, "step": 1232 }, { "epoch": 0.42, "learning_rate": 1.834146724624117e-06, "logits/chosen": -0.9175028800964355, "logits/rejected": -0.8832420706748962, "logps/chosen": -224.9741973876953, "logps/rejected": -274.41015625, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -0.6575536727905273, "rewards/margins": 10.234945297241211, "rewards/rejected": -10.892498016357422, "step": 1233 }, { "epoch": 0.42, "learning_rate": 1.833841755183999e-06, "logits/chosen": -0.9265189170837402, "logits/rejected": -0.9040430784225464, "logps/chosen": -224.54673767089844, "logps/rejected": -365.10614013671875, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.4881937801837921, "rewards/margins": 13.60664176940918, "rewards/rejected": -14.094836235046387, "step": 1234 }, { "epoch": 0.42, "learning_rate": 1.8335365310167318e-06, "logits/chosen": -0.9022141695022583, "logits/rejected": -0.8861704468727112, "logps/chosen": -185.3894805908203, "logps/rejected": -326.05328369140625, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -0.8682779669761658, "rewards/margins": 13.351167678833008, "rewards/rejected": -14.219446182250977, "step": 1235 }, { "epoch": 0.42, "learning_rate": 1.8332310522155577e-06, "logits/chosen": -0.9015997648239136, "logits/rejected": -0.8488801121711731, "logps/chosen": -199.66085815429688, "logps/rejected": -291.4209289550781, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": -0.7210353016853333, "rewards/margins": 12.048638343811035, "rewards/rejected": -12.769674301147461, "step": 1236 }, { "epoch": 0.42, "learning_rate": 1.832925318873796e-06, "logits/chosen": -0.9678904414176941, "logits/rejected": -0.9459937810897827, "logps/chosen": -168.6718292236328, "logps/rejected": -297.0518798828125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.0247538089752197, "rewards/margins": 12.005967140197754, "rewards/rejected": -14.030721664428711, "step": 1237 }, { "epoch": 0.42, "learning_rate": 1.8326193310848439e-06, "logits/chosen": -1.01480233669281, "logits/rejected": -1.0078105926513672, "logps/chosen": -160.8006134033203, "logps/rejected": -309.9812316894531, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -0.3625818192958832, "rewards/margins": 12.174973487854004, "rewards/rejected": -12.537553787231445, "step": 1238 }, { "epoch": 0.42, "learning_rate": 1.8323130889421768e-06, "logits/chosen": -0.9439703822135925, "logits/rejected": -0.9145991802215576, "logps/chosen": -201.84725952148438, "logps/rejected": -283.0241394042969, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.3068307340145111, "rewards/margins": 12.52558708190918, "rewards/rejected": -12.832417488098145, "step": 1239 }, { "epoch": 0.42, "learning_rate": 1.8320065925393467e-06, "logits/chosen": -0.9385749697685242, "logits/rejected": -0.917969822883606, "logps/chosen": -213.3739471435547, "logps/rejected": -312.59918212890625, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": -1.1337065696716309, "rewards/margins": 11.70157241821289, "rewards/rejected": -12.83527946472168, "step": 1240 }, { "epoch": 0.42, "learning_rate": 1.831699841969985e-06, "logits/chosen": -0.8247060179710388, "logits/rejected": -0.8120309114456177, "logps/chosen": -172.5798797607422, "logps/rejected": -336.8374328613281, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -2.146306037902832, "rewards/margins": 12.45776653289795, "rewards/rejected": -14.604072570800781, "step": 1241 }, { "epoch": 0.42, "learning_rate": 1.8313928373277991e-06, "logits/chosen": -0.9075035452842712, "logits/rejected": -0.8558856844902039, "logps/chosen": -262.5984191894531, "logps/rejected": -270.56134033203125, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": 0.3143201470375061, "rewards/margins": 10.756462097167969, "rewards/rejected": -10.442142486572266, "step": 1242 }, { "epoch": 0.42, "learning_rate": 1.8310855787065749e-06, "logits/chosen": -0.9612535834312439, "logits/rejected": -0.9406633377075195, "logps/chosen": -172.9947052001953, "logps/rejected": -311.42718505859375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.6483818888664246, "rewards/margins": 13.099512100219727, "rewards/rejected": -13.747892379760742, "step": 1243 }, { "epoch": 0.42, "learning_rate": 1.8307780662001754e-06, "logits/chosen": -0.8590777516365051, "logits/rejected": -0.8015803694725037, "logps/chosen": -161.65347290039062, "logps/rejected": -306.2671203613281, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": -1.390470027923584, "rewards/margins": 11.455011367797852, "rewards/rejected": -12.845480918884277, "step": 1244 }, { "epoch": 0.42, "learning_rate": 1.8304702999025418e-06, "logits/chosen": -0.9472905397415161, "logits/rejected": -0.9027786254882812, "logps/chosen": -193.5587615966797, "logps/rejected": -250.39700317382812, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.22159971296787262, "rewards/margins": 10.532757759094238, "rewards/rejected": -10.754356384277344, "step": 1245 }, { "epoch": 0.43, "learning_rate": 1.8301622799076925e-06, "logits/chosen": -0.841653048992157, "logits/rejected": -0.8121934533119202, "logps/chosen": -231.31048583984375, "logps/rejected": -334.68212890625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.2527366578578949, "rewards/margins": 11.65760612487793, "rewards/rejected": -11.910341262817383, "step": 1246 }, { "epoch": 0.43, "learning_rate": 1.8298540063097232e-06, "logits/chosen": -0.7961524724960327, "logits/rejected": -0.7779024839401245, "logps/chosen": -178.65884399414062, "logps/rejected": -296.5047302246094, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -1.2671318054199219, "rewards/margins": 11.207058906555176, "rewards/rejected": -12.474191665649414, "step": 1247 }, { "epoch": 0.43, "learning_rate": 1.8295454792028071e-06, "logits/chosen": -0.9470467567443848, "logits/rejected": -0.9286381006240845, "logps/chosen": -89.88256072998047, "logps/rejected": -237.6074676513672, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.0447131395339966, "rewards/margins": 12.354362487792969, "rewards/rejected": -13.399076461791992, "step": 1248 }, { "epoch": 0.43, "learning_rate": 1.8292366986811948e-06, "logits/chosen": -0.9468322396278381, "logits/rejected": -0.9227274060249329, "logps/chosen": -212.09622192382812, "logps/rejected": -335.7763671875, "loss": 0.0885, "rewards/accuracies": 1.0, "rewards/chosen": -0.0004895627498626709, "rewards/margins": 12.16650676727295, "rewards/rejected": -12.166997909545898, "step": 1249 }, { "epoch": 0.43, "learning_rate": 1.8289276648392154e-06, "logits/chosen": -0.9662436246871948, "logits/rejected": -0.9581830501556396, "logps/chosen": -172.61932373046875, "logps/rejected": -316.8883056640625, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.6378732323646545, "rewards/margins": 12.875425338745117, "rewards/rejected": -13.513297080993652, "step": 1250 }, { "epoch": 0.43, "learning_rate": 1.8286183777712733e-06, "logits/chosen": -0.8371226191520691, "logits/rejected": -0.8329094052314758, "logps/chosen": -163.49496459960938, "logps/rejected": -294.1361999511719, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -2.3552045822143555, "rewards/margins": 9.807323455810547, "rewards/rejected": -12.162528038024902, "step": 1251 }, { "epoch": 0.43, "learning_rate": 1.828308837571852e-06, "logits/chosen": -0.8681791424751282, "logits/rejected": -0.8510374426841736, "logps/chosen": -229.7481689453125, "logps/rejected": -295.29742431640625, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/chosen": -0.21442240476608276, "rewards/margins": 8.69997787475586, "rewards/rejected": -8.914400100708008, "step": 1252 }, { "epoch": 0.43, "learning_rate": 1.827999044335512e-06, "logits/chosen": -0.7968842387199402, "logits/rejected": -0.7621651887893677, "logps/chosen": -228.38330078125, "logps/rejected": -260.66058349609375, "loss": 0.0576, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9152237176895142, "rewards/margins": 7.549922466278076, "rewards/rejected": -8.465145111083984, "step": 1253 }, { "epoch": 0.43, "learning_rate": 1.8276889981568906e-06, "logits/chosen": -0.9307717084884644, "logits/rejected": -0.920933187007904, "logps/chosen": -164.68951416015625, "logps/rejected": -242.0632781982422, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.7688117027282715, "rewards/margins": 11.070810317993164, "rewards/rejected": -11.839622497558594, "step": 1254 }, { "epoch": 0.43, "learning_rate": 1.8273786991307023e-06, "logits/chosen": -0.8744919300079346, "logits/rejected": -0.8314228653907776, "logps/chosen": -210.59410095214844, "logps/rejected": -285.1327209472656, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.43691152334213257, "rewards/margins": 11.182598114013672, "rewards/rejected": -11.61950969696045, "step": 1255 }, { "epoch": 0.43, "learning_rate": 1.8270681473517398e-06, "logits/chosen": -0.8906936645507812, "logits/rejected": -0.8661737442016602, "logps/chosen": -225.90011596679688, "logps/rejected": -305.60931396484375, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": 0.3499177098274231, "rewards/margins": 9.632306098937988, "rewards/rejected": -9.282387733459473, "step": 1256 }, { "epoch": 0.43, "learning_rate": 1.826757342914872e-06, "logits/chosen": -0.8688669800758362, "logits/rejected": -0.8577086329460144, "logps/chosen": -194.78707885742188, "logps/rejected": -349.38311767578125, "loss": 0.0345, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5914813280105591, "rewards/margins": 13.314608573913574, "rewards/rejected": -13.906089782714844, "step": 1257 }, { "epoch": 0.43, "learning_rate": 1.8264462859150453e-06, "logits/chosen": -0.8213034272193909, "logits/rejected": -0.8157131671905518, "logps/chosen": -186.37435913085938, "logps/rejected": -293.63177490234375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.3091102838516235, "rewards/margins": 11.496885299682617, "rewards/rejected": -12.80599594116211, "step": 1258 }, { "epoch": 0.43, "learning_rate": 1.8261349764472836e-06, "logits/chosen": -0.9031201601028442, "logits/rejected": -0.8741194009780884, "logps/chosen": -199.77479553222656, "logps/rejected": -280.63934326171875, "loss": 0.0813, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0839890241622925, "rewards/margins": 11.152383804321289, "rewards/rejected": -12.236372947692871, "step": 1259 }, { "epoch": 0.43, "learning_rate": 1.8258234146066873e-06, "logits/chosen": -0.9258015155792236, "logits/rejected": -0.9112226963043213, "logps/chosen": -184.7515106201172, "logps/rejected": -293.223876953125, "loss": 0.0319, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1687326729297638, "rewards/margins": 10.657729148864746, "rewards/rejected": -10.826461791992188, "step": 1260 }, { "epoch": 0.43, "learning_rate": 1.8255116004884349e-06, "logits/chosen": -0.8941941857337952, "logits/rejected": -0.8676390647888184, "logps/chosen": -190.14859008789062, "logps/rejected": -223.42689514160156, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.204405426979065, "rewards/margins": 7.704296588897705, "rewards/rejected": -8.908700942993164, "step": 1261 }, { "epoch": 0.43, "learning_rate": 1.8251995341877805e-06, "logits/chosen": -0.9562628269195557, "logits/rejected": -0.9138213992118835, "logps/chosen": -227.58282470703125, "logps/rejected": -339.4836730957031, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.5435515642166138, "rewards/margins": 11.884092330932617, "rewards/rejected": -12.427643775939941, "step": 1262 }, { "epoch": 0.43, "learning_rate": 1.8248872158000563e-06, "logits/chosen": -0.8344477415084839, "logits/rejected": -0.8050529956817627, "logps/chosen": -157.00006103515625, "logps/rejected": -224.28990173339844, "loss": 0.0252, "rewards/accuracies": 0.9375, "rewards/chosen": -0.23946836590766907, "rewards/margins": 7.98174524307251, "rewards/rejected": -8.221213340759277, "step": 1263 }, { "epoch": 0.43, "learning_rate": 1.8245746454206716e-06, "logits/chosen": -0.9163603782653809, "logits/rejected": -0.9055339694023132, "logps/chosen": -211.12313842773438, "logps/rejected": -328.16558837890625, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0896612405776978, "rewards/margins": 10.533846855163574, "rewards/rejected": -11.62350845336914, "step": 1264 }, { "epoch": 0.43, "learning_rate": 1.8242618231451121e-06, "logits/chosen": -0.9213427305221558, "logits/rejected": -0.8874141573905945, "logps/chosen": -203.36044311523438, "logps/rejected": -268.5150146484375, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.6032722592353821, "rewards/margins": 10.616762161254883, "rewards/rejected": -11.2200345993042, "step": 1265 }, { "epoch": 0.43, "learning_rate": 1.8239487490689408e-06, "logits/chosen": -0.8225479125976562, "logits/rejected": -0.8073660135269165, "logps/chosen": -191.97561645507812, "logps/rejected": -344.31549072265625, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -1.644601821899414, "rewards/margins": 13.26441764831543, "rewards/rejected": -14.909019470214844, "step": 1266 }, { "epoch": 0.43, "learning_rate": 1.823635423287797e-06, "logits/chosen": -0.8226759433746338, "logits/rejected": -0.8176583051681519, "logps/chosen": -149.27468872070312, "logps/rejected": -241.94168090820312, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.8287453651428223, "rewards/margins": 8.925206184387207, "rewards/rejected": -9.753951072692871, "step": 1267 }, { "epoch": 0.43, "learning_rate": 1.8233218458973983e-06, "logits/chosen": -0.9029960036277771, "logits/rejected": -0.8608399033546448, "logps/chosen": -235.3726348876953, "logps/rejected": -274.559326171875, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": -0.46951884031295776, "rewards/margins": 10.245697975158691, "rewards/rejected": -10.715216636657715, "step": 1268 }, { "epoch": 0.43, "learning_rate": 1.8230080169935373e-06, "logits/chosen": -0.8921152353286743, "logits/rejected": -0.8688262701034546, "logps/chosen": -170.3649444580078, "logps/rejected": -267.7683410644531, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -0.43263697624206543, "rewards/margins": 10.843549728393555, "rewards/rejected": -11.276187896728516, "step": 1269 }, { "epoch": 0.43, "learning_rate": 1.8226939366720852e-06, "logits/chosen": -0.9432691335678101, "logits/rejected": -0.8980540633201599, "logps/chosen": -232.58009338378906, "logps/rejected": -308.6145324707031, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -1.2627606391906738, "rewards/margins": 10.903946876525879, "rewards/rejected": -12.166707992553711, "step": 1270 }, { "epoch": 0.43, "learning_rate": 1.8223796050289888e-06, "logits/chosen": -0.8881767392158508, "logits/rejected": -0.8594375252723694, "logps/chosen": -155.35960388183594, "logps/rejected": -249.23143005371094, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.804008424282074, "rewards/margins": 11.205831527709961, "rewards/rejected": -12.009840965270996, "step": 1271 }, { "epoch": 0.43, "learning_rate": 1.822065022160272e-06, "logits/chosen": -0.8808547854423523, "logits/rejected": -0.8675630688667297, "logps/chosen": -194.0996551513672, "logps/rejected": -301.996337890625, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.7504928112030029, "rewards/margins": 10.671891212463379, "rewards/rejected": -11.422384262084961, "step": 1272 }, { "epoch": 0.43, "learning_rate": 1.821750188162036e-06, "logits/chosen": -0.9228824973106384, "logits/rejected": -0.8938508629798889, "logps/chosen": -220.36898803710938, "logps/rejected": -335.3470458984375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.6444342732429504, "rewards/margins": 12.784276962280273, "rewards/rejected": -13.428712844848633, "step": 1273 }, { "epoch": 0.43, "learning_rate": 1.8214351031304575e-06, "logits/chosen": -0.9050974249839783, "logits/rejected": -0.8906092643737793, "logps/chosen": -218.9488525390625, "logps/rejected": -353.8611145019531, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.4135836064815521, "rewards/margins": 12.443094253540039, "rewards/rejected": -12.85667896270752, "step": 1274 }, { "epoch": 0.44, "learning_rate": 1.8211197671617912e-06, "logits/chosen": -0.8504093885421753, "logits/rejected": -0.8540719151496887, "logps/chosen": -257.4512023925781, "logps/rejected": -401.4331359863281, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": 0.056565508246421814, "rewards/margins": 14.383136749267578, "rewards/rejected": -14.326571464538574, "step": 1275 }, { "epoch": 0.44, "learning_rate": 1.820804180352368e-06, "logits/chosen": -0.9211336374282837, "logits/rejected": -0.8916823267936707, "logps/chosen": -159.4019012451172, "logps/rejected": -287.33563232421875, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -0.8662312030792236, "rewards/margins": 11.445428848266602, "rewards/rejected": -12.311660766601562, "step": 1276 }, { "epoch": 0.44, "learning_rate": 1.8204883427985948e-06, "logits/chosen": -0.8712146282196045, "logits/rejected": -0.8560615181922913, "logps/chosen": -212.33531188964844, "logps/rejected": -357.49017333984375, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -1.4405328035354614, "rewards/margins": 13.91098403930664, "rewards/rejected": -15.351517677307129, "step": 1277 }, { "epoch": 0.44, "learning_rate": 1.8201722545969557e-06, "logits/chosen": -0.8238831162452698, "logits/rejected": -0.8065966963768005, "logps/chosen": -153.98802185058594, "logps/rejected": -332.5576171875, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.7019253373146057, "rewards/margins": 13.769619941711426, "rewards/rejected": -14.47154426574707, "step": 1278 }, { "epoch": 0.44, "learning_rate": 1.8198559158440116e-06, "logits/chosen": -0.7927325367927551, "logits/rejected": -0.7615318894386292, "logps/chosen": -217.57395935058594, "logps/rejected": -266.26629638671875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.16719812154769897, "rewards/margins": 9.74797248840332, "rewards/rejected": -9.915170669555664, "step": 1279 }, { "epoch": 0.44, "learning_rate": 1.8195393266363996e-06, "logits/chosen": -0.8815146684646606, "logits/rejected": -0.8439169526100159, "logps/chosen": -144.12176513671875, "logps/rejected": -257.6587219238281, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -0.7127891778945923, "rewards/margins": 10.429073333740234, "rewards/rejected": -11.141863822937012, "step": 1280 }, { "epoch": 0.44, "learning_rate": 1.8192224870708332e-06, "logits/chosen": -0.8256202936172485, "logits/rejected": -0.78514564037323, "logps/chosen": -153.3806610107422, "logps/rejected": -256.2804260253906, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.7089264392852783, "rewards/margins": 11.691160202026367, "rewards/rejected": -13.400087356567383, "step": 1281 }, { "epoch": 0.44, "learning_rate": 1.8189053972441025e-06, "logits/chosen": -0.8477368950843811, "logits/rejected": -0.8127739429473877, "logps/chosen": -176.553466796875, "logps/rejected": -282.9512634277344, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -1.0715054273605347, "rewards/margins": 11.759611129760742, "rewards/rejected": -12.831116676330566, "step": 1282 }, { "epoch": 0.44, "learning_rate": 1.8185880572530738e-06, "logits/chosen": -0.8500077128410339, "logits/rejected": -0.8155609965324402, "logps/chosen": -196.4631805419922, "logps/rejected": -348.2651672363281, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -0.34314948320388794, "rewards/margins": 16.09331512451172, "rewards/rejected": -16.436464309692383, "step": 1283 }, { "epoch": 0.44, "learning_rate": 1.8182704671946907e-06, "logits/chosen": -0.830402135848999, "logits/rejected": -0.8258501291275024, "logps/chosen": -190.38955688476562, "logps/rejected": -279.0792541503906, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.9034274816513062, "rewards/margins": 10.561105728149414, "rewards/rejected": -11.4645357131958, "step": 1284 }, { "epoch": 0.44, "learning_rate": 1.817952627165972e-06, "logits/chosen": -0.8571619391441345, "logits/rejected": -0.8352792263031006, "logps/chosen": -194.1080780029297, "logps/rejected": -276.5443420410156, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.6569656729698181, "rewards/margins": 10.854655265808105, "rewards/rejected": -11.51162052154541, "step": 1285 }, { "epoch": 0.44, "learning_rate": 1.817634537264014e-06, "logits/chosen": -0.7476161122322083, "logits/rejected": -0.7195551991462708, "logps/chosen": -209.87496948242188, "logps/rejected": -299.80950927734375, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.3170840740203857, "rewards/margins": 11.18697452545166, "rewards/rejected": -12.504058837890625, "step": 1286 }, { "epoch": 0.44, "learning_rate": 1.8173161975859883e-06, "logits/chosen": -0.9556801319122314, "logits/rejected": -0.9356818199157715, "logps/chosen": -202.5937042236328, "logps/rejected": -298.035400390625, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -0.11707049608230591, "rewards/margins": 12.022764205932617, "rewards/rejected": -12.139835357666016, "step": 1287 }, { "epoch": 0.44, "learning_rate": 1.8169976082291433e-06, "logits/chosen": -0.9104233384132385, "logits/rejected": -0.8711007833480835, "logps/chosen": -223.98052978515625, "logps/rejected": -340.85369873046875, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.6392090916633606, "rewards/margins": 13.18230152130127, "rewards/rejected": -13.821510314941406, "step": 1288 }, { "epoch": 0.44, "learning_rate": 1.8166787692908038e-06, "logits/chosen": -0.8951667547225952, "logits/rejected": -0.8408105373382568, "logps/chosen": -261.6872863769531, "logps/rejected": -248.09791564941406, "loss": 0.0942, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6815125942230225, "rewards/margins": 8.735315322875977, "rewards/rejected": -9.416827201843262, "step": 1289 }, { "epoch": 0.44, "learning_rate": 1.816359680868371e-06, "logits/chosen": -0.8355748057365417, "logits/rejected": -0.8212712407112122, "logps/chosen": -211.6549530029297, "logps/rejected": -323.59832763671875, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 0.5428466796875, "rewards/margins": 11.677457809448242, "rewards/rejected": -11.134611129760742, "step": 1290 }, { "epoch": 0.44, "learning_rate": 1.8160403430593215e-06, "logits/chosen": -0.7792780995368958, "logits/rejected": -0.7523386478424072, "logps/chosen": -193.88726806640625, "logps/rejected": -285.80517578125, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -0.7411798238754272, "rewards/margins": 9.188414573669434, "rewards/rejected": -9.929595947265625, "step": 1291 }, { "epoch": 0.44, "learning_rate": 1.8157207559612088e-06, "logits/chosen": -0.7686810493469238, "logits/rejected": -0.74378502368927, "logps/chosen": -271.06072998046875, "logps/rejected": -374.4466857910156, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -0.13316147029399872, "rewards/margins": 11.453062057495117, "rewards/rejected": -11.586223602294922, "step": 1292 }, { "epoch": 0.44, "learning_rate": 1.8154009196716625e-06, "logits/chosen": -0.9295034408569336, "logits/rejected": -0.8876021504402161, "logps/chosen": -152.4528350830078, "logps/rejected": -236.9258270263672, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -1.1278774738311768, "rewards/margins": 10.020209312438965, "rewards/rejected": -11.148086547851562, "step": 1293 }, { "epoch": 0.44, "learning_rate": 1.815080834288388e-06, "logits/chosen": -0.7964305281639099, "logits/rejected": -0.7699946165084839, "logps/chosen": -192.9584503173828, "logps/rejected": -304.23529052734375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.8706855177879333, "rewards/margins": 12.376998901367188, "rewards/rejected": -13.247684478759766, "step": 1294 }, { "epoch": 0.44, "learning_rate": 1.8147604999091668e-06, "logits/chosen": -0.7531099319458008, "logits/rejected": -0.7214336395263672, "logps/chosen": -221.10606384277344, "logps/rejected": -341.04998779296875, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -1.0282337665557861, "rewards/margins": 13.442619323730469, "rewards/rejected": -14.470852851867676, "step": 1295 }, { "epoch": 0.44, "learning_rate": 1.8144399166318568e-06, "logits/chosen": -0.731694221496582, "logits/rejected": -0.7274720072746277, "logps/chosen": -219.22549438476562, "logps/rejected": -346.65301513671875, "loss": 0.0845, "rewards/accuracies": 1.0, "rewards/chosen": -0.4869089126586914, "rewards/margins": 12.681963920593262, "rewards/rejected": -13.168872833251953, "step": 1296 }, { "epoch": 0.44, "learning_rate": 1.814119084554392e-06, "logits/chosen": -0.823129415512085, "logits/rejected": -0.8032830357551575, "logps/chosen": -146.48533630371094, "logps/rejected": -242.3901824951172, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -0.7284127473831177, "rewards/margins": 11.036276817321777, "rewards/rejected": -11.764690399169922, "step": 1297 }, { "epoch": 0.44, "learning_rate": 1.813798003774782e-06, "logits/chosen": -0.8927532434463501, "logits/rejected": -0.8650166392326355, "logps/chosen": -197.27554321289062, "logps/rejected": -258.87335205078125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.27791497111320496, "rewards/margins": 10.811220169067383, "rewards/rejected": -11.08913516998291, "step": 1298 }, { "epoch": 0.44, "learning_rate": 1.8134766743911123e-06, "logits/chosen": -0.8702358603477478, "logits/rejected": -0.8458930253982544, "logps/chosen": -191.81646728515625, "logps/rejected": -319.22900390625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.754356324672699, "rewards/margins": 13.333198547363281, "rewards/rejected": -14.087554931640625, "step": 1299 }, { "epoch": 0.44, "learning_rate": 1.8131550965015447e-06, "logits/chosen": -0.7772387862205505, "logits/rejected": -0.7395877242088318, "logps/chosen": -204.53616333007812, "logps/rejected": -293.6227111816406, "loss": 0.0239, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4721124768257141, "rewards/margins": 10.451971054077148, "rewards/rejected": -10.924083709716797, "step": 1300 }, { "epoch": 0.44, "learning_rate": 1.812833270204317e-06, "logits/chosen": -0.8769416213035583, "logits/rejected": -0.822391152381897, "logps/chosen": -217.41722106933594, "logps/rejected": -274.9205322265625, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.23708797991275787, "rewards/margins": 11.278910636901855, "rewards/rejected": -11.515998840332031, "step": 1301 }, { "epoch": 0.44, "learning_rate": 1.8125111955977427e-06, "logits/chosen": -0.8604322075843811, "logits/rejected": -0.8482630848884583, "logps/chosen": -199.56417846679688, "logps/rejected": -295.7652282714844, "loss": 0.0209, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2388797998428345, "rewards/margins": 10.404674530029297, "rewards/rejected": -11.643553733825684, "step": 1302 }, { "epoch": 0.44, "learning_rate": 1.812188872780211e-06, "logits/chosen": -0.919533371925354, "logits/rejected": -0.9040093421936035, "logps/chosen": -184.32675170898438, "logps/rejected": -293.8070983886719, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 0.2587432265281677, "rewards/margins": 12.651577949523926, "rewards/rejected": -12.39283561706543, "step": 1303 }, { "epoch": 0.45, "learning_rate": 1.811866301850187e-06, "logits/chosen": -0.9242814779281616, "logits/rejected": -0.9198191165924072, "logps/chosen": -151.96864318847656, "logps/rejected": -306.1453857421875, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -0.8946777582168579, "rewards/margins": 12.273355484008789, "rewards/rejected": -13.168033599853516, "step": 1304 }, { "epoch": 0.45, "learning_rate": 1.811543482906212e-06, "logits/chosen": -0.9412828683853149, "logits/rejected": -0.9134724140167236, "logps/chosen": -218.2959442138672, "logps/rejected": -312.0902099609375, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -0.2488238513469696, "rewards/margins": 11.645251274108887, "rewards/rejected": -11.894075393676758, "step": 1305 }, { "epoch": 0.45, "learning_rate": 1.8112204160469026e-06, "logits/chosen": -0.8071958422660828, "logits/rejected": -0.799265444278717, "logps/chosen": -198.4914093017578, "logps/rejected": -330.65533447265625, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -1.2518258094787598, "rewards/margins": 12.101410865783691, "rewards/rejected": -13.353235244750977, "step": 1306 }, { "epoch": 0.45, "learning_rate": 1.810897101370951e-06, "logits/chosen": -0.8576924800872803, "logits/rejected": -0.8313103914260864, "logps/chosen": -200.25466918945312, "logps/rejected": -291.598388671875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.7238911390304565, "rewards/margins": 9.036298751831055, "rewards/rejected": -9.7601900100708, "step": 1307 }, { "epoch": 0.45, "learning_rate": 1.8105735389771254e-06, "logits/chosen": -0.8213300108909607, "logits/rejected": -0.7891456484794617, "logps/chosen": -201.91156005859375, "logps/rejected": -287.76239013671875, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": -0.12511634826660156, "rewards/margins": 11.23020076751709, "rewards/rejected": -11.355317115783691, "step": 1308 }, { "epoch": 0.45, "learning_rate": 1.81024972896427e-06, "logits/chosen": -0.8840909004211426, "logits/rejected": -0.8586518168449402, "logps/chosen": -237.38308715820312, "logps/rejected": -359.355224609375, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -1.824358344078064, "rewards/margins": 12.0161771774292, "rewards/rejected": -13.840535163879395, "step": 1309 }, { "epoch": 0.45, "learning_rate": 1.809925671431304e-06, "logits/chosen": -0.8049446940422058, "logits/rejected": -0.7877117991447449, "logps/chosen": -202.4441680908203, "logps/rejected": -282.0062561035156, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.8297747373580933, "rewards/margins": 10.268756866455078, "rewards/rejected": -12.098531723022461, "step": 1310 }, { "epoch": 0.45, "learning_rate": 1.8096013664772227e-06, "logits/chosen": -0.7361057996749878, "logits/rejected": -0.7009215354919434, "logps/chosen": -196.32876586914062, "logps/rejected": -333.78717041015625, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.4115579128265381, "rewards/margins": 12.114669799804688, "rewards/rejected": -12.526226043701172, "step": 1311 }, { "epoch": 0.45, "learning_rate": 1.8092768142010968e-06, "logits/chosen": -0.8625467419624329, "logits/rejected": -0.8520607948303223, "logps/chosen": -147.8010711669922, "logps/rejected": -235.15908813476562, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.7018247842788696, "rewards/margins": 9.699538230895996, "rewards/rejected": -10.401362419128418, "step": 1312 }, { "epoch": 0.45, "learning_rate": 1.8089520147020721e-06, "logits/chosen": -0.8727328777313232, "logits/rejected": -0.8551067113876343, "logps/chosen": -174.41183471679688, "logps/rejected": -298.744873046875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.8148925304412842, "rewards/margins": 11.602606773376465, "rewards/rejected": -12.417498588562012, "step": 1313 }, { "epoch": 0.45, "learning_rate": 1.808626968079371e-06, "logits/chosen": -0.8618468046188354, "logits/rejected": -0.8321620225906372, "logps/chosen": -182.58151245117188, "logps/rejected": -315.7815856933594, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.8889837265014648, "rewards/margins": 13.141471862792969, "rewards/rejected": -14.03045654296875, "step": 1314 }, { "epoch": 0.45, "learning_rate": 1.80830167443229e-06, "logits/chosen": -0.8434241414070129, "logits/rejected": -0.8361705541610718, "logps/chosen": -241.6572265625, "logps/rejected": -364.5027770996094, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": -1.0060796737670898, "rewards/margins": 11.622936248779297, "rewards/rejected": -12.629015922546387, "step": 1315 }, { "epoch": 0.45, "learning_rate": 1.8079761338602028e-06, "logits/chosen": -0.8109169006347656, "logits/rejected": -0.7956953048706055, "logps/chosen": -168.7012939453125, "logps/rejected": -248.65640258789062, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.2989234924316406, "rewards/margins": 8.818170547485352, "rewards/rejected": -10.117094993591309, "step": 1316 }, { "epoch": 0.45, "learning_rate": 1.8076503464625566e-06, "logits/chosen": -0.8248729109764099, "logits/rejected": -0.8103413581848145, "logps/chosen": -245.45816040039062, "logps/rejected": -397.45318603515625, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.2645459771156311, "rewards/margins": 13.7835054397583, "rewards/rejected": -14.048048973083496, "step": 1317 }, { "epoch": 0.45, "learning_rate": 1.8073243123388758e-06, "logits/chosen": -0.8728429079055786, "logits/rejected": -0.8573363423347473, "logps/chosen": -123.80121612548828, "logps/rejected": -197.2467041015625, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -0.8417472839355469, "rewards/margins": 8.581016540527344, "rewards/rejected": -9.422762870788574, "step": 1318 }, { "epoch": 0.45, "learning_rate": 1.8069980315887586e-06, "logits/chosen": -0.6932070851325989, "logits/rejected": -0.6739871501922607, "logps/chosen": -118.62682342529297, "logps/rejected": -221.33993530273438, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 0.6364561915397644, "rewards/margins": 10.126411437988281, "rewards/rejected": -9.489954948425293, "step": 1319 }, { "epoch": 0.45, "learning_rate": 1.8066715043118794e-06, "logits/chosen": -0.7861067056655884, "logits/rejected": -0.7822412252426147, "logps/chosen": -215.98907470703125, "logps/rejected": -313.4100341796875, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -0.8215377330780029, "rewards/margins": 9.90938949584961, "rewards/rejected": -10.730927467346191, "step": 1320 }, { "epoch": 0.45, "learning_rate": 1.8063447306079884e-06, "logits/chosen": -0.9031177759170532, "logits/rejected": -0.8727332949638367, "logps/chosen": -207.62261962890625, "logps/rejected": -270.46697998046875, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": -0.8313158750534058, "rewards/margins": 10.141761779785156, "rewards/rejected": -10.973077774047852, "step": 1321 }, { "epoch": 0.45, "learning_rate": 1.80601771057691e-06, "logits/chosen": -0.7516474723815918, "logits/rejected": -0.7392072081565857, "logps/chosen": -201.2897186279297, "logps/rejected": -280.16192626953125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.2222758531570435, "rewards/margins": 11.233688354492188, "rewards/rejected": -12.455964088439941, "step": 1322 }, { "epoch": 0.45, "learning_rate": 1.8056904443185439e-06, "logits/chosen": -0.8856947422027588, "logits/rejected": -0.8572745323181152, "logps/chosen": -211.46266174316406, "logps/rejected": -281.5928955078125, "loss": 0.0775, "rewards/accuracies": 1.0, "rewards/chosen": -0.07201218605041504, "rewards/margins": 10.834244728088379, "rewards/rejected": -10.906256675720215, "step": 1323 }, { "epoch": 0.45, "learning_rate": 1.805362931932866e-06, "logits/chosen": -0.8434321284294128, "logits/rejected": -0.7949769496917725, "logps/chosen": -229.88198852539062, "logps/rejected": -315.903564453125, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -0.9547314643859863, "rewards/margins": 13.208671569824219, "rewards/rejected": -14.163402557373047, "step": 1324 }, { "epoch": 0.45, "learning_rate": 1.8050351735199265e-06, "logits/chosen": -0.7736734747886658, "logits/rejected": -0.7421947121620178, "logps/chosen": -198.89035034179688, "logps/rejected": -250.29025268554688, "loss": 0.0783, "rewards/accuracies": 1.0, "rewards/chosen": -0.8548562526702881, "rewards/margins": 8.939332008361816, "rewards/rejected": -9.794189453125, "step": 1325 }, { "epoch": 0.45, "learning_rate": 1.8047071691798515e-06, "logits/chosen": -0.7990137338638306, "logits/rejected": -0.764236330986023, "logps/chosen": -231.070068359375, "logps/rejected": -382.1717834472656, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -0.4846240282058716, "rewards/margins": 13.552424430847168, "rewards/rejected": -14.037050247192383, "step": 1326 }, { "epoch": 0.45, "learning_rate": 1.8043789190128413e-06, "logits/chosen": -0.8550387620925903, "logits/rejected": -0.8192448616027832, "logps/chosen": -227.6249542236328, "logps/rejected": -347.5718078613281, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 0.7973707318305969, "rewards/margins": 13.797468185424805, "rewards/rejected": -13.00009822845459, "step": 1327 }, { "epoch": 0.45, "learning_rate": 1.8040504231191723e-06, "logits/chosen": -0.8656715154647827, "logits/rejected": -0.8401843309402466, "logps/chosen": -213.58908081054688, "logps/rejected": -293.8030700683594, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -1.0522841215133667, "rewards/margins": 11.158697128295898, "rewards/rejected": -12.210980415344238, "step": 1328 }, { "epoch": 0.45, "learning_rate": 1.8037216815991947e-06, "logits/chosen": -0.7623288631439209, "logits/rejected": -0.7319392561912537, "logps/chosen": -181.3868408203125, "logps/rejected": -265.6129150390625, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": 0.605713963508606, "rewards/margins": 11.391671180725098, "rewards/rejected": -10.785957336425781, "step": 1329 }, { "epoch": 0.45, "learning_rate": 1.8033926945533352e-06, "logits/chosen": -0.8826385140419006, "logits/rejected": -0.8534901738166809, "logps/chosen": -256.04949951171875, "logps/rejected": -361.06915283203125, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -1.4112873077392578, "rewards/margins": 11.722200393676758, "rewards/rejected": -13.133487701416016, "step": 1330 }, { "epoch": 0.45, "learning_rate": 1.8030634620820948e-06, "logits/chosen": -0.8194355368614197, "logits/rejected": -0.8078205585479736, "logps/chosen": -199.74774169921875, "logps/rejected": -307.521240234375, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.24823355674743652, "rewards/margins": 11.802779197692871, "rewards/rejected": -12.051012992858887, "step": 1331 }, { "epoch": 0.45, "learning_rate": 1.802733984286049e-06, "logits/chosen": -0.8170106410980225, "logits/rejected": -0.7942227721214294, "logps/chosen": -183.41845703125, "logps/rejected": -305.8529052734375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.2811121940612793, "rewards/margins": 11.077672004699707, "rewards/rejected": -11.358785629272461, "step": 1332 }, { "epoch": 0.45, "learning_rate": 1.802404261265849e-06, "logits/chosen": -1.0052013397216797, "logits/rejected": -0.9780867099761963, "logps/chosen": -226.69476318359375, "logps/rejected": -363.89794921875, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": 0.07419270277023315, "rewards/margins": 14.087048530578613, "rewards/rejected": -14.012855529785156, "step": 1333 }, { "epoch": 0.46, "learning_rate": 1.8020742931222208e-06, "logits/chosen": -0.836261510848999, "logits/rejected": -0.8022025227546692, "logps/chosen": -220.72091674804688, "logps/rejected": -351.2113952636719, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.0150699615478516, "rewards/margins": 12.8187255859375, "rewards/rejected": -13.833797454833984, "step": 1334 }, { "epoch": 0.46, "learning_rate": 1.8017440799559647e-06, "logits/chosen": -0.8948416113853455, "logits/rejected": -0.8597502708435059, "logps/chosen": -182.6409454345703, "logps/rejected": -262.8419189453125, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -1.7283581495285034, "rewards/margins": 10.297736167907715, "rewards/rejected": -12.026095390319824, "step": 1335 }, { "epoch": 0.46, "learning_rate": 1.8014136218679566e-06, "logits/chosen": -0.8781014680862427, "logits/rejected": -0.8468568921089172, "logps/chosen": -144.8345489501953, "logps/rejected": -240.119140625, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -1.0473954677581787, "rewards/margins": 9.389409065246582, "rewards/rejected": -10.43680477142334, "step": 1336 }, { "epoch": 0.46, "learning_rate": 1.8010829189591467e-06, "logits/chosen": -0.8318423628807068, "logits/rejected": -0.8206349611282349, "logps/chosen": -115.79733276367188, "logps/rejected": -226.0734100341797, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -1.5512995719909668, "rewards/margins": 9.961996078491211, "rewards/rejected": -11.513296127319336, "step": 1337 }, { "epoch": 0.46, "learning_rate": 1.8007519713305603e-06, "logits/chosen": -0.8605482578277588, "logits/rejected": -0.8243541717529297, "logps/chosen": -192.4595184326172, "logps/rejected": -268.0455322265625, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.7818377017974854, "rewards/margins": 9.892126083374023, "rewards/rejected": -10.67396354675293, "step": 1338 }, { "epoch": 0.46, "learning_rate": 1.8004207790832973e-06, "logits/chosen": -0.7994627356529236, "logits/rejected": -0.7713568806648254, "logps/chosen": -257.8935546875, "logps/rejected": -318.2850646972656, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.42808592319488525, "rewards/margins": 11.681319236755371, "rewards/rejected": -12.109405517578125, "step": 1339 }, { "epoch": 0.46, "learning_rate": 1.8000893423185325e-06, "logits/chosen": -0.7909426093101501, "logits/rejected": -0.7930725812911987, "logps/chosen": -153.60964965820312, "logps/rejected": -282.8760986328125, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.13698303699493408, "rewards/margins": 11.099075317382812, "rewards/rejected": -10.962091445922852, "step": 1340 }, { "epoch": 0.46, "learning_rate": 1.7997576611375145e-06, "logits/chosen": -0.8186220526695251, "logits/rejected": -0.784988284111023, "logps/chosen": -109.82406616210938, "logps/rejected": -204.30860900878906, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -1.2697765827178955, "rewards/margins": 9.772456169128418, "rewards/rejected": -11.04223346710205, "step": 1341 }, { "epoch": 0.46, "learning_rate": 1.7994257356415683e-06, "logits/chosen": -0.8228194117546082, "logits/rejected": -0.8149482011795044, "logps/chosen": -172.64683532714844, "logps/rejected": -367.57098388671875, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.0306051969528198, "rewards/margins": 13.566703796386719, "rewards/rejected": -14.597310066223145, "step": 1342 }, { "epoch": 0.46, "learning_rate": 1.7990935659320923e-06, "logits/chosen": -0.7869368195533752, "logits/rejected": -0.7519600987434387, "logps/chosen": -233.62762451171875, "logps/rejected": -326.51513671875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.6878098249435425, "rewards/margins": 12.417576789855957, "rewards/rejected": -13.105387687683105, "step": 1343 }, { "epoch": 0.46, "learning_rate": 1.7987611521105595e-06, "logits/chosen": -0.801300048828125, "logits/rejected": -0.7579471468925476, "logps/chosen": -221.8763427734375, "logps/rejected": -327.63604736328125, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.748105525970459, "rewards/margins": 12.740678787231445, "rewards/rejected": -14.488784790039062, "step": 1344 }, { "epoch": 0.46, "learning_rate": 1.798428494278518e-06, "logits/chosen": -0.8741844892501831, "logits/rejected": -0.8616809248924255, "logps/chosen": -223.85467529296875, "logps/rejected": -370.7120361328125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.2263880968093872, "rewards/margins": 11.921072959899902, "rewards/rejected": -12.1474609375, "step": 1345 }, { "epoch": 0.46, "learning_rate": 1.7980955925375899e-06, "logits/chosen": -0.8870084881782532, "logits/rejected": -0.878656268119812, "logps/chosen": -133.94993591308594, "logps/rejected": -293.099609375, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": -1.1158357858657837, "rewards/margins": 12.688614845275879, "rewards/rejected": -13.804451942443848, "step": 1346 }, { "epoch": 0.46, "learning_rate": 1.797762446989472e-06, "logits/chosen": -0.8662945628166199, "logits/rejected": -0.8316484093666077, "logps/chosen": -173.6237335205078, "logps/rejected": -318.4337158203125, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.9189452528953552, "rewards/margins": 12.913951873779297, "rewards/rejected": -13.832897186279297, "step": 1347 }, { "epoch": 0.46, "learning_rate": 1.7974290577359365e-06, "logits/chosen": -0.8179447650909424, "logits/rejected": -0.7897517681121826, "logps/chosen": -226.60247802734375, "logps/rejected": -395.7723388671875, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -1.0342785120010376, "rewards/margins": 13.119166374206543, "rewards/rejected": -14.153444290161133, "step": 1348 }, { "epoch": 0.46, "learning_rate": 1.797095424878828e-06, "logits/chosen": -0.9393850564956665, "logits/rejected": -0.8981322646141052, "logps/chosen": -204.73204040527344, "logps/rejected": -280.904541015625, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 0.2869521975517273, "rewards/margins": 12.861991882324219, "rewards/rejected": -12.575040817260742, "step": 1349 }, { "epoch": 0.46, "learning_rate": 1.796761548520068e-06, "logits/chosen": -0.9295297861099243, "logits/rejected": -0.8992589712142944, "logps/chosen": -221.4350128173828, "logps/rejected": -321.6278076171875, "loss": 0.0834, "rewards/accuracies": 1.0, "rewards/chosen": -0.7072958946228027, "rewards/margins": 12.333650588989258, "rewards/rejected": -13.040946960449219, "step": 1350 }, { "epoch": 0.46, "learning_rate": 1.7964274287616502e-06, "logits/chosen": -0.8256145119667053, "logits/rejected": -0.7979663610458374, "logps/chosen": -217.1974639892578, "logps/rejected": -314.001953125, "loss": 0.0167, "rewards/accuracies": 0.9375, "rewards/chosen": 0.23901806771755219, "rewards/margins": 13.167156219482422, "rewards/rejected": -12.92813777923584, "step": 1351 }, { "epoch": 0.46, "learning_rate": 1.7960930657056437e-06, "logits/chosen": -0.7114533185958862, "logits/rejected": -0.6667627692222595, "logps/chosen": -247.61793518066406, "logps/rejected": -294.21380615234375, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.033425286412239075, "rewards/margins": 12.260238647460938, "rewards/rejected": -12.293663024902344, "step": 1352 }, { "epoch": 0.46, "learning_rate": 1.7957584594541922e-06, "logits/chosen": -0.8525786995887756, "logits/rejected": -0.8105593323707581, "logps/chosen": -221.80545043945312, "logps/rejected": -304.8814697265625, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": -1.2748374938964844, "rewards/margins": 11.949190139770508, "rewards/rejected": -13.224029541015625, "step": 1353 }, { "epoch": 0.46, "learning_rate": 1.7954236101095126e-06, "logits/chosen": -0.9389524459838867, "logits/rejected": -0.9220773577690125, "logps/chosen": -250.8657989501953, "logps/rejected": -384.9174499511719, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.12171410769224167, "rewards/margins": 13.71306037902832, "rewards/rejected": -13.834774017333984, "step": 1354 }, { "epoch": 0.46, "learning_rate": 1.7950885177738977e-06, "logits/chosen": -0.797858476638794, "logits/rejected": -0.78495192527771, "logps/chosen": -202.41046142578125, "logps/rejected": -327.6152038574219, "loss": 0.0647, "rewards/accuracies": 1.0, "rewards/chosen": 0.12958672642707825, "rewards/margins": 13.073654174804688, "rewards/rejected": -12.94406795501709, "step": 1355 }, { "epoch": 0.46, "learning_rate": 1.794753182549713e-06, "logits/chosen": -0.8517117500305176, "logits/rejected": -0.8463538289070129, "logps/chosen": -121.22441101074219, "logps/rejected": -238.52915954589844, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.45010244846344, "rewards/margins": 9.857183456420898, "rewards/rejected": -11.30728530883789, "step": 1356 }, { "epoch": 0.46, "learning_rate": 1.7944176045393987e-06, "logits/chosen": -0.7783589363098145, "logits/rejected": -0.7740770578384399, "logps/chosen": -172.48458862304688, "logps/rejected": -312.83935546875, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -1.123544692993164, "rewards/margins": 12.118762016296387, "rewards/rejected": -13.24230670928955, "step": 1357 }, { "epoch": 0.46, "learning_rate": 1.794081783845469e-06, "logits/chosen": -0.8395879864692688, "logits/rejected": -0.8173091411590576, "logps/chosen": -177.76275634765625, "logps/rejected": -238.15032958984375, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.1569676399230957, "rewards/margins": 9.506814002990723, "rewards/rejected": -10.663782119750977, "step": 1358 }, { "epoch": 0.46, "learning_rate": 1.7937457205705139e-06, "logits/chosen": -0.8582652807235718, "logits/rejected": -0.821345329284668, "logps/chosen": -261.78216552734375, "logps/rejected": -292.1777648925781, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -0.9707947969436646, "rewards/margins": 8.02750301361084, "rewards/rejected": -8.998298645019531, "step": 1359 }, { "epoch": 0.46, "learning_rate": 1.7934094148171944e-06, "logits/chosen": -0.7616480588912964, "logits/rejected": -0.7441171407699585, "logps/chosen": -167.42056274414062, "logps/rejected": -250.58447265625, "loss": 0.0367, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1955604553222656, "rewards/margins": 9.958598136901855, "rewards/rejected": -11.154159545898438, "step": 1360 }, { "epoch": 0.46, "learning_rate": 1.7930728666882482e-06, "logits/chosen": -0.8531308174133301, "logits/rejected": -0.8169324994087219, "logps/chosen": -258.20208740234375, "logps/rejected": -392.0423583984375, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -0.44145989418029785, "rewards/margins": 13.88356876373291, "rewards/rejected": -14.325028419494629, "step": 1361 }, { "epoch": 0.46, "learning_rate": 1.7927360762864856e-06, "logits/chosen": -0.8100265860557556, "logits/rejected": -0.7980215549468994, "logps/chosen": -191.7351531982422, "logps/rejected": -332.90753173828125, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.9731425642967224, "rewards/margins": 14.008647918701172, "rewards/rejected": -14.981790542602539, "step": 1362 }, { "epoch": 0.47, "learning_rate": 1.792399043714792e-06, "logits/chosen": -0.8897106051445007, "logits/rejected": -0.8455422520637512, "logps/chosen": -277.89373779296875, "logps/rejected": -427.1750183105469, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.5221372842788696, "rewards/margins": 15.62132453918457, "rewards/rejected": -15.099187850952148, "step": 1363 }, { "epoch": 0.47, "learning_rate": 1.7920617690761257e-06, "logits/chosen": -0.8141496181488037, "logits/rejected": -0.7781321406364441, "logps/chosen": -214.82032775878906, "logps/rejected": -312.24432373046875, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.48781508207321167, "rewards/margins": 12.852728843688965, "rewards/rejected": -13.340543746948242, "step": 1364 }, { "epoch": 0.47, "learning_rate": 1.7917242524735196e-06, "logits/chosen": -0.809324324131012, "logits/rejected": -0.8013820052146912, "logps/chosen": -198.000244140625, "logps/rejected": -315.71258544921875, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -0.2271726429462433, "rewards/margins": 12.016664505004883, "rewards/rejected": -12.243839263916016, "step": 1365 }, { "epoch": 0.47, "learning_rate": 1.7913864940100806e-06, "logits/chosen": -0.8221286535263062, "logits/rejected": -0.8065629601478577, "logps/chosen": -222.85238647460938, "logps/rejected": -333.92401123046875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.17635463178157806, "rewards/margins": 12.773186683654785, "rewards/rejected": -12.949541091918945, "step": 1366 }, { "epoch": 0.47, "learning_rate": 1.791048493788989e-06, "logits/chosen": -0.7872342467308044, "logits/rejected": -0.7661165595054626, "logps/chosen": -222.678955078125, "logps/rejected": -331.4288330078125, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7101680040359497, "rewards/margins": 11.719133377075195, "rewards/rejected": -12.429301261901855, "step": 1367 }, { "epoch": 0.47, "learning_rate": 1.790710251913499e-06, "logits/chosen": -0.8607745170593262, "logits/rejected": -0.8280662298202515, "logps/chosen": -172.07810974121094, "logps/rejected": -251.62139892578125, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.016156304627656937, "rewards/margins": 11.293601989746094, "rewards/rejected": -11.309758186340332, "step": 1368 }, { "epoch": 0.47, "learning_rate": 1.7903717684869395e-06, "logits/chosen": -0.7703752517700195, "logits/rejected": -0.7507156133651733, "logps/chosen": -207.0035400390625, "logps/rejected": -345.0559387207031, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -1.7699750661849976, "rewards/margins": 11.858838081359863, "rewards/rejected": -13.628812789916992, "step": 1369 }, { "epoch": 0.47, "learning_rate": 1.7900330436127118e-06, "logits/chosen": -0.8638386726379395, "logits/rejected": -0.808495283126831, "logps/chosen": -219.14927673339844, "logps/rejected": -221.17263793945312, "loss": 0.0542, "rewards/accuracies": 0.9375, "rewards/chosen": 0.02467823028564453, "rewards/margins": 9.103743553161621, "rewards/rejected": -9.079066276550293, "step": 1370 }, { "epoch": 0.47, "learning_rate": 1.7896940773942921e-06, "logits/chosen": -0.8124917149543762, "logits/rejected": -0.7978273630142212, "logps/chosen": -195.95114135742188, "logps/rejected": -301.4322509765625, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -0.6992974281311035, "rewards/margins": 10.887870788574219, "rewards/rejected": -11.58716869354248, "step": 1371 }, { "epoch": 0.47, "learning_rate": 1.7893548699352299e-06, "logits/chosen": -0.8008429408073425, "logits/rejected": -0.7716078758239746, "logps/chosen": -207.20455932617188, "logps/rejected": -325.2478332519531, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -0.40991613268852234, "rewards/margins": 11.679949760437012, "rewards/rejected": -12.089866638183594, "step": 1372 }, { "epoch": 0.47, "learning_rate": 1.789015421339148e-06, "logits/chosen": -0.750895082950592, "logits/rejected": -0.7163485884666443, "logps/chosen": -172.66419982910156, "logps/rejected": -278.2442626953125, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -0.4439256191253662, "rewards/margins": 12.333617210388184, "rewards/rejected": -12.777543067932129, "step": 1373 }, { "epoch": 0.47, "learning_rate": 1.7886757317097438e-06, "logits/chosen": -0.8533534407615662, "logits/rejected": -0.8251014351844788, "logps/chosen": -205.39266967773438, "logps/rejected": -291.1278381347656, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.09189948439598083, "rewards/margins": 12.014113426208496, "rewards/rejected": -12.106014251708984, "step": 1374 }, { "epoch": 0.47, "learning_rate": 1.7883358011507874e-06, "logits/chosen": -0.7984305620193481, "logits/rejected": -0.7742848992347717, "logps/chosen": -168.3642578125, "logps/rejected": -284.41015625, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -0.3900391161441803, "rewards/margins": 12.778938293457031, "rewards/rejected": -13.168976783752441, "step": 1375 }, { "epoch": 0.47, "learning_rate": 1.7879956297661227e-06, "logits/chosen": -0.800369381904602, "logits/rejected": -0.7662221789360046, "logps/chosen": -212.4603271484375, "logps/rejected": -312.0048522949219, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -1.0484123229980469, "rewards/margins": 11.26258659362793, "rewards/rejected": -12.310998916625977, "step": 1376 }, { "epoch": 0.47, "learning_rate": 1.787655217659668e-06, "logits/chosen": -0.7576779723167419, "logits/rejected": -0.7519466280937195, "logps/chosen": -174.92510986328125, "logps/rejected": -267.49371337890625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.1420536041259766, "rewards/margins": 8.592673301696777, "rewards/rejected": -9.734726905822754, "step": 1377 }, { "epoch": 0.47, "learning_rate": 1.7873145649354142e-06, "logits/chosen": -0.8951123356819153, "logits/rejected": -0.8616929054260254, "logps/chosen": -261.187255859375, "logps/rejected": -382.56134033203125, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.40022012591362, "rewards/margins": 13.098538398742676, "rewards/rejected": -13.498757362365723, "step": 1378 }, { "epoch": 0.47, "learning_rate": 1.7869736716974263e-06, "logits/chosen": -0.7246235013008118, "logits/rejected": -0.695183515548706, "logps/chosen": -243.7691650390625, "logps/rejected": -367.77508544921875, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.6893659830093384, "rewards/margins": 14.260021209716797, "rewards/rejected": -14.949386596679688, "step": 1379 }, { "epoch": 0.47, "learning_rate": 1.7866325380498416e-06, "logits/chosen": -0.7941666841506958, "logits/rejected": -0.7690256237983704, "logps/chosen": -255.9532928466797, "logps/rejected": -440.66351318359375, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.5842981934547424, "rewards/margins": 17.053224563598633, "rewards/rejected": -17.637523651123047, "step": 1380 }, { "epoch": 0.47, "learning_rate": 1.7862911640968725e-06, "logits/chosen": -0.7796670198440552, "logits/rejected": -0.7757119536399841, "logps/chosen": -133.58836364746094, "logps/rejected": -238.3734588623047, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0020347833633423, "rewards/margins": 10.128975868225098, "rewards/rejected": -11.131011009216309, "step": 1381 }, { "epoch": 0.47, "learning_rate": 1.785949549942804e-06, "logits/chosen": -0.7879679203033447, "logits/rejected": -0.7837026715278625, "logps/chosen": -132.5624237060547, "logps/rejected": -230.97959899902344, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -0.9976705312728882, "rewards/margins": 9.118314743041992, "rewards/rejected": -10.115985870361328, "step": 1382 }, { "epoch": 0.47, "learning_rate": 1.785607695691994e-06, "logits/chosen": -0.7541405558586121, "logits/rejected": -0.7234245538711548, "logps/chosen": -167.7085723876953, "logps/rejected": -254.9217071533203, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": -1.0688307285308838, "rewards/margins": 8.55461311340332, "rewards/rejected": -9.623444557189941, "step": 1383 }, { "epoch": 0.47, "learning_rate": 1.7852656014488747e-06, "logits/chosen": -0.7789520621299744, "logits/rejected": -0.7780622243881226, "logps/chosen": -183.73580932617188, "logps/rejected": -342.14349365234375, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -2.4980998039245605, "rewards/margins": 11.427184104919434, "rewards/rejected": -13.925285339355469, "step": 1384 }, { "epoch": 0.47, "learning_rate": 1.7849232673179513e-06, "logits/chosen": -0.7240305542945862, "logits/rejected": -0.7180548310279846, "logps/chosen": -117.29716491699219, "logps/rejected": -232.56600952148438, "loss": 0.0346, "rewards/accuracies": 0.9375, "rewards/chosen": -0.255265474319458, "rewards/margins": 10.314377784729004, "rewards/rejected": -10.569644927978516, "step": 1385 }, { "epoch": 0.47, "learning_rate": 1.7845806934038016e-06, "logits/chosen": -0.8085510730743408, "logits/rejected": -0.7856676578521729, "logps/chosen": -169.40086364746094, "logps/rejected": -194.84149169921875, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -2.0210742950439453, "rewards/margins": 8.644388198852539, "rewards/rejected": -10.665462493896484, "step": 1386 }, { "epoch": 0.47, "learning_rate": 1.7842378798110778e-06, "logits/chosen": -0.7747136354446411, "logits/rejected": -0.7474648356437683, "logps/chosen": -116.92997741699219, "logps/rejected": -247.1197052001953, "loss": 0.0701, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7583870887756348, "rewards/margins": 12.221345901489258, "rewards/rejected": -13.979732513427734, "step": 1387 }, { "epoch": 0.47, "learning_rate": 1.783894826644504e-06, "logits/chosen": -0.7968648672103882, "logits/rejected": -0.7943611741065979, "logps/chosen": -181.53697204589844, "logps/rejected": -368.64739990234375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.4376036524772644, "rewards/margins": 15.216379165649414, "rewards/rejected": -15.653984069824219, "step": 1388 }, { "epoch": 0.47, "learning_rate": 1.783551534008879e-06, "logits/chosen": -0.8848363161087036, "logits/rejected": -0.8543527126312256, "logps/chosen": -186.16036987304688, "logps/rejected": -286.9901123046875, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.09999596327543259, "rewards/margins": 10.628111839294434, "rewards/rejected": -10.728108406066895, "step": 1389 }, { "epoch": 0.47, "learning_rate": 1.7832080020090736e-06, "logits/chosen": -0.7809678912162781, "logits/rejected": -0.7643985748291016, "logps/chosen": -161.5030975341797, "logps/rejected": -337.62738037109375, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -1.287338137626648, "rewards/margins": 13.674797058105469, "rewards/rejected": -14.96213436126709, "step": 1390 }, { "epoch": 0.47, "learning_rate": 1.7828642307500322e-06, "logits/chosen": -0.7898754477500916, "logits/rejected": -0.7818952202796936, "logps/chosen": -222.60903930664062, "logps/rejected": -385.4198913574219, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -2.0448741912841797, "rewards/margins": 14.266861915588379, "rewards/rejected": -16.311735153198242, "step": 1391 }, { "epoch": 0.48, "learning_rate": 1.7825202203367718e-06, "logits/chosen": -0.7437847852706909, "logits/rejected": -0.6998988389968872, "logps/chosen": -236.25241088867188, "logps/rejected": -297.6666259765625, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.5311062335968018, "rewards/margins": 11.004447937011719, "rewards/rejected": -11.535554885864258, "step": 1392 }, { "epoch": 0.48, "learning_rate": 1.7821759708743831e-06, "logits/chosen": -0.8132649660110474, "logits/rejected": -0.7780836820602417, "logps/chosen": -186.03778076171875, "logps/rejected": -244.07652282714844, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -0.15938682854175568, "rewards/margins": 11.738678932189941, "rewards/rejected": -11.898065567016602, "step": 1393 }, { "epoch": 0.48, "learning_rate": 1.7818314824680298e-06, "logits/chosen": -0.7714201807975769, "logits/rejected": -0.75655198097229, "logps/chosen": -192.9052734375, "logps/rejected": -287.6629943847656, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -1.865771770477295, "rewards/margins": 10.185136795043945, "rewards/rejected": -12.050909042358398, "step": 1394 }, { "epoch": 0.48, "learning_rate": 1.7814867552229477e-06, "logits/chosen": -0.7954355478286743, "logits/rejected": -0.777327835559845, "logps/chosen": -185.69253540039062, "logps/rejected": -313.54559326171875, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": -1.3235466480255127, "rewards/margins": 10.90588092803955, "rewards/rejected": -12.229427337646484, "step": 1395 }, { "epoch": 0.48, "learning_rate": 1.7811417892444472e-06, "logits/chosen": -0.8539643287658691, "logits/rejected": -0.824618935585022, "logps/chosen": -158.89083862304688, "logps/rejected": -198.86721801757812, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/chosen": 0.027931973338127136, "rewards/margins": 8.487203598022461, "rewards/rejected": -8.459270477294922, "step": 1396 }, { "epoch": 0.48, "learning_rate": 1.7807965846379097e-06, "logits/chosen": -0.8305478692054749, "logits/rejected": -0.7960765957832336, "logps/chosen": -189.40382385253906, "logps/rejected": -304.4444580078125, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -1.8741766214370728, "rewards/margins": 12.504648208618164, "rewards/rejected": -14.378824234008789, "step": 1397 }, { "epoch": 0.48, "learning_rate": 1.7804511415087911e-06, "logits/chosen": -0.7614737749099731, "logits/rejected": -0.7640010118484497, "logps/chosen": -195.2991943359375, "logps/rejected": -320.5821838378906, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -1.945024013519287, "rewards/margins": 11.374277114868164, "rewards/rejected": -13.319300651550293, "step": 1398 }, { "epoch": 0.48, "learning_rate": 1.7801054599626188e-06, "logits/chosen": -0.822586715221405, "logits/rejected": -0.8186340928077698, "logps/chosen": -243.7874755859375, "logps/rejected": -365.015625, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -1.5402512550354004, "rewards/margins": 11.448237419128418, "rewards/rejected": -12.988489151000977, "step": 1399 }, { "epoch": 0.48, "learning_rate": 1.7797595401049945e-06, "logits/chosen": -0.7513074278831482, "logits/rejected": -0.7246737480163574, "logps/chosen": -160.854248046875, "logps/rejected": -290.1941833496094, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": 0.08369742333889008, "rewards/margins": 12.485136032104492, "rewards/rejected": -12.401437759399414, "step": 1400 }, { "epoch": 0.48, "learning_rate": 1.7794133820415916e-06, "logits/chosen": -0.8449573516845703, "logits/rejected": -0.8250268697738647, "logps/chosen": -148.43711853027344, "logps/rejected": -266.6126403808594, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.8992863297462463, "rewards/margins": 10.857006072998047, "rewards/rejected": -11.756293296813965, "step": 1401 }, { "epoch": 0.48, "learning_rate": 1.7790669858781566e-06, "logits/chosen": -0.8269514441490173, "logits/rejected": -0.7845551371574402, "logps/chosen": -160.32740783691406, "logps/rejected": -274.77752685546875, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": -0.6996049880981445, "rewards/margins": 12.381636619567871, "rewards/rejected": -13.081241607666016, "step": 1402 }, { "epoch": 0.48, "learning_rate": 1.7787203517205085e-06, "logits/chosen": -0.8066791296005249, "logits/rejected": -0.7957755327224731, "logps/chosen": -164.70008850097656, "logps/rejected": -259.1663513183594, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.44723671674728394, "rewards/margins": 11.113347053527832, "rewards/rejected": -11.56058406829834, "step": 1403 }, { "epoch": 0.48, "learning_rate": 1.7783734796745398e-06, "logits/chosen": -0.7190724015235901, "logits/rejected": -0.6785993576049805, "logps/chosen": -150.17054748535156, "logps/rejected": -294.25323486328125, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -2.333564519882202, "rewards/margins": 12.74257755279541, "rewards/rejected": -15.076143264770508, "step": 1404 }, { "epoch": 0.48, "learning_rate": 1.778026369846215e-06, "logits/chosen": -0.7936459183692932, "logits/rejected": -0.7382724285125732, "logps/chosen": -159.99417114257812, "logps/rejected": -239.93875122070312, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.9982271194458008, "rewards/margins": 9.278047561645508, "rewards/rejected": -11.276273727416992, "step": 1405 }, { "epoch": 0.48, "learning_rate": 1.777679022341571e-06, "logits/chosen": -0.8235186338424683, "logits/rejected": -0.7828934788703918, "logps/chosen": -225.88543701171875, "logps/rejected": -330.80084228515625, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.5115078687667847, "rewards/margins": 13.107673645019531, "rewards/rejected": -13.619179725646973, "step": 1406 }, { "epoch": 0.48, "learning_rate": 1.7773314372667177e-06, "logits/chosen": -0.862003743648529, "logits/rejected": -0.8197764754295349, "logps/chosen": -231.09779357910156, "logps/rejected": -339.085205078125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.215745449066162, "rewards/margins": 12.712682723999023, "rewards/rejected": -13.92842960357666, "step": 1407 }, { "epoch": 0.48, "learning_rate": 1.7769836147278378e-06, "logits/chosen": -0.8231796026229858, "logits/rejected": -0.8078511357307434, "logps/chosen": -182.70948791503906, "logps/rejected": -313.2974548339844, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.1091831922531128, "rewards/margins": 12.015694618225098, "rewards/rejected": -13.124878883361816, "step": 1408 }, { "epoch": 0.48, "learning_rate": 1.7766355548311865e-06, "logits/chosen": -0.8640734553337097, "logits/rejected": -0.8474321961402893, "logps/chosen": -243.6631317138672, "logps/rejected": -377.8013916015625, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": 1.69288969039917, "rewards/margins": 13.36331844329834, "rewards/rejected": -11.670429229736328, "step": 1409 }, { "epoch": 0.48, "learning_rate": 1.7762872576830907e-06, "logits/chosen": -0.7679747343063354, "logits/rejected": -0.7584709525108337, "logps/chosen": -174.32815551757812, "logps/rejected": -290.0274353027344, "loss": 0.0337, "rewards/accuracies": 0.875, "rewards/chosen": -0.9088997840881348, "rewards/margins": 10.36587905883789, "rewards/rejected": -11.274779319763184, "step": 1410 }, { "epoch": 0.48, "learning_rate": 1.775938723389951e-06, "logits/chosen": -0.8354907035827637, "logits/rejected": -0.8104708790779114, "logps/chosen": -224.88705444335938, "logps/rejected": -307.67559814453125, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": 0.3105248212814331, "rewards/margins": 9.989224433898926, "rewards/rejected": -9.678699493408203, "step": 1411 }, { "epoch": 0.48, "learning_rate": 1.7755899520582394e-06, "logits/chosen": -0.7481581568717957, "logits/rejected": -0.7375766038894653, "logps/chosen": -237.7299041748047, "logps/rejected": -380.07025146484375, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.464133858680725, "rewards/margins": 12.95572280883789, "rewards/rejected": -14.419856071472168, "step": 1412 }, { "epoch": 0.48, "learning_rate": 1.7752409437945005e-06, "logits/chosen": -0.8374996185302734, "logits/rejected": -0.8216164112091064, "logps/chosen": -216.41958618164062, "logps/rejected": -337.98956298828125, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": 0.4383057951927185, "rewards/margins": 14.562824249267578, "rewards/rejected": -14.124518394470215, "step": 1413 }, { "epoch": 0.48, "learning_rate": 1.7748916987053522e-06, "logits/chosen": -0.8072883486747742, "logits/rejected": -0.7934396862983704, "logps/chosen": -158.16078186035156, "logps/rejected": -273.75372314453125, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -0.5100224614143372, "rewards/margins": 10.890281677246094, "rewards/rejected": -11.400303840637207, "step": 1414 }, { "epoch": 0.48, "learning_rate": 1.7745422168974834e-06, "logits/chosen": -0.7626752853393555, "logits/rejected": -0.7406042814254761, "logps/chosen": -191.2672882080078, "logps/rejected": -322.9395446777344, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -1.2046585083007812, "rewards/margins": 12.029989242553711, "rewards/rejected": -13.234647750854492, "step": 1415 }, { "epoch": 0.48, "learning_rate": 1.7741924984776567e-06, "logits/chosen": -0.88076251745224, "logits/rejected": -0.861370861530304, "logps/chosen": -176.01873779296875, "logps/rejected": -277.3642883300781, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6431759595870972, "rewards/margins": 12.212446212768555, "rewards/rejected": -12.855622291564941, "step": 1416 }, { "epoch": 0.48, "learning_rate": 1.7738425435527054e-06, "logits/chosen": -0.791793167591095, "logits/rejected": -0.7770541906356812, "logps/chosen": -212.47744750976562, "logps/rejected": -347.980712890625, "loss": 0.0314, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0381413698196411, "rewards/margins": 13.630806922912598, "rewards/rejected": -14.668949127197266, "step": 1417 }, { "epoch": 0.48, "learning_rate": 1.7734923522295362e-06, "logits/chosen": -0.8341869711875916, "logits/rejected": -0.7822365760803223, "logps/chosen": -217.20758056640625, "logps/rejected": -278.9867858886719, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": 0.8016456961631775, "rewards/margins": 11.942913055419922, "rewards/rejected": -11.141266822814941, "step": 1418 }, { "epoch": 0.48, "learning_rate": 1.7731419246151283e-06, "logits/chosen": -0.8587133288383484, "logits/rejected": -0.83413165807724, "logps/chosen": -216.01637268066406, "logps/rejected": -422.9474182128906, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.4493216276168823, "rewards/margins": 17.698680877685547, "rewards/rejected": -19.14800262451172, "step": 1419 }, { "epoch": 0.48, "learning_rate": 1.7727912608165316e-06, "logits/chosen": -0.8266962766647339, "logits/rejected": -0.7839773297309875, "logps/chosen": -250.21548461914062, "logps/rejected": -359.3609924316406, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -0.434847891330719, "rewards/margins": 13.100648880004883, "rewards/rejected": -13.535496711730957, "step": 1420 }, { "epoch": 0.48, "learning_rate": 1.7724403609408696e-06, "logits/chosen": -0.7703555822372437, "logits/rejected": -0.7403845191001892, "logps/chosen": -151.540771484375, "logps/rejected": -254.4603729248047, "loss": 0.0235, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6658918857574463, "rewards/margins": 10.85291862487793, "rewards/rejected": -11.518810272216797, "step": 1421 }, { "epoch": 0.49, "learning_rate": 1.7720892250953373e-06, "logits/chosen": -0.8521228432655334, "logits/rejected": -0.8304892778396606, "logps/chosen": -225.6035614013672, "logps/rejected": -276.82293701171875, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.485591858625412, "rewards/margins": 8.478404998779297, "rewards/rejected": -8.963996887207031, "step": 1422 }, { "epoch": 0.49, "learning_rate": 1.7717378533872015e-06, "logits/chosen": -0.7986995577812195, "logits/rejected": -0.7737226486206055, "logps/chosen": -196.56224060058594, "logps/rejected": -265.8131103515625, "loss": 0.0186, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4512217342853546, "rewards/margins": 9.417794227600098, "rewards/rejected": -9.869016647338867, "step": 1423 }, { "epoch": 0.49, "learning_rate": 1.7713862459238022e-06, "logits/chosen": -0.859768807888031, "logits/rejected": -0.8288081884384155, "logps/chosen": -224.48475646972656, "logps/rejected": -379.3704833984375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.6211622953414917, "rewards/margins": 14.995768547058105, "rewards/rejected": -15.61693000793457, "step": 1424 }, { "epoch": 0.49, "learning_rate": 1.77103440281255e-06, "logits/chosen": -0.7787747979164124, "logits/rejected": -0.7659395337104797, "logps/chosen": -230.038330078125, "logps/rejected": -371.55877685546875, "loss": 0.0359, "rewards/accuracies": 0.9375, "rewards/chosen": -0.31409186124801636, "rewards/margins": 12.784512519836426, "rewards/rejected": -13.098604202270508, "step": 1425 }, { "epoch": 0.49, "learning_rate": 1.770682324160928e-06, "logits/chosen": -0.867309033870697, "logits/rejected": -0.8434286117553711, "logps/chosen": -213.5791015625, "logps/rejected": -296.7678527832031, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.0880351066589355, "rewards/margins": 11.316925048828125, "rewards/rejected": -12.404961585998535, "step": 1426 }, { "epoch": 0.49, "learning_rate": 1.7703300100764918e-06, "logits/chosen": -0.8834157586097717, "logits/rejected": -0.8637316226959229, "logps/chosen": -168.12388610839844, "logps/rejected": -260.4972839355469, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -0.14494486153125763, "rewards/margins": 10.748311042785645, "rewards/rejected": -10.893255233764648, "step": 1427 }, { "epoch": 0.49, "learning_rate": 1.7699774606668682e-06, "logits/chosen": -0.8304847478866577, "logits/rejected": -0.8177350163459778, "logps/chosen": -275.6114807128906, "logps/rejected": -408.3031921386719, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 0.6410923004150391, "rewards/margins": 14.4375, "rewards/rejected": -13.796407699584961, "step": 1428 }, { "epoch": 0.49, "learning_rate": 1.7696246760397565e-06, "logits/chosen": -0.7386487722396851, "logits/rejected": -0.7050730586051941, "logps/chosen": -211.58413696289062, "logps/rejected": -280.6153564453125, "loss": 0.0144, "rewards/accuracies": 0.9375, "rewards/chosen": -0.15675939619541168, "rewards/margins": 9.664798736572266, "rewards/rejected": -9.821558952331543, "step": 1429 }, { "epoch": 0.49, "learning_rate": 1.769271656302927e-06, "logits/chosen": -0.872414231300354, "logits/rejected": -0.856224536895752, "logps/chosen": -164.4801483154297, "logps/rejected": -260.0815124511719, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.9149661064147949, "rewards/margins": 10.033707618713379, "rewards/rejected": -10.948675155639648, "step": 1430 }, { "epoch": 0.49, "learning_rate": 1.768918401564223e-06, "logits/chosen": -0.9030879735946655, "logits/rejected": -0.8643295764923096, "logps/chosen": -153.2635498046875, "logps/rejected": -245.58612060546875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.6057617664337158, "rewards/margins": 10.699085235595703, "rewards/rejected": -12.30484676361084, "step": 1431 }, { "epoch": 0.49, "learning_rate": 1.7685649119315586e-06, "logits/chosen": -0.7332339882850647, "logits/rejected": -0.7029168605804443, "logps/chosen": -272.9918212890625, "logps/rejected": -356.31817626953125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.0123075246810913, "rewards/margins": 12.823399543762207, "rewards/rejected": -13.835707664489746, "step": 1432 }, { "epoch": 0.49, "learning_rate": 1.7682111875129198e-06, "logits/chosen": -0.7881835103034973, "logits/rejected": -0.7622583508491516, "logps/chosen": -147.1434326171875, "logps/rejected": -283.019287109375, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": -0.3421070873737335, "rewards/margins": 14.107664108276367, "rewards/rejected": -14.449771881103516, "step": 1433 }, { "epoch": 0.49, "learning_rate": 1.7678572284163649e-06, "logits/chosen": -0.8482468128204346, "logits/rejected": -0.8153382539749146, "logps/chosen": -152.5101318359375, "logps/rejected": -253.83180236816406, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.0215729475021362, "rewards/margins": 11.889888763427734, "rewards/rejected": -12.911460876464844, "step": 1434 }, { "epoch": 0.49, "learning_rate": 1.7675030347500233e-06, "logits/chosen": -0.8771329522132874, "logits/rejected": -0.8474529981613159, "logps/chosen": -200.86241149902344, "logps/rejected": -316.9364929199219, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -0.19546064734458923, "rewards/margins": 13.0993013381958, "rewards/rejected": -13.29476261138916, "step": 1435 }, { "epoch": 0.49, "learning_rate": 1.7671486066220965e-06, "logits/chosen": -0.8155699372291565, "logits/rejected": -0.789558470249176, "logps/chosen": -208.43214416503906, "logps/rejected": -283.3085632324219, "loss": 0.0279, "rewards/accuracies": 0.9375, "rewards/chosen": 0.32382065057754517, "rewards/margins": 10.476360321044922, "rewards/rejected": -10.15254020690918, "step": 1436 }, { "epoch": 0.49, "learning_rate": 1.7667939441408572e-06, "logits/chosen": -0.7493857741355896, "logits/rejected": -0.7357322573661804, "logps/chosen": -224.4440460205078, "logps/rejected": -380.46142578125, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.9657471179962158, "rewards/margins": 12.694454193115234, "rewards/rejected": -13.660201072692871, "step": 1437 }, { "epoch": 0.49, "learning_rate": 1.76643904741465e-06, "logits/chosen": -0.7754668593406677, "logits/rejected": -0.7595434188842773, "logps/chosen": -212.46583557128906, "logps/rejected": -361.7055969238281, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.9164134860038757, "rewards/margins": 12.50834846496582, "rewards/rejected": -13.424761772155762, "step": 1438 }, { "epoch": 0.49, "learning_rate": 1.766083916551891e-06, "logits/chosen": -0.8483474850654602, "logits/rejected": -0.8421084880828857, "logps/chosen": -180.55813598632812, "logps/rejected": -312.5306091308594, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.5868497490882874, "rewards/margins": 11.11898136138916, "rewards/rejected": -11.705831527709961, "step": 1439 }, { "epoch": 0.49, "learning_rate": 1.765728551661067e-06, "logits/chosen": -0.8162346482276917, "logits/rejected": -0.7826743721961975, "logps/chosen": -238.68338012695312, "logps/rejected": -294.03265380859375, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6983698606491089, "rewards/margins": 10.942476272583008, "rewards/rejected": -11.64084529876709, "step": 1440 }, { "epoch": 0.49, "learning_rate": 1.7653729528507383e-06, "logits/chosen": -0.7339433431625366, "logits/rejected": -0.7116876244544983, "logps/chosen": -163.54837036132812, "logps/rejected": -254.10015869140625, "loss": 0.0692, "rewards/accuracies": 1.0, "rewards/chosen": -0.45935705304145813, "rewards/margins": 11.269224166870117, "rewards/rejected": -11.728580474853516, "step": 1441 }, { "epoch": 0.49, "learning_rate": 1.765017120229535e-06, "logits/chosen": -0.7811201214790344, "logits/rejected": -0.739229142665863, "logps/chosen": -189.27554321289062, "logps/rejected": -268.8426818847656, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.4676870107650757, "rewards/margins": 12.081586837768555, "rewards/rejected": -12.549273490905762, "step": 1442 }, { "epoch": 0.49, "learning_rate": 1.764661053906159e-06, "logits/chosen": -0.805346667766571, "logits/rejected": -0.7830820679664612, "logps/chosen": -220.39871215820312, "logps/rejected": -337.94000244140625, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.20432791113853455, "rewards/margins": 12.810446739196777, "rewards/rejected": -13.014775276184082, "step": 1443 }, { "epoch": 0.49, "learning_rate": 1.7643047539893834e-06, "logits/chosen": -0.6776820421218872, "logits/rejected": -0.668932318687439, "logps/chosen": -104.18621826171875, "logps/rejected": -207.18368530273438, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -0.42636165022850037, "rewards/margins": 9.629112243652344, "rewards/rejected": -10.055474281311035, "step": 1444 }, { "epoch": 0.49, "learning_rate": 1.763948220588053e-06, "logits/chosen": -0.808405876159668, "logits/rejected": -0.765556275844574, "logps/chosen": -198.1292724609375, "logps/rejected": -280.7914733886719, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 0.06504667550325394, "rewards/margins": 12.851943016052246, "rewards/rejected": -12.786895751953125, "step": 1445 }, { "epoch": 0.49, "learning_rate": 1.763591453811084e-06, "logits/chosen": -0.8776959180831909, "logits/rejected": -0.8710606694221497, "logps/chosen": -167.33665466308594, "logps/rejected": -279.6964111328125, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.3956544399261475, "rewards/margins": 11.014020919799805, "rewards/rejected": -12.409673690795898, "step": 1446 }, { "epoch": 0.49, "learning_rate": 1.7632344537674638e-06, "logits/chosen": -0.8372305035591125, "logits/rejected": -0.8143565654754639, "logps/chosen": -120.32472229003906, "logps/rejected": -240.1089630126953, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -0.5178435444831848, "rewards/margins": 10.076627731323242, "rewards/rejected": -10.594470977783203, "step": 1447 }, { "epoch": 0.49, "learning_rate": 1.7628772205662505e-06, "logits/chosen": -0.7966048121452332, "logits/rejected": -0.769184410572052, "logps/chosen": -209.86932373046875, "logps/rejected": -289.4974365234375, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": -0.8658283352851868, "rewards/margins": 11.784496307373047, "rewards/rejected": -12.650325775146484, "step": 1448 }, { "epoch": 0.49, "learning_rate": 1.7625197543165745e-06, "logits/chosen": -0.8414686918258667, "logits/rejected": -0.8148047924041748, "logps/chosen": -226.460693359375, "logps/rejected": -368.44012451171875, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.24249397218227386, "rewards/margins": 12.850227355957031, "rewards/rejected": -13.092720985412598, "step": 1449 }, { "epoch": 0.49, "learning_rate": 1.7621620551276364e-06, "logits/chosen": -0.814388632774353, "logits/rejected": -0.7745023369789124, "logps/chosen": -224.18060302734375, "logps/rejected": -317.639404296875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.0500543117523193, "rewards/margins": 10.825942993164062, "rewards/rejected": -11.875997543334961, "step": 1450 }, { "epoch": 0.5, "learning_rate": 1.7618041231087087e-06, "logits/chosen": -0.7480593919754028, "logits/rejected": -0.7297401428222656, "logps/chosen": -221.64952087402344, "logps/rejected": -314.10943603515625, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -1.3751676082611084, "rewards/margins": 11.406968116760254, "rewards/rejected": -12.782135009765625, "step": 1451 }, { "epoch": 0.5, "learning_rate": 1.7614459583691342e-06, "logits/chosen": -0.7463592886924744, "logits/rejected": -0.7432133555412292, "logps/chosen": -120.76539611816406, "logps/rejected": -243.78045654296875, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -0.971858024597168, "rewards/margins": 9.778365135192871, "rewards/rejected": -10.750223159790039, "step": 1452 }, { "epoch": 0.5, "learning_rate": 1.761087561018328e-06, "logits/chosen": -0.7424542903900146, "logits/rejected": -0.719727635383606, "logps/chosen": -215.63671875, "logps/rejected": -363.2474365234375, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": -0.6205394268035889, "rewards/margins": 13.540130615234375, "rewards/rejected": -14.16066837310791, "step": 1453 }, { "epoch": 0.5, "learning_rate": 1.7607289311657749e-06, "logits/chosen": -0.8954802751541138, "logits/rejected": -0.8705878257751465, "logps/chosen": -221.81752014160156, "logps/rejected": -353.8275146484375, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -0.6746504902839661, "rewards/margins": 13.962858200073242, "rewards/rejected": -14.637508392333984, "step": 1454 }, { "epoch": 0.5, "learning_rate": 1.7603700689210318e-06, "logits/chosen": -0.8791318535804749, "logits/rejected": -0.8592961430549622, "logps/chosen": -223.39735412597656, "logps/rejected": -365.6047668457031, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.2388931512832642, "rewards/margins": 12.575153350830078, "rewards/rejected": -13.814047813415527, "step": 1455 }, { "epoch": 0.5, "learning_rate": 1.760010974393726e-06, "logits/chosen": -0.8835947513580322, "logits/rejected": -0.8214399814605713, "logps/chosen": -214.6200714111328, "logps/rejected": -286.2276306152344, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": -0.8170965909957886, "rewards/margins": 9.812731742858887, "rewards/rejected": -10.629828453063965, "step": 1456 }, { "epoch": 0.5, "learning_rate": 1.7596516476935558e-06, "logits/chosen": -0.8689154982566833, "logits/rejected": -0.8379906415939331, "logps/chosen": -202.748046875, "logps/rejected": -253.66342163085938, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": 0.35381460189819336, "rewards/margins": 10.310452461242676, "rewards/rejected": -9.956637382507324, "step": 1457 }, { "epoch": 0.5, "learning_rate": 1.7592920889302905e-06, "logits/chosen": -0.7466452121734619, "logits/rejected": -0.7152895927429199, "logps/chosen": -204.05374145507812, "logps/rejected": -230.7808074951172, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.6077283620834351, "rewards/margins": 8.720026016235352, "rewards/rejected": -9.327754020690918, "step": 1458 }, { "epoch": 0.5, "learning_rate": 1.7589322982137706e-06, "logits/chosen": -0.7916058301925659, "logits/rejected": -0.7766072154045105, "logps/chosen": -194.16751098632812, "logps/rejected": -314.80841064453125, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -2.5034983158111572, "rewards/margins": 11.650163650512695, "rewards/rejected": -14.15366268157959, "step": 1459 }, { "epoch": 0.5, "learning_rate": 1.7585722756539072e-06, "logits/chosen": -0.8145028352737427, "logits/rejected": -0.7951784729957581, "logps/chosen": -169.50991821289062, "logps/rejected": -240.21209716796875, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.5415918827056885, "rewards/margins": 10.80090618133545, "rewards/rejected": -11.342498779296875, "step": 1460 }, { "epoch": 0.5, "learning_rate": 1.7582120213606821e-06, "logits/chosen": -0.8536046743392944, "logits/rejected": -0.8327876925468445, "logps/chosen": -161.92271423339844, "logps/rejected": -301.16925048828125, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.1430751085281372, "rewards/margins": 13.160255432128906, "rewards/rejected": -14.303330421447754, "step": 1461 }, { "epoch": 0.5, "learning_rate": 1.757851535444148e-06, "logits/chosen": -0.7789602279663086, "logits/rejected": -0.7437139749526978, "logps/chosen": -208.10830688476562, "logps/rejected": -297.508544921875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.1718530654907227, "rewards/margins": 11.8040771484375, "rewards/rejected": -12.975931167602539, "step": 1462 }, { "epoch": 0.5, "learning_rate": 1.7574908180144284e-06, "logits/chosen": -0.8591733574867249, "logits/rejected": -0.8359217643737793, "logps/chosen": -221.4634246826172, "logps/rejected": -314.962646484375, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -1.5266247987747192, "rewards/margins": 11.948895454406738, "rewards/rejected": -13.475519180297852, "step": 1463 }, { "epoch": 0.5, "learning_rate": 1.7571298691817175e-06, "logits/chosen": -0.8030514121055603, "logits/rejected": -0.7984466552734375, "logps/chosen": -244.4355010986328, "logps/rejected": -353.1297302246094, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.6466379165649414, "rewards/margins": 12.632638931274414, "rewards/rejected": -13.279276847839355, "step": 1464 }, { "epoch": 0.5, "learning_rate": 1.7567686890562801e-06, "logits/chosen": -0.7902320027351379, "logits/rejected": -0.7667269706726074, "logps/chosen": -200.5203857421875, "logps/rejected": -338.6833190917969, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.43713945150375366, "rewards/margins": 13.141366958618164, "rewards/rejected": -13.57850456237793, "step": 1465 }, { "epoch": 0.5, "learning_rate": 1.756407277748452e-06, "logits/chosen": -1.0150423049926758, "logits/rejected": -0.9995771050453186, "logps/chosen": -240.307373046875, "logps/rejected": -332.77484130859375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.30578285455703735, "rewards/margins": 12.169427871704102, "rewards/rejected": -12.475210189819336, "step": 1466 }, { "epoch": 0.5, "learning_rate": 1.7560456353686387e-06, "logits/chosen": -0.880210280418396, "logits/rejected": -0.8396898508071899, "logps/chosen": -201.3451690673828, "logps/rejected": -305.1693115234375, "loss": 0.0675, "rewards/accuracies": 1.0, "rewards/chosen": -0.5489287376403809, "rewards/margins": 11.265085220336914, "rewards/rejected": -11.81401252746582, "step": 1467 }, { "epoch": 0.5, "learning_rate": 1.755683762027318e-06, "logits/chosen": -0.7647460699081421, "logits/rejected": -0.742613673210144, "logps/chosen": -230.0319061279297, "logps/rejected": -368.3207092285156, "loss": 0.0277, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0630468130111694, "rewards/margins": 12.914542198181152, "rewards/rejected": -13.977590560913086, "step": 1468 }, { "epoch": 0.5, "learning_rate": 1.7553216578350365e-06, "logits/chosen": -0.8541585206985474, "logits/rejected": -0.8316963911056519, "logps/chosen": -183.29855346679688, "logps/rejected": -297.14044189453125, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.9931298494338989, "rewards/margins": 13.142046928405762, "rewards/rejected": -14.135176658630371, "step": 1469 }, { "epoch": 0.5, "learning_rate": 1.7549593229024121e-06, "logits/chosen": -0.7534193992614746, "logits/rejected": -0.7329376339912415, "logps/chosen": -234.756103515625, "logps/rejected": -353.12127685546875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.7852712869644165, "rewards/margins": 13.54702377319336, "rewards/rejected": -14.332297325134277, "step": 1470 }, { "epoch": 0.5, "learning_rate": 1.7545967573401334e-06, "logits/chosen": -0.796890377998352, "logits/rejected": -0.7613652944564819, "logps/chosen": -166.69664001464844, "logps/rejected": -264.6808776855469, "loss": 0.0846, "rewards/accuracies": 1.0, "rewards/chosen": 0.09108080714941025, "rewards/margins": 9.927591323852539, "rewards/rejected": -9.836511611938477, "step": 1471 }, { "epoch": 0.5, "learning_rate": 1.7542339612589589e-06, "logits/chosen": -0.763643741607666, "logits/rejected": -0.7173985242843628, "logps/chosen": -247.3744354248047, "logps/rejected": -355.6930847167969, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 0.15856009721755981, "rewards/margins": 15.771406173706055, "rewards/rejected": -15.612845420837402, "step": 1472 }, { "epoch": 0.5, "learning_rate": 1.753870934769718e-06, "logits/chosen": -0.8549550175666809, "logits/rejected": -0.8236480951309204, "logps/chosen": -224.96253967285156, "logps/rejected": -307.2044372558594, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.27421921491622925, "rewards/margins": 10.33586311340332, "rewards/rejected": -10.610082626342773, "step": 1473 }, { "epoch": 0.5, "learning_rate": 1.7535076779833102e-06, "logits/chosen": -0.7045982480049133, "logits/rejected": -0.6836397051811218, "logps/chosen": -235.2191162109375, "logps/rejected": -348.552001953125, "loss": 0.0824, "rewards/accuracies": 1.0, "rewards/chosen": -0.12708772718906403, "rewards/margins": 12.88094711303711, "rewards/rejected": -13.008033752441406, "step": 1474 }, { "epoch": 0.5, "learning_rate": 1.7531441910107051e-06, "logits/chosen": -0.8072516322135925, "logits/rejected": -0.790272057056427, "logps/chosen": -210.149658203125, "logps/rejected": -300.2262878417969, "loss": 0.0338, "rewards/accuracies": 0.9375, "rewards/chosen": 0.276469886302948, "rewards/margins": 10.213475227355957, "rewards/rejected": -9.937005996704102, "step": 1475 }, { "epoch": 0.5, "learning_rate": 1.7527804739629434e-06, "logits/chosen": -0.8230287432670593, "logits/rejected": -0.7963164448738098, "logps/chosen": -170.43984985351562, "logps/rejected": -258.6441955566406, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -0.3396727740764618, "rewards/margins": 11.877248764038086, "rewards/rejected": -12.21692180633545, "step": 1476 }, { "epoch": 0.5, "learning_rate": 1.7524165269511358e-06, "logits/chosen": -0.7603526711463928, "logits/rejected": -0.7098641395568848, "logps/chosen": -179.75350952148438, "logps/rejected": -265.3009338378906, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.564407467842102, "rewards/margins": 9.36370849609375, "rewards/rejected": -10.928115844726562, "step": 1477 }, { "epoch": 0.5, "learning_rate": 1.7520523500864624e-06, "logits/chosen": -0.8099430203437805, "logits/rejected": -0.7754592299461365, "logps/chosen": -243.7337188720703, "logps/rejected": -373.0899963378906, "loss": 0.0667, "rewards/accuracies": 1.0, "rewards/chosen": -0.868024468421936, "rewards/margins": 13.258798599243164, "rewards/rejected": -14.126823425292969, "step": 1478 }, { "epoch": 0.5, "learning_rate": 1.7516879434801748e-06, "logits/chosen": -0.8159496784210205, "logits/rejected": -0.7938315868377686, "logps/chosen": -215.72239685058594, "logps/rejected": -356.3453063964844, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8628973364830017, "rewards/margins": 15.07347297668457, "rewards/rejected": -15.936368942260742, "step": 1479 }, { "epoch": 0.51, "learning_rate": 1.7513233072435938e-06, "logits/chosen": -0.7387050986289978, "logits/rejected": -0.7339287996292114, "logps/chosen": -166.64883422851562, "logps/rejected": -300.4131774902344, "loss": 0.0304, "rewards/accuracies": 1.0, "rewards/chosen": -0.8128535151481628, "rewards/margins": 11.556769371032715, "rewards/rejected": -12.369622230529785, "step": 1480 }, { "epoch": 0.51, "learning_rate": 1.7509584414881112e-06, "logits/chosen": -0.7379423379898071, "logits/rejected": -0.7023722529411316, "logps/chosen": -144.63201904296875, "logps/rejected": -243.39208984375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.5961762070655823, "rewards/margins": 10.401494979858398, "rewards/rejected": -10.997672080993652, "step": 1481 }, { "epoch": 0.51, "learning_rate": 1.750593346325188e-06, "logits/chosen": -0.8354224562644958, "logits/rejected": -0.7972431182861328, "logps/chosen": -163.8665008544922, "logps/rejected": -282.8182373046875, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -1.6917002201080322, "rewards/margins": 11.490811347961426, "rewards/rejected": -13.182511329650879, "step": 1482 }, { "epoch": 0.51, "learning_rate": 1.7502280218663562e-06, "logits/chosen": -0.8192406296730042, "logits/rejected": -0.7897568345069885, "logps/chosen": -174.13829040527344, "logps/rejected": -326.23431396484375, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": -1.7565221786499023, "rewards/margins": 12.332639694213867, "rewards/rejected": -14.089162826538086, "step": 1483 }, { "epoch": 0.51, "learning_rate": 1.7498624682232167e-06, "logits/chosen": -0.7855949401855469, "logits/rejected": -0.7667137980461121, "logps/chosen": -213.04527282714844, "logps/rejected": -352.4035949707031, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": 0.8789574503898621, "rewards/margins": 13.588236808776855, "rewards/rejected": -12.70927619934082, "step": 1484 }, { "epoch": 0.51, "learning_rate": 1.7494966855074416e-06, "logits/chosen": -0.9050987958908081, "logits/rejected": -0.8870282769203186, "logps/chosen": -191.85110473632812, "logps/rejected": -311.58209228515625, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": 0.32421550154685974, "rewards/margins": 11.315878868103027, "rewards/rejected": -10.991662979125977, "step": 1485 }, { "epoch": 0.51, "learning_rate": 1.7491306738307723e-06, "logits/chosen": -0.812910795211792, "logits/rejected": -0.78757244348526, "logps/chosen": -215.22068786621094, "logps/rejected": -305.85858154296875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.2042016983032227, "rewards/margins": 12.27049446105957, "rewards/rejected": -13.474698066711426, "step": 1486 }, { "epoch": 0.51, "learning_rate": 1.7487644333050207e-06, "logits/chosen": -0.7788853645324707, "logits/rejected": -0.7455682158470154, "logps/chosen": -142.3671875, "logps/rejected": -296.2526550292969, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.1813404560089111, "rewards/margins": 12.301173210144043, "rewards/rejected": -13.482512474060059, "step": 1487 }, { "epoch": 0.51, "learning_rate": 1.7483979640420676e-06, "logits/chosen": -0.7749877572059631, "logits/rejected": -0.738673210144043, "logps/chosen": -231.14112854003906, "logps/rejected": -351.4631652832031, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": 0.8758467435836792, "rewards/margins": 13.20925235748291, "rewards/rejected": -12.333405494689941, "step": 1488 }, { "epoch": 0.51, "learning_rate": 1.7480312661538645e-06, "logits/chosen": -0.9002779126167297, "logits/rejected": -0.8592373132705688, "logps/chosen": -193.6373291015625, "logps/rejected": -288.0156555175781, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": 0.508070170879364, "rewards/margins": 12.7857666015625, "rewards/rejected": -12.27769660949707, "step": 1489 }, { "epoch": 0.51, "learning_rate": 1.7476643397524329e-06, "logits/chosen": -0.6623485088348389, "logits/rejected": -0.6258158087730408, "logps/chosen": -166.72689819335938, "logps/rejected": -280.37689208984375, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": -1.4044690132141113, "rewards/margins": 11.350469589233398, "rewards/rejected": -12.754936218261719, "step": 1490 }, { "epoch": 0.51, "learning_rate": 1.7472971849498635e-06, "logits/chosen": -0.778293788433075, "logits/rejected": -0.7747454643249512, "logps/chosen": -238.88497924804688, "logps/rejected": -377.4202575683594, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": 0.5115584135055542, "rewards/margins": 12.50534725189209, "rewards/rejected": -11.993788719177246, "step": 1491 }, { "epoch": 0.51, "learning_rate": 1.7469298018583166e-06, "logits/chosen": -0.7440734505653381, "logits/rejected": -0.7006043195724487, "logps/chosen": -189.46644592285156, "logps/rejected": -304.6022644042969, "loss": 0.064, "rewards/accuracies": 1.0, "rewards/chosen": -1.0558228492736816, "rewards/margins": 12.585596084594727, "rewards/rejected": -13.641417503356934, "step": 1492 }, { "epoch": 0.51, "learning_rate": 1.7465621905900237e-06, "logits/chosen": -0.8086879253387451, "logits/rejected": -0.7972272634506226, "logps/chosen": -192.3390655517578, "logps/rejected": -348.3236389160156, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -0.4333204925060272, "rewards/margins": 14.570640563964844, "rewards/rejected": -15.003961563110352, "step": 1493 }, { "epoch": 0.51, "learning_rate": 1.7461943512572842e-06, "logits/chosen": -0.8327419757843018, "logits/rejected": -0.8003393411636353, "logps/chosen": -190.11178588867188, "logps/rejected": -295.7059631347656, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.1847285032272339, "rewards/margins": 11.352916717529297, "rewards/rejected": -12.537644386291504, "step": 1494 }, { "epoch": 0.51, "learning_rate": 1.7458262839724678e-06, "logits/chosen": -0.8541927933692932, "logits/rejected": -0.8279600739479065, "logps/chosen": -232.34898376464844, "logps/rejected": -345.25018310546875, "loss": 0.039, "rewards/accuracies": 1.0, "rewards/chosen": -0.7070342898368835, "rewards/margins": 10.916379928588867, "rewards/rejected": -11.623414993286133, "step": 1495 }, { "epoch": 0.51, "learning_rate": 1.7454579888480146e-06, "logits/chosen": -0.7719184756278992, "logits/rejected": -0.7692685127258301, "logps/chosen": -135.2449188232422, "logps/rejected": -242.40135192871094, "loss": 0.0476, "rewards/accuracies": 0.875, "rewards/chosen": -1.4975032806396484, "rewards/margins": 9.277897834777832, "rewards/rejected": -10.77540111541748, "step": 1496 }, { "epoch": 0.51, "learning_rate": 1.745089465996433e-06, "logits/chosen": -0.7748767137527466, "logits/rejected": -0.7604696750640869, "logps/chosen": -164.42002868652344, "logps/rejected": -275.42626953125, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.22526809573173523, "rewards/margins": 10.58745288848877, "rewards/rejected": -10.812719345092773, "step": 1497 }, { "epoch": 0.51, "learning_rate": 1.744720715530302e-06, "logits/chosen": -0.8082274198532104, "logits/rejected": -0.78718101978302, "logps/chosen": -239.69166564941406, "logps/rejected": -363.5281066894531, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.7142896056175232, "rewards/margins": 10.98876953125, "rewards/rejected": -11.703059196472168, "step": 1498 }, { "epoch": 0.51, "learning_rate": 1.7443517375622703e-06, "logits/chosen": -0.8298966288566589, "logits/rejected": -0.8082895874977112, "logps/chosen": -242.69757080078125, "logps/rejected": -381.9407043457031, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.8553043603897095, "rewards/margins": 14.82905101776123, "rewards/rejected": -15.684354782104492, "step": 1499 }, { "epoch": 0.51, "learning_rate": 1.7439825322050545e-06, "logits/chosen": -0.8276103138923645, "logits/rejected": -0.8080565929412842, "logps/chosen": -234.7543487548828, "logps/rejected": -353.3248291015625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.07282403111457825, "rewards/margins": 14.048355102539062, "rewards/rejected": -14.121179580688477, "step": 1500 }, { "epoch": 0.51, "learning_rate": 1.7436130995714426e-06, "logits/chosen": -0.7235240340232849, "logits/rejected": -0.7142577171325684, "logps/chosen": -197.24078369140625, "logps/rejected": -329.8974914550781, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.037978172302246, "rewards/margins": 11.447677612304688, "rewards/rejected": -12.485655784606934, "step": 1501 }, { "epoch": 0.51, "learning_rate": 1.7432434397742908e-06, "logits/chosen": -0.8904664516448975, "logits/rejected": -0.8528702259063721, "logps/chosen": -229.9987030029297, "logps/rejected": -316.9961853027344, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.5611845254898071, "rewards/margins": 12.32522964477539, "rewards/rejected": -11.764045715332031, "step": 1502 }, { "epoch": 0.51, "learning_rate": 1.7428735529265254e-06, "logits/chosen": -0.8167231678962708, "logits/rejected": -0.7992812991142273, "logps/chosen": -215.7677764892578, "logps/rejected": -308.54669189453125, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -1.1971077919006348, "rewards/margins": 8.940117835998535, "rewards/rejected": -10.137226104736328, "step": 1503 }, { "epoch": 0.51, "learning_rate": 1.7425034391411408e-06, "logits/chosen": -0.8053687214851379, "logits/rejected": -0.7856538891792297, "logps/chosen": -237.46368408203125, "logps/rejected": -342.50469970703125, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -0.48769882321357727, "rewards/margins": 13.115188598632812, "rewards/rejected": -13.602886199951172, "step": 1504 }, { "epoch": 0.51, "learning_rate": 1.7421330985312027e-06, "logits/chosen": -0.8004028797149658, "logits/rejected": -0.7469224333763123, "logps/chosen": -195.50753784179688, "logps/rejected": -301.07781982421875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.1782939434051514, "rewards/margins": 14.056326866149902, "rewards/rejected": -15.234619140625, "step": 1505 }, { "epoch": 0.51, "learning_rate": 1.741762531209845e-06, "logits/chosen": -0.7979966998100281, "logits/rejected": -0.7811987996101379, "logps/chosen": -202.77345275878906, "logps/rejected": -334.48699951171875, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -0.5132155418395996, "rewards/margins": 12.710225105285645, "rewards/rejected": -13.223442077636719, "step": 1506 }, { "epoch": 0.51, "learning_rate": 1.74139173729027e-06, "logits/chosen": -0.7819609045982361, "logits/rejected": -0.7520908713340759, "logps/chosen": -170.85693359375, "logps/rejected": -300.1407165527344, "loss": 0.0304, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0713820457458496, "rewards/margins": 11.95903205871582, "rewards/rejected": -13.030413627624512, "step": 1507 }, { "epoch": 0.51, "learning_rate": 1.741020716885751e-06, "logits/chosen": -0.8256241679191589, "logits/rejected": -0.7912272810935974, "logps/chosen": -195.14710998535156, "logps/rejected": -312.3678283691406, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.8086675405502319, "rewards/margins": 11.658668518066406, "rewards/rejected": -12.467336654663086, "step": 1508 }, { "epoch": 0.52, "learning_rate": 1.7406494701096294e-06, "logits/chosen": -0.8848063349723816, "logits/rejected": -0.8607682585716248, "logps/chosen": -237.39352416992188, "logps/rejected": -354.1212158203125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 0.22046220302581787, "rewards/margins": 13.397079467773438, "rewards/rejected": -13.176616668701172, "step": 1509 }, { "epoch": 0.52, "learning_rate": 1.7402779970753154e-06, "logits/chosen": -0.8258998990058899, "logits/rejected": -0.8105928897857666, "logps/chosen": -196.92193603515625, "logps/rejected": -354.4984436035156, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": 0.7922376394271851, "rewards/margins": 14.252161026000977, "rewards/rejected": -13.45992374420166, "step": 1510 }, { "epoch": 0.52, "learning_rate": 1.7399062978962897e-06, "logits/chosen": -0.7475597858428955, "logits/rejected": -0.7184640765190125, "logps/chosen": -213.47817993164062, "logps/rejected": -283.90911865234375, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.7586238980293274, "rewards/margins": 11.126219749450684, "rewards/rejected": -11.884843826293945, "step": 1511 }, { "epoch": 0.52, "learning_rate": 1.739534372686101e-06, "logits/chosen": -0.7760681509971619, "logits/rejected": -0.7198869585990906, "logps/chosen": -215.66616821289062, "logps/rejected": -209.13763427734375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.838873028755188, "rewards/margins": 8.332603454589844, "rewards/rejected": -9.171477317810059, "step": 1512 }, { "epoch": 0.52, "learning_rate": 1.739162221558367e-06, "logits/chosen": -0.8645758628845215, "logits/rejected": -0.8169350624084473, "logps/chosen": -256.62322998046875, "logps/rejected": -348.41217041015625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 0.8234041333198547, "rewards/margins": 11.155445098876953, "rewards/rejected": -10.332040786743164, "step": 1513 }, { "epoch": 0.52, "learning_rate": 1.738789844626775e-06, "logits/chosen": -0.8250131607055664, "logits/rejected": -0.8044556975364685, "logps/chosen": -237.61355590820312, "logps/rejected": -376.633544921875, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -0.36208251118659973, "rewards/margins": 13.410666465759277, "rewards/rejected": -13.772749900817871, "step": 1514 }, { "epoch": 0.52, "learning_rate": 1.7384172420050812e-06, "logits/chosen": -0.7082387208938599, "logits/rejected": -0.6756806373596191, "logps/chosen": -152.36924743652344, "logps/rejected": -279.89044189453125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.050826072692871, "rewards/margins": 12.603428840637207, "rewards/rejected": -13.654254913330078, "step": 1515 }, { "epoch": 0.52, "learning_rate": 1.7380444138071102e-06, "logits/chosen": -0.820144772529602, "logits/rejected": -0.794156014919281, "logps/chosen": -211.35220336914062, "logps/rejected": -320.85748291015625, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -1.468308687210083, "rewards/margins": 11.719253540039062, "rewards/rejected": -13.18756103515625, "step": 1516 }, { "epoch": 0.52, "learning_rate": 1.737671360146756e-06, "logits/chosen": -0.8779497742652893, "logits/rejected": -0.8344367146492004, "logps/chosen": -221.10682678222656, "logps/rejected": -358.31549072265625, "loss": 0.0653, "rewards/accuracies": 1.0, "rewards/chosen": -1.7818471193313599, "rewards/margins": 13.322516441345215, "rewards/rejected": -15.104363441467285, "step": 1517 }, { "epoch": 0.52, "learning_rate": 1.7372980811379815e-06, "logits/chosen": -0.8471563458442688, "logits/rejected": -0.8377019762992859, "logps/chosen": -182.10806274414062, "logps/rejected": -291.23419189453125, "loss": 0.0348, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7542519569396973, "rewards/margins": 10.14909553527832, "rewards/rejected": -11.903347969055176, "step": 1518 }, { "epoch": 0.52, "learning_rate": 1.7369245768948177e-06, "logits/chosen": -0.7933318018913269, "logits/rejected": -0.7713358402252197, "logps/chosen": -241.4262237548828, "logps/rejected": -335.612060546875, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.28273603320121765, "rewards/margins": 9.549248695373535, "rewards/rejected": -9.831986427307129, "step": 1519 }, { "epoch": 0.52, "learning_rate": 1.736550847531366e-06, "logits/chosen": -0.8952341675758362, "logits/rejected": -0.8610179424285889, "logps/chosen": -225.99981689453125, "logps/rejected": -325.84686279296875, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -1.9323033094406128, "rewards/margins": 11.015168190002441, "rewards/rejected": -12.947471618652344, "step": 1520 }, { "epoch": 0.52, "learning_rate": 1.7361768931617947e-06, "logits/chosen": -0.7626213431358337, "logits/rejected": -0.7423944473266602, "logps/chosen": -162.52630615234375, "logps/rejected": -248.5435791015625, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -0.8705320954322815, "rewards/margins": 10.761211395263672, "rewards/rejected": -11.631742477416992, "step": 1521 }, { "epoch": 0.52, "learning_rate": 1.735802713900342e-06, "logits/chosen": -0.8202340602874756, "logits/rejected": -0.8084032535552979, "logps/chosen": -227.24371337890625, "logps/rejected": -385.9127502441406, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -0.5269811153411865, "rewards/margins": 12.75936222076416, "rewards/rejected": -13.286344528198242, "step": 1522 }, { "epoch": 0.52, "learning_rate": 1.7354283098613148e-06, "logits/chosen": -0.7911797165870667, "logits/rejected": -0.7639609575271606, "logps/chosen": -238.74012756347656, "logps/rejected": -431.2081298828125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.6959792375564575, "rewards/margins": 18.00128936767578, "rewards/rejected": -18.697269439697266, "step": 1523 }, { "epoch": 0.52, "learning_rate": 1.735053681159088e-06, "logits/chosen": -0.6560143232345581, "logits/rejected": -0.6208212375640869, "logps/chosen": -200.71087646484375, "logps/rejected": -286.1566162109375, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -1.3130207061767578, "rewards/margins": 10.644858360290527, "rewards/rejected": -11.957880020141602, "step": 1524 }, { "epoch": 0.52, "learning_rate": 1.7346788279081053e-06, "logits/chosen": -0.8742741346359253, "logits/rejected": -0.8544861078262329, "logps/chosen": -146.04908752441406, "logps/rejected": -276.5857849121094, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.41846174001693726, "rewards/margins": 13.03909683227539, "rewards/rejected": -13.457558631896973, "step": 1525 }, { "epoch": 0.52, "learning_rate": 1.7343037502228795e-06, "logits/chosen": -0.7388788461685181, "logits/rejected": -0.7381728291511536, "logps/chosen": -118.60028076171875, "logps/rejected": -230.6573944091797, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": -0.4806916117668152, "rewards/margins": 10.71658706665039, "rewards/rejected": -11.19727897644043, "step": 1526 }, { "epoch": 0.52, "learning_rate": 1.7339284482179917e-06, "logits/chosen": -0.802670955657959, "logits/rejected": -0.7996589541435242, "logps/chosen": -189.36489868164062, "logps/rejected": -309.4620056152344, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.660764217376709, "rewards/margins": 11.766666412353516, "rewards/rejected": -12.427431106567383, "step": 1527 }, { "epoch": 0.52, "learning_rate": 1.7335529220080916e-06, "logits/chosen": -0.7666227221488953, "logits/rejected": -0.7316661477088928, "logps/chosen": -144.6666717529297, "logps/rejected": -226.60540771484375, "loss": 0.0585, "rewards/accuracies": 0.875, "rewards/chosen": -2.0395264625549316, "rewards/margins": 8.772706031799316, "rewards/rejected": -10.81223201751709, "step": 1528 }, { "epoch": 0.52, "learning_rate": 1.7331771717078966e-06, "logits/chosen": -0.8217092156410217, "logits/rejected": -0.8091208338737488, "logps/chosen": -158.13772583007812, "logps/rejected": -284.1222839355469, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": 0.6794570684432983, "rewards/margins": 10.7338228225708, "rewards/rejected": -10.054367065429688, "step": 1529 }, { "epoch": 0.52, "learning_rate": 1.732801197432194e-06, "logits/chosen": -0.8484020233154297, "logits/rejected": -0.8120309114456177, "logps/chosen": -206.10922241210938, "logps/rejected": -279.4207763671875, "loss": 0.0783, "rewards/accuracies": 0.875, "rewards/chosen": 0.7139533758163452, "rewards/margins": 12.021162033081055, "rewards/rejected": -11.307209014892578, "step": 1530 }, { "epoch": 0.52, "learning_rate": 1.7324249992958384e-06, "logits/chosen": -0.8910919427871704, "logits/rejected": -0.8644669651985168, "logps/chosen": -203.88856506347656, "logps/rejected": -324.29266357421875, "loss": 0.0203, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5711846947669983, "rewards/margins": 14.224839210510254, "rewards/rejected": -14.796024322509766, "step": 1531 }, { "epoch": 0.52, "learning_rate": 1.7320485774137534e-06, "logits/chosen": -0.6684903502464294, "logits/rejected": -0.6537839770317078, "logps/chosen": -222.48486328125, "logps/rejected": -340.8659362792969, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.2734110355377197, "rewards/margins": 11.509934425354004, "rewards/rejected": -12.783344268798828, "step": 1532 }, { "epoch": 0.52, "learning_rate": 1.7316719319009305e-06, "logits/chosen": -0.7256861925125122, "logits/rejected": -0.7079952955245972, "logps/chosen": -203.65228271484375, "logps/rejected": -331.1952209472656, "loss": 0.0454, "rewards/accuracies": 1.0, "rewards/chosen": -0.8784005641937256, "rewards/margins": 13.973233222961426, "rewards/rejected": -14.85163402557373, "step": 1533 }, { "epoch": 0.52, "learning_rate": 1.7312950628724295e-06, "logits/chosen": -0.8351415395736694, "logits/rejected": -0.8078033924102783, "logps/chosen": -225.48947143554688, "logps/rejected": -311.1724853515625, "loss": 0.0945, "rewards/accuracies": 1.0, "rewards/chosen": -2.4906458854675293, "rewards/margins": 10.694921493530273, "rewards/rejected": -13.185567855834961, "step": 1534 }, { "epoch": 0.52, "learning_rate": 1.730917970443379e-06, "logits/chosen": -0.8572129607200623, "logits/rejected": -0.825099766254425, "logps/chosen": -194.48617553710938, "logps/rejected": -263.97216796875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 0.8807461857795715, "rewards/margins": 11.022172927856445, "rewards/rejected": -10.141427040100098, "step": 1535 }, { "epoch": 0.52, "learning_rate": 1.7305406547289753e-06, "logits/chosen": -0.766954243183136, "logits/rejected": -0.751849889755249, "logps/chosen": -151.57923889160156, "logps/rejected": -266.78338623046875, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.1008738279342651, "rewards/margins": 11.969651222229004, "rewards/rejected": -13.070526123046875, "step": 1536 }, { "epoch": 0.52, "learning_rate": 1.7301631158444835e-06, "logits/chosen": -0.7681335210800171, "logits/rejected": -0.7411433458328247, "logps/chosen": -215.29588317871094, "logps/rejected": -277.97003173828125, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": -0.5210452079772949, "rewards/margins": 9.839550971984863, "rewards/rejected": -10.360596656799316, "step": 1537 }, { "epoch": 0.52, "learning_rate": 1.7297853539052358e-06, "logits/chosen": -0.7359298467636108, "logits/rejected": -0.7265218496322632, "logps/chosen": -180.95570373535156, "logps/rejected": -304.4936828613281, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.027139008045196533, "rewards/margins": 11.206388473510742, "rewards/rejected": -11.233527183532715, "step": 1538 }, { "epoch": 0.53, "learning_rate": 1.7294073690266342e-06, "logits/chosen": -0.8525946736335754, "logits/rejected": -0.8291616439819336, "logps/chosen": -154.3360595703125, "logps/rejected": -298.9061279296875, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -0.7764281034469604, "rewards/margins": 12.741601943969727, "rewards/rejected": -13.51802921295166, "step": 1539 }, { "epoch": 0.53, "learning_rate": 1.729029161324147e-06, "logits/chosen": -0.8173611164093018, "logits/rejected": -0.7902465462684631, "logps/chosen": -157.29583740234375, "logps/rejected": -230.33782958984375, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -0.4686388671398163, "rewards/margins": 9.81564712524414, "rewards/rejected": -10.284285545349121, "step": 1540 }, { "epoch": 0.53, "learning_rate": 1.728650730913312e-06, "logits/chosen": -0.7461663484573364, "logits/rejected": -0.7419857978820801, "logps/chosen": -235.5260009765625, "logps/rejected": -375.86309814453125, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": -0.4431162476539612, "rewards/margins": 14.304668426513672, "rewards/rejected": -14.747784614562988, "step": 1541 }, { "epoch": 0.53, "learning_rate": 1.7282720779097346e-06, "logits/chosen": -0.7389093637466431, "logits/rejected": -0.7227798700332642, "logps/chosen": -155.43875122070312, "logps/rejected": -218.24685668945312, "loss": 0.0462, "rewards/accuracies": 1.0, "rewards/chosen": -1.2827707529067993, "rewards/margins": 8.952180862426758, "rewards/rejected": -10.234951972961426, "step": 1542 }, { "epoch": 0.53, "learning_rate": 1.7278932024290875e-06, "logits/chosen": -0.6825645565986633, "logits/rejected": -0.6709743738174438, "logps/chosen": -116.10980987548828, "logps/rejected": -253.9545135498047, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.4392254948616028, "rewards/margins": 12.1452054977417, "rewards/rejected": -12.584431648254395, "step": 1543 }, { "epoch": 0.53, "learning_rate": 1.7275141045871125e-06, "logits/chosen": -0.6742585301399231, "logits/rejected": -0.6239293813705444, "logps/chosen": -215.591796875, "logps/rejected": -327.93609619140625, "loss": 0.0209, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2241734266281128, "rewards/margins": 13.16635513305664, "rewards/rejected": -14.39052963256836, "step": 1544 }, { "epoch": 0.53, "learning_rate": 1.727134784499618e-06, "logits/chosen": -0.8030523657798767, "logits/rejected": -0.7720876932144165, "logps/chosen": -255.16944885253906, "logps/rejected": -369.04278564453125, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -1.2484197616577148, "rewards/margins": 11.54232406616211, "rewards/rejected": -12.790742874145508, "step": 1545 }, { "epoch": 0.53, "learning_rate": 1.7267552422824819e-06, "logits/chosen": -0.7133074402809143, "logits/rejected": -0.702751636505127, "logps/chosen": -136.56861877441406, "logps/rejected": -231.52676391601562, "loss": 0.0153, "rewards/accuracies": 0.9375, "rewards/chosen": -0.964881181716919, "rewards/margins": 8.77855110168457, "rewards/rejected": -9.743432998657227, "step": 1546 }, { "epoch": 0.53, "learning_rate": 1.7263754780516483e-06, "logits/chosen": -0.7275325655937195, "logits/rejected": -0.7150706648826599, "logps/chosen": -226.73947143554688, "logps/rejected": -338.22979736328125, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 0.27532023191452026, "rewards/margins": 11.908121109008789, "rewards/rejected": -11.632800102233887, "step": 1547 }, { "epoch": 0.53, "learning_rate": 1.7259954919231307e-06, "logits/chosen": -0.8300327658653259, "logits/rejected": -0.8148790001869202, "logps/chosen": -182.0558624267578, "logps/rejected": -306.7781982421875, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.0711424350738525, "rewards/margins": 11.865251541137695, "rewards/rejected": -12.936393737792969, "step": 1548 }, { "epoch": 0.53, "learning_rate": 1.7256152840130094e-06, "logits/chosen": -0.769858717918396, "logits/rejected": -0.7501769661903381, "logps/chosen": -187.09312438964844, "logps/rejected": -350.5317687988281, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -1.8635165691375732, "rewards/margins": 13.753999710083008, "rewards/rejected": -15.617517471313477, "step": 1549 }, { "epoch": 0.53, "learning_rate": 1.7252348544374322e-06, "logits/chosen": -0.7693420648574829, "logits/rejected": -0.7321252822875977, "logps/chosen": -186.51416015625, "logps/rejected": -306.274169921875, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -0.8201753497123718, "rewards/margins": 13.877581596374512, "rewards/rejected": -14.697757720947266, "step": 1550 }, { "epoch": 0.53, "learning_rate": 1.7248542033126157e-06, "logits/chosen": -0.753078043460846, "logits/rejected": -0.7176026701927185, "logps/chosen": -194.53514099121094, "logps/rejected": -321.299072265625, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.0837047100067139, "rewards/margins": 12.864241600036621, "rewards/rejected": -13.947946548461914, "step": 1551 }, { "epoch": 0.53, "learning_rate": 1.7244733307548431e-06, "logits/chosen": -0.6917634606361389, "logits/rejected": -0.6639284491539001, "logps/chosen": -165.01449584960938, "logps/rejected": -254.8365020751953, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": -2.3213796615600586, "rewards/margins": 10.010520935058594, "rewards/rejected": -12.331900596618652, "step": 1552 }, { "epoch": 0.53, "learning_rate": 1.724092236880466e-06, "logits/chosen": -0.8980500102043152, "logits/rejected": -0.8637776970863342, "logps/chosen": -214.85275268554688, "logps/rejected": -292.36480712890625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 0.1712646782398224, "rewards/margins": 11.961524963378906, "rewards/rejected": -11.79025936126709, "step": 1553 }, { "epoch": 0.53, "learning_rate": 1.7237109218059032e-06, "logits/chosen": -0.7809678912162781, "logits/rejected": -0.740900993347168, "logps/chosen": -209.63323974609375, "logps/rejected": -302.05780029296875, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -0.811251163482666, "rewards/margins": 12.23800277709961, "rewards/rejected": -13.049253463745117, "step": 1554 }, { "epoch": 0.53, "learning_rate": 1.723329385647641e-06, "logits/chosen": -0.7604342699050903, "logits/rejected": -0.6909570693969727, "logps/chosen": -212.3223876953125, "logps/rejected": -282.6900939941406, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -1.1797226667404175, "rewards/margins": 12.360523223876953, "rewards/rejected": -13.54024600982666, "step": 1555 }, { "epoch": 0.53, "learning_rate": 1.7229476285222342e-06, "logits/chosen": -0.7832818031311035, "logits/rejected": -0.7664036154747009, "logps/chosen": -218.22454833984375, "logps/rejected": -349.4501953125, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -0.6327348947525024, "rewards/margins": 13.47944450378418, "rewards/rejected": -14.112180709838867, "step": 1556 }, { "epoch": 0.53, "learning_rate": 1.7225656505463034e-06, "logits/chosen": -0.8697724938392639, "logits/rejected": -0.8518004417419434, "logps/chosen": -192.8612060546875, "logps/rejected": -333.487060546875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.6138532757759094, "rewards/margins": 13.912494659423828, "rewards/rejected": -14.526348114013672, "step": 1557 }, { "epoch": 0.53, "learning_rate": 1.722183451836538e-06, "logits/chosen": -0.8080925941467285, "logits/rejected": -0.7947487831115723, "logps/chosen": -222.02313232421875, "logps/rejected": -374.31903076171875, "loss": 0.0704, "rewards/accuracies": 1.0, "rewards/chosen": -0.8038129210472107, "rewards/margins": 14.793488502502441, "rewards/rejected": -15.597302436828613, "step": 1558 }, { "epoch": 0.53, "learning_rate": 1.7218010325096944e-06, "logits/chosen": -0.7903550863265991, "logits/rejected": -0.7620354294776917, "logps/chosen": -249.29562377929688, "logps/rejected": -351.7527160644531, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 0.062339067459106445, "rewards/margins": 15.22220516204834, "rewards/rejected": -15.159866333007812, "step": 1559 }, { "epoch": 0.53, "learning_rate": 1.7214183926825965e-06, "logits/chosen": -0.7824190258979797, "logits/rejected": -0.7661941051483154, "logps/chosen": -285.75860595703125, "logps/rejected": -427.1092834472656, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -0.37852931022644043, "rewards/margins": 15.275266647338867, "rewards/rejected": -15.65379524230957, "step": 1560 }, { "epoch": 0.53, "learning_rate": 1.7210355324721354e-06, "logits/chosen": -0.7158726453781128, "logits/rejected": -0.6765230298042297, "logps/chosen": -217.87948608398438, "logps/rejected": -298.6446228027344, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.6418012976646423, "rewards/margins": 11.976069450378418, "rewards/rejected": -12.617870330810547, "step": 1561 }, { "epoch": 0.53, "learning_rate": 1.7206524519952695e-06, "logits/chosen": -0.8125733137130737, "logits/rejected": -0.7672662138938904, "logps/chosen": -188.5677032470703, "logps/rejected": -273.291748046875, "loss": 0.0316, "rewards/accuracies": 0.9375, "rewards/chosen": -1.50419020652771, "rewards/margins": 11.189481735229492, "rewards/rejected": -12.693673133850098, "step": 1562 }, { "epoch": 0.53, "learning_rate": 1.7202691513690248e-06, "logits/chosen": -0.7727904319763184, "logits/rejected": -0.7317767143249512, "logps/chosen": -224.63824462890625, "logps/rejected": -295.5360107421875, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -1.5384615659713745, "rewards/margins": 11.284570693969727, "rewards/rejected": -12.82303237915039, "step": 1563 }, { "epoch": 0.53, "learning_rate": 1.7198856307104946e-06, "logits/chosen": -0.763870894908905, "logits/rejected": -0.7683395147323608, "logps/chosen": -120.58039855957031, "logps/rejected": -255.64144897460938, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -1.6954606771469116, "rewards/margins": 11.087822914123535, "rewards/rejected": -12.783284187316895, "step": 1564 }, { "epoch": 0.53, "learning_rate": 1.7195018901368385e-06, "logits/chosen": -0.712871253490448, "logits/rejected": -0.6912281513214111, "logps/chosen": -219.32235717773438, "logps/rejected": -392.68719482421875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.6797003746032715, "rewards/margins": 14.339788436889648, "rewards/rejected": -16.019489288330078, "step": 1565 }, { "epoch": 0.53, "learning_rate": 1.7191179297652844e-06, "logits/chosen": -0.8194763660430908, "logits/rejected": -0.7901120781898499, "logps/chosen": -138.81527709960938, "logps/rejected": -204.25123596191406, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.9122512340545654, "rewards/margins": 8.130061149597168, "rewards/rejected": -9.042311668395996, "step": 1566 }, { "epoch": 0.53, "learning_rate": 1.7187337497131269e-06, "logits/chosen": -0.8233810663223267, "logits/rejected": -0.7982977628707886, "logps/chosen": -212.111572265625, "logps/rejected": -288.69525146484375, "loss": 0.0424, "rewards/accuracies": 0.875, "rewards/chosen": -1.001116394996643, "rewards/margins": 10.284340858459473, "rewards/rejected": -11.285457611083984, "step": 1567 }, { "epoch": 0.54, "learning_rate": 1.7183493500977275e-06, "logits/chosen": -0.6958255171775818, "logits/rejected": -0.6733258366584778, "logps/chosen": -178.6270751953125, "logps/rejected": -284.7588195800781, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7478810548782349, "rewards/margins": 10.73234748840332, "rewards/rejected": -11.48022747039795, "step": 1568 }, { "epoch": 0.54, "learning_rate": 1.7179647310365151e-06, "logits/chosen": -0.7823693752288818, "logits/rejected": -0.7450627684593201, "logps/chosen": -203.89340209960938, "logps/rejected": -301.16949462890625, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -1.0461217164993286, "rewards/margins": 12.25554370880127, "rewards/rejected": -13.301666259765625, "step": 1569 }, { "epoch": 0.54, "learning_rate": 1.7175798926469856e-06, "logits/chosen": -0.618365466594696, "logits/rejected": -0.5733203887939453, "logps/chosen": -203.2704620361328, "logps/rejected": -278.3216247558594, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -2.0431835651397705, "rewards/margins": 10.710270881652832, "rewards/rejected": -12.75345516204834, "step": 1570 }, { "epoch": 0.54, "learning_rate": 1.7171948350467016e-06, "logits/chosen": -0.7875710725784302, "logits/rejected": -0.7544357776641846, "logps/chosen": -171.2045135498047, "logps/rejected": -280.1011962890625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.7559495568275452, "rewards/margins": 13.511140823364258, "rewards/rejected": -14.267090797424316, "step": 1571 }, { "epoch": 0.54, "learning_rate": 1.716809558353293e-06, "logits/chosen": -0.7856402397155762, "logits/rejected": -0.7454336881637573, "logps/chosen": -159.13540649414062, "logps/rejected": -198.36483764648438, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.8528251647949219, "rewards/margins": 8.725153923034668, "rewards/rejected": -9.577980041503906, "step": 1572 }, { "epoch": 0.54, "learning_rate": 1.7164240626844567e-06, "logits/chosen": -0.8531931042671204, "logits/rejected": -0.8397752046585083, "logps/chosen": -198.05963134765625, "logps/rejected": -358.0408630371094, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -1.3632450103759766, "rewards/margins": 14.55944538116455, "rewards/rejected": -15.922691345214844, "step": 1573 }, { "epoch": 0.54, "learning_rate": 1.716038348157956e-06, "logits/chosen": -0.7274768948554993, "logits/rejected": -0.6615907549858093, "logps/chosen": -251.51260375976562, "logps/rejected": -337.06793212890625, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.03371471166610718, "rewards/margins": 12.892087936401367, "rewards/rejected": -12.925802230834961, "step": 1574 }, { "epoch": 0.54, "learning_rate": 1.7156524148916213e-06, "logits/chosen": -0.8115262985229492, "logits/rejected": -0.782985270023346, "logps/chosen": -235.48452758789062, "logps/rejected": -312.3746337890625, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -0.8365375995635986, "rewards/margins": 13.150253295898438, "rewards/rejected": -13.986790657043457, "step": 1575 }, { "epoch": 0.54, "learning_rate": 1.7152662630033503e-06, "logits/chosen": -0.7173964977264404, "logits/rejected": -0.6898460388183594, "logps/chosen": -175.0258331298828, "logps/rejected": -244.45265197753906, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.795506238937378, "rewards/margins": 9.770645141601562, "rewards/rejected": -11.56615161895752, "step": 1576 }, { "epoch": 0.54, "learning_rate": 1.7148798926111065e-06, "logits/chosen": -0.8357862234115601, "logits/rejected": -0.8035523891448975, "logps/chosen": -177.514892578125, "logps/rejected": -296.3727111816406, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": 0.7355151176452637, "rewards/margins": 12.892951011657715, "rewards/rejected": -12.15743637084961, "step": 1577 }, { "epoch": 0.54, "learning_rate": 1.7144933038329214e-06, "logits/chosen": -0.805136501789093, "logits/rejected": -0.7755112051963806, "logps/chosen": -198.9203643798828, "logps/rejected": -304.4461669921875, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.4728580713272095, "rewards/margins": 11.646465301513672, "rewards/rejected": -12.119322776794434, "step": 1578 }, { "epoch": 0.54, "learning_rate": 1.7141064967868922e-06, "logits/chosen": -0.7314304709434509, "logits/rejected": -0.6959134340286255, "logps/chosen": -183.69325256347656, "logps/rejected": -292.9422607421875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.0903174951672554, "rewards/margins": 12.903400421142578, "rewards/rejected": -12.993717193603516, "step": 1579 }, { "epoch": 0.54, "learning_rate": 1.713719471591183e-06, "logits/chosen": -0.7620181441307068, "logits/rejected": -0.7237510085105896, "logps/chosen": -281.15240478515625, "logps/rejected": -338.5819396972656, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": 0.48282870650291443, "rewards/margins": 10.498075485229492, "rewards/rejected": -10.015247344970703, "step": 1580 }, { "epoch": 0.54, "learning_rate": 1.7133322283640249e-06, "logits/chosen": -0.7905147075653076, "logits/rejected": -0.7687435150146484, "logps/chosen": -133.10867309570312, "logps/rejected": -231.9341583251953, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.0764020681381226, "rewards/margins": 9.809327125549316, "rewards/rejected": -10.88572883605957, "step": 1581 }, { "epoch": 0.54, "learning_rate": 1.7129447672237152e-06, "logits/chosen": -0.8258718252182007, "logits/rejected": -0.7968591451644897, "logps/chosen": -209.48019409179688, "logps/rejected": -339.4294738769531, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.3814913034439087, "rewards/margins": 13.630865097045898, "rewards/rejected": -14.012356758117676, "step": 1582 }, { "epoch": 0.54, "learning_rate": 1.712557088288618e-06, "logits/chosen": -0.8388197422027588, "logits/rejected": -0.7701252698898315, "logps/chosen": -198.1717987060547, "logps/rejected": -253.16009521484375, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.12639524042606354, "rewards/margins": 10.733451843261719, "rewards/rejected": -10.859847068786621, "step": 1583 }, { "epoch": 0.54, "learning_rate": 1.7121691916771637e-06, "logits/chosen": -0.7424236536026001, "logits/rejected": -0.7378392219543457, "logps/chosen": -172.5025177001953, "logps/rejected": -320.404296875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.178715705871582, "rewards/margins": 13.12915325164795, "rewards/rejected": -14.307868957519531, "step": 1584 }, { "epoch": 0.54, "learning_rate": 1.7117810775078493e-06, "logits/chosen": -0.740548312664032, "logits/rejected": -0.7201482057571411, "logps/chosen": -207.41122436523438, "logps/rejected": -308.7485656738281, "loss": 0.0616, "rewards/accuracies": 1.0, "rewards/chosen": -1.2096983194351196, "rewards/margins": 11.847551345825195, "rewards/rejected": -13.05724811553955, "step": 1585 }, { "epoch": 0.54, "learning_rate": 1.7113927458992388e-06, "logits/chosen": -0.7937425374984741, "logits/rejected": -0.7700530290603638, "logps/chosen": -159.17071533203125, "logps/rejected": -276.9266052246094, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": -0.11912515759468079, "rewards/margins": 11.706809043884277, "rewards/rejected": -11.825935363769531, "step": 1586 }, { "epoch": 0.54, "learning_rate": 1.711004196969962e-06, "logits/chosen": -0.7385679483413696, "logits/rejected": -0.7594189643859863, "logps/chosen": -169.62139892578125, "logps/rejected": -387.2379455566406, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -0.021870285272598267, "rewards/margins": 16.3179931640625, "rewards/rejected": -16.33986473083496, "step": 1587 }, { "epoch": 0.54, "learning_rate": 1.710615430838715e-06, "logits/chosen": -0.7316516637802124, "logits/rejected": -0.715221643447876, "logps/chosen": -172.93673706054688, "logps/rejected": -275.9180908203125, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -0.5778984427452087, "rewards/margins": 9.772138595581055, "rewards/rejected": -10.350037574768066, "step": 1588 }, { "epoch": 0.54, "learning_rate": 1.7102264476242602e-06, "logits/chosen": -0.7650504112243652, "logits/rejected": -0.7414705753326416, "logps/chosen": -181.48800659179688, "logps/rejected": -281.8921203613281, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.2588685750961304, "rewards/margins": 12.735260963439941, "rewards/rejected": -13.99413013458252, "step": 1589 }, { "epoch": 0.54, "learning_rate": 1.7098372474454276e-06, "logits/chosen": -0.7729201316833496, "logits/rejected": -0.7523113489151001, "logps/chosen": -188.5896759033203, "logps/rejected": -334.095458984375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.2816188335418701, "rewards/margins": 14.092819213867188, "rewards/rejected": -15.374438285827637, "step": 1590 }, { "epoch": 0.54, "learning_rate": 1.7094478304211113e-06, "logits/chosen": -0.7294334173202515, "logits/rejected": -0.701511800289154, "logps/chosen": -185.48907470703125, "logps/rejected": -263.521728515625, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -1.1903886795043945, "rewards/margins": 10.165300369262695, "rewards/rejected": -11.35568904876709, "step": 1591 }, { "epoch": 0.54, "learning_rate": 1.7090581966702737e-06, "logits/chosen": -0.7118923664093018, "logits/rejected": -0.6846755743026733, "logps/chosen": -170.4217071533203, "logps/rejected": -266.0950622558594, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -0.5277214050292969, "rewards/margins": 10.964046478271484, "rewards/rejected": -11.491768836975098, "step": 1592 }, { "epoch": 0.54, "learning_rate": 1.7086683463119423e-06, "logits/chosen": -0.7341826558113098, "logits/rejected": -0.7208204865455627, "logps/chosen": -207.28688049316406, "logps/rejected": -336.1845703125, "loss": 0.0618, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8932936787605286, "rewards/margins": 12.875894546508789, "rewards/rejected": -13.76918888092041, "step": 1593 }, { "epoch": 0.54, "learning_rate": 1.708278279465211e-06, "logits/chosen": -0.7676510810852051, "logits/rejected": -0.730313241481781, "logps/chosen": -167.8517608642578, "logps/rejected": -288.29241943359375, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -0.529213011264801, "rewards/margins": 11.342134475708008, "rewards/rejected": -11.871347427368164, "step": 1594 }, { "epoch": 0.54, "learning_rate": 1.7078879962492395e-06, "logits/chosen": -0.644192636013031, "logits/rejected": -0.6063737869262695, "logps/chosen": -139.31918334960938, "logps/rejected": -257.73480224609375, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -0.8593138456344604, "rewards/margins": 12.008940696716309, "rewards/rejected": -12.868253707885742, "step": 1595 }, { "epoch": 0.54, "learning_rate": 1.7074974967832543e-06, "logits/chosen": -0.8037374019622803, "logits/rejected": -0.8064567446708679, "logps/chosen": -213.71627807617188, "logps/rejected": -380.6391296386719, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -0.36149632930755615, "rewards/margins": 14.014167785644531, "rewards/rejected": -14.375664710998535, "step": 1596 }, { "epoch": 0.55, "learning_rate": 1.7071067811865474e-06, "logits/chosen": -0.7229506969451904, "logits/rejected": -0.674793004989624, "logps/chosen": -227.86419677734375, "logps/rejected": -326.5888977050781, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": 0.060174763202667236, "rewards/margins": 15.784003257751465, "rewards/rejected": -15.723828315734863, "step": 1597 }, { "epoch": 0.55, "learning_rate": 1.7067158495784772e-06, "logits/chosen": -0.6673161387443542, "logits/rejected": -0.6318796277046204, "logps/chosen": -164.6383819580078, "logps/rejected": -267.432373046875, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -0.679762601852417, "rewards/margins": 11.50221061706543, "rewards/rejected": -12.181973457336426, "step": 1598 }, { "epoch": 0.55, "learning_rate": 1.7063247020784682e-06, "logits/chosen": -0.6807839274406433, "logits/rejected": -0.6786254048347473, "logps/chosen": -147.48928833007812, "logps/rejected": -262.0566711425781, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -1.170313835144043, "rewards/margins": 9.31913948059082, "rewards/rejected": -10.489453315734863, "step": 1599 }, { "epoch": 0.55, "learning_rate": 1.7059333388060097e-06, "logits/chosen": -0.6710614562034607, "logits/rejected": -0.676902711391449, "logps/chosen": -156.11260986328125, "logps/rejected": -314.468017578125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.6800433397293091, "rewards/margins": 12.823007583618164, "rewards/rejected": -13.5030517578125, "step": 1600 }, { "epoch": 0.55, "learning_rate": 1.7055417598806584e-06, "logits/chosen": -0.7348918914794922, "logits/rejected": -0.7039461731910706, "logps/chosen": -238.0584259033203, "logps/rejected": -308.95733642578125, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6459873914718628, "rewards/margins": 12.068897247314453, "rewards/rejected": -12.714883804321289, "step": 1601 }, { "epoch": 0.55, "learning_rate": 1.7051499654220366e-06, "logits/chosen": -0.716124951839447, "logits/rejected": -0.6744682788848877, "logps/chosen": -220.84274291992188, "logps/rejected": -301.8958740234375, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.6747053861618042, "rewards/margins": 12.717765808105469, "rewards/rejected": -13.392471313476562, "step": 1602 }, { "epoch": 0.55, "learning_rate": 1.7047579555498311e-06, "logits/chosen": -0.7922161221504211, "logits/rejected": -0.7847835421562195, "logps/chosen": -173.62493896484375, "logps/rejected": -302.099609375, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -1.3781358003616333, "rewards/margins": 10.617002487182617, "rewards/rejected": -11.995138168334961, "step": 1603 }, { "epoch": 0.55, "learning_rate": 1.704365730383796e-06, "logits/chosen": -0.7784188985824585, "logits/rejected": -0.7766914963722229, "logps/chosen": -180.40792846679688, "logps/rejected": -321.89312744140625, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.5646318197250366, "rewards/margins": 12.050834655761719, "rewards/rejected": -13.615467071533203, "step": 1604 }, { "epoch": 0.55, "learning_rate": 1.7039732900437514e-06, "logits/chosen": -0.8271380662918091, "logits/rejected": -0.7860352396965027, "logps/chosen": -196.47604370117188, "logps/rejected": -372.82666015625, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -1.1714692115783691, "rewards/margins": 17.611297607421875, "rewards/rejected": -18.782766342163086, "step": 1605 }, { "epoch": 0.55, "learning_rate": 1.7035806346495815e-06, "logits/chosen": -0.728077232837677, "logits/rejected": -0.6865540742874146, "logps/chosen": -141.92657470703125, "logps/rejected": -211.40492248535156, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.0042954683303833, "rewards/margins": 10.52860164642334, "rewards/rejected": -11.53289794921875, "step": 1606 }, { "epoch": 0.55, "learning_rate": 1.703187764321237e-06, "logits/chosen": -0.7240062355995178, "logits/rejected": -0.7391926050186157, "logps/chosen": -190.1287841796875, "logps/rejected": -355.9347839355469, "loss": 0.0677, "rewards/accuracies": 1.0, "rewards/chosen": -0.9320176839828491, "rewards/margins": 12.406105041503906, "rewards/rejected": -13.338123321533203, "step": 1607 }, { "epoch": 0.55, "learning_rate": 1.7027946791787349e-06, "logits/chosen": -0.7665182948112488, "logits/rejected": -0.7275741100311279, "logps/chosen": -187.69082641601562, "logps/rejected": -236.63467407226562, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -1.6693971157073975, "rewards/margins": 9.444074630737305, "rewards/rejected": -11.113471984863281, "step": 1608 }, { "epoch": 0.55, "learning_rate": 1.702401379342157e-06, "logits/chosen": -0.7887076139450073, "logits/rejected": -0.7891150712966919, "logps/chosen": -218.50289916992188, "logps/rejected": -356.2660217285156, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -0.7770951390266418, "rewards/margins": 11.933584213256836, "rewards/rejected": -12.710679054260254, "step": 1609 }, { "epoch": 0.55, "learning_rate": 1.7020078649316512e-06, "logits/chosen": -0.7065632343292236, "logits/rejected": -0.675001323223114, "logps/chosen": -177.760498046875, "logps/rejected": -273.3494873046875, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -1.708375096321106, "rewards/margins": 10.268953323364258, "rewards/rejected": -11.97732925415039, "step": 1610 }, { "epoch": 0.55, "learning_rate": 1.7016141360674304e-06, "logits/chosen": -0.7296184301376343, "logits/rejected": -0.6915185451507568, "logps/chosen": -170.3173370361328, "logps/rejected": -257.5924377441406, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.6285082101821899, "rewards/margins": 12.157464981079102, "rewards/rejected": -12.785972595214844, "step": 1611 }, { "epoch": 0.55, "learning_rate": 1.7012201928697735e-06, "logits/chosen": -0.6658099889755249, "logits/rejected": -0.622800350189209, "logps/chosen": -196.96771240234375, "logps/rejected": -278.9100036621094, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": -0.8159130811691284, "rewards/margins": 11.914596557617188, "rewards/rejected": -12.730510711669922, "step": 1612 }, { "epoch": 0.55, "learning_rate": 1.700826035459025e-06, "logits/chosen": -0.7553315758705139, "logits/rejected": -0.7383878231048584, "logps/chosen": -205.75718688964844, "logps/rejected": -374.2203063964844, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.8293100595474243, "rewards/margins": 15.530579566955566, "rewards/rejected": -16.35988998413086, "step": 1613 }, { "epoch": 0.55, "learning_rate": 1.700431663955594e-06, "logits/chosen": -0.7302578687667847, "logits/rejected": -0.7027519941329956, "logps/chosen": -191.7421875, "logps/rejected": -308.3577880859375, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": 0.021040797233581543, "rewards/margins": 12.985879898071289, "rewards/rejected": -12.96484088897705, "step": 1614 }, { "epoch": 0.55, "learning_rate": 1.7000370784799562e-06, "logits/chosen": -0.7676390409469604, "logits/rejected": -0.7406318187713623, "logps/chosen": -159.11997985839844, "logps/rejected": -282.85858154296875, "loss": 0.0531, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6372107863426208, "rewards/margins": 11.996219635009766, "rewards/rejected": -12.633432388305664, "step": 1615 }, { "epoch": 0.55, "learning_rate": 1.6996422791526512e-06, "logits/chosen": -0.8276156187057495, "logits/rejected": -0.7863647937774658, "logps/chosen": -214.9062957763672, "logps/rejected": -331.30206298828125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.33737531304359436, "rewards/margins": 14.5182466506958, "rewards/rejected": -14.855621337890625, "step": 1616 }, { "epoch": 0.55, "learning_rate": 1.6992472660942855e-06, "logits/chosen": -0.7656667828559875, "logits/rejected": -0.7546906471252441, "logps/chosen": -190.7325439453125, "logps/rejected": -306.7724304199219, "loss": 0.1378, "rewards/accuracies": 1.0, "rewards/chosen": -0.6765208840370178, "rewards/margins": 10.680095672607422, "rewards/rejected": -11.356616020202637, "step": 1617 }, { "epoch": 0.55, "learning_rate": 1.6988520394255296e-06, "logits/chosen": -0.7665883302688599, "logits/rejected": -0.7308921813964844, "logps/chosen": -189.6995391845703, "logps/rejected": -275.7660217285156, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3547813892364502, "rewards/margins": 12.658161163330078, "rewards/rejected": -14.01294231414795, "step": 1618 }, { "epoch": 0.55, "learning_rate": 1.6984565992671202e-06, "logits/chosen": -0.6073290109634399, "logits/rejected": -0.5924747586250305, "logps/chosen": -195.03407287597656, "logps/rejected": -305.83489990234375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.015523850917816162, "rewards/margins": 10.933058738708496, "rewards/rejected": -10.94858169555664, "step": 1619 }, { "epoch": 0.55, "learning_rate": 1.6980609457398587e-06, "logits/chosen": -0.8224543929100037, "logits/rejected": -0.7593342065811157, "logps/chosen": -233.3463134765625, "logps/rejected": -232.83067321777344, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": 0.14982835948467255, "rewards/margins": 8.482900619506836, "rewards/rejected": -8.3330717086792, "step": 1620 }, { "epoch": 0.55, "learning_rate": 1.6976650789646115e-06, "logits/chosen": -0.7306627631187439, "logits/rejected": -0.7158505320549011, "logps/chosen": -165.85617065429688, "logps/rejected": -316.0383605957031, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 0.024572208523750305, "rewards/margins": 12.722038269042969, "rewards/rejected": -12.697466850280762, "step": 1621 }, { "epoch": 0.55, "learning_rate": 1.6972689990623108e-06, "logits/chosen": -0.796808123588562, "logits/rejected": -0.7641488313674927, "logps/chosen": -161.45721435546875, "logps/rejected": -305.03363037109375, "loss": 0.0527, "rewards/accuracies": 1.0, "rewards/chosen": -0.6322493553161621, "rewards/margins": 13.338849067687988, "rewards/rejected": -13.971098899841309, "step": 1622 }, { "epoch": 0.55, "learning_rate": 1.6968727061539534e-06, "logits/chosen": -0.7589690685272217, "logits/rejected": -0.7231643795967102, "logps/chosen": -229.13540649414062, "logps/rejected": -340.0542907714844, "loss": 0.0167, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0885369777679443, "rewards/margins": 13.146832466125488, "rewards/rejected": -14.235368728637695, "step": 1623 }, { "epoch": 0.55, "learning_rate": 1.6964762003606014e-06, "logits/chosen": -0.7451703548431396, "logits/rejected": -0.7277234196662903, "logps/chosen": -215.88372802734375, "logps/rejected": -316.9812927246094, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.15903128683567047, "rewards/margins": 11.38956069946289, "rewards/rejected": -11.548592567443848, "step": 1624 }, { "epoch": 0.55, "learning_rate": 1.6960794818033817e-06, "logits/chosen": -0.7368825674057007, "logits/rejected": -0.7357358336448669, "logps/chosen": -172.54296875, "logps/rejected": -315.3763122558594, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -0.7420315742492676, "rewards/margins": 13.370614051818848, "rewards/rejected": -14.112646102905273, "step": 1625 }, { "epoch": 0.55, "learning_rate": 1.6956825506034863e-06, "logits/chosen": -0.7035046219825745, "logits/rejected": -0.7030103802680969, "logps/chosen": -218.4431915283203, "logps/rejected": -306.4655456542969, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.1976367235183716, "rewards/margins": 10.652796745300293, "rewards/rejected": -11.850433349609375, "step": 1626 }, { "epoch": 0.56, "learning_rate": 1.6952854068821724e-06, "logits/chosen": -0.67799311876297, "logits/rejected": -0.6459581255912781, "logps/chosen": -174.44580078125, "logps/rejected": -309.34039306640625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.426027536392212, "rewards/margins": 12.225870132446289, "rewards/rejected": -13.651896476745605, "step": 1627 }, { "epoch": 0.56, "learning_rate": 1.694888050760762e-06, "logits/chosen": -0.715409517288208, "logits/rejected": -0.7008752822875977, "logps/chosen": -198.29727172851562, "logps/rejected": -279.029052734375, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -0.5098602771759033, "rewards/margins": 8.385046005249023, "rewards/rejected": -8.894905090332031, "step": 1628 }, { "epoch": 0.56, "learning_rate": 1.6944904823606417e-06, "logits/chosen": -0.7091214656829834, "logits/rejected": -0.6736956834793091, "logps/chosen": -214.7150421142578, "logps/rejected": -369.7413024902344, "loss": 0.0616, "rewards/accuracies": 1.0, "rewards/chosen": -0.3043779134750366, "rewards/margins": 14.639034271240234, "rewards/rejected": -14.943411827087402, "step": 1629 }, { "epoch": 0.56, "learning_rate": 1.6940927018032628e-06, "logits/chosen": -0.7065119743347168, "logits/rejected": -0.6811855435371399, "logps/chosen": -174.32687377929688, "logps/rejected": -333.410888671875, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": -1.0545001029968262, "rewards/margins": 11.917804718017578, "rewards/rejected": -12.972305297851562, "step": 1630 }, { "epoch": 0.56, "learning_rate": 1.693694709210143e-06, "logits/chosen": -0.6891994476318359, "logits/rejected": -0.6610634922981262, "logps/chosen": -151.2853240966797, "logps/rejected": -250.255859375, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.13301445543766022, "rewards/margins": 12.597405433654785, "rewards/rejected": -12.73042106628418, "step": 1631 }, { "epoch": 0.56, "learning_rate": 1.6932965047028618e-06, "logits/chosen": -0.6332094669342041, "logits/rejected": -0.599982500076294, "logps/chosen": -79.46476745605469, "logps/rejected": -209.9210205078125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.5323023796081543, "rewards/margins": 11.810051918029785, "rewards/rejected": -12.342353820800781, "step": 1632 }, { "epoch": 0.56, "learning_rate": 1.692898088403067e-06, "logits/chosen": -0.7200354337692261, "logits/rejected": -0.6877641677856445, "logps/chosen": -237.4645538330078, "logps/rejected": -333.8484191894531, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3805925846099854, "rewards/margins": 11.96282958984375, "rewards/rejected": -13.343422889709473, "step": 1633 }, { "epoch": 0.56, "learning_rate": 1.6924994604324678e-06, "logits/chosen": -0.7599742412567139, "logits/rejected": -0.7605247497558594, "logps/chosen": -202.7925262451172, "logps/rejected": -345.52081298828125, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.61175537109375, "rewards/margins": 12.203718185424805, "rewards/rejected": -13.815473556518555, "step": 1634 }, { "epoch": 0.56, "learning_rate": 1.6921006209128402e-06, "logits/chosen": -0.7514083981513977, "logits/rejected": -0.717835545539856, "logps/chosen": -205.95584106445312, "logps/rejected": -320.20892333984375, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -0.5158342123031616, "rewards/margins": 12.31862735748291, "rewards/rejected": -12.834461212158203, "step": 1635 }, { "epoch": 0.56, "learning_rate": 1.6917015699660243e-06, "logits/chosen": -0.758281946182251, "logits/rejected": -0.73395836353302, "logps/chosen": -187.8636932373047, "logps/rejected": -282.3182678222656, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.8806720972061157, "rewards/margins": 11.049851417541504, "rewards/rejected": -12.930522918701172, "step": 1636 }, { "epoch": 0.56, "learning_rate": 1.6913023077139243e-06, "logits/chosen": -0.7228269577026367, "logits/rejected": -0.7085506319999695, "logps/chosen": -190.0752716064453, "logps/rejected": -358.6446533203125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.6857231855392456, "rewards/margins": 16.510324478149414, "rewards/rejected": -17.196046829223633, "step": 1637 }, { "epoch": 0.56, "learning_rate": 1.6909028342785096e-06, "logits/chosen": -0.7717589735984802, "logits/rejected": -0.7395263314247131, "logps/chosen": -203.33242797851562, "logps/rejected": -339.63433837890625, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.9274117350578308, "rewards/margins": 12.948247909545898, "rewards/rejected": -13.875659942626953, "step": 1638 }, { "epoch": 0.56, "learning_rate": 1.6905031497818136e-06, "logits/chosen": -0.7233151793479919, "logits/rejected": -0.6925632953643799, "logps/chosen": -183.63955688476562, "logps/rejected": -256.6909484863281, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -0.4186016917228699, "rewards/margins": 10.25051498413086, "rewards/rejected": -10.669116020202637, "step": 1639 }, { "epoch": 0.56, "learning_rate": 1.6901032543459347e-06, "logits/chosen": -0.748753547668457, "logits/rejected": -0.726798951625824, "logps/chosen": -162.85275268554688, "logps/rejected": -242.813232421875, "loss": 0.0652, "rewards/accuracies": 1.0, "rewards/chosen": -0.9591127634048462, "rewards/margins": 9.466114044189453, "rewards/rejected": -10.425226211547852, "step": 1640 }, { "epoch": 0.56, "learning_rate": 1.689703148093035e-06, "logits/chosen": -0.7306690216064453, "logits/rejected": -0.7172698378562927, "logps/chosen": -147.3928680419922, "logps/rejected": -282.3106994628906, "loss": 0.0195, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3298001289367676, "rewards/margins": 11.572944641113281, "rewards/rejected": -11.243144989013672, "step": 1641 }, { "epoch": 0.56, "learning_rate": 1.6893028311453418e-06, "logits/chosen": -0.7036974430084229, "logits/rejected": -0.6805247664451599, "logps/chosen": -195.18411254882812, "logps/rejected": -260.2562561035156, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -0.8765028715133667, "rewards/margins": 9.807367324829102, "rewards/rejected": -10.683869361877441, "step": 1642 }, { "epoch": 0.56, "learning_rate": 1.6889023036251461e-06, "logits/chosen": -0.7486907839775085, "logits/rejected": -0.7411226034164429, "logps/chosen": -214.0384063720703, "logps/rejected": -372.9800720214844, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.500199794769287, "rewards/margins": 13.42508602142334, "rewards/rejected": -14.925286293029785, "step": 1643 }, { "epoch": 0.56, "learning_rate": 1.6885015656548038e-06, "logits/chosen": -0.7753613591194153, "logits/rejected": -0.7542409300804138, "logps/chosen": -197.18243408203125, "logps/rejected": -332.9610595703125, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": -1.0256943702697754, "rewards/margins": 12.347219467163086, "rewards/rejected": -13.372913360595703, "step": 1644 }, { "epoch": 0.56, "learning_rate": 1.688100617356735e-06, "logits/chosen": -0.7883706092834473, "logits/rejected": -0.7658645510673523, "logps/chosen": -203.90823364257812, "logps/rejected": -349.14093017578125, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.9416306018829346, "rewards/margins": 13.077609062194824, "rewards/rejected": -14.019240379333496, "step": 1645 }, { "epoch": 0.56, "learning_rate": 1.6876994588534233e-06, "logits/chosen": -0.7182756662368774, "logits/rejected": -0.7102433443069458, "logps/chosen": -144.07839965820312, "logps/rejected": -267.4396057128906, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -1.3614020347595215, "rewards/margins": 10.49554443359375, "rewards/rejected": -11.85694694519043, "step": 1646 }, { "epoch": 0.56, "learning_rate": 1.6872980902674174e-06, "logits/chosen": -0.7015480399131775, "logits/rejected": -0.6902076601982117, "logps/chosen": -134.2080078125, "logps/rejected": -225.96636962890625, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": 0.26710495352745056, "rewards/margins": 10.60407543182373, "rewards/rejected": -10.336971282958984, "step": 1647 }, { "epoch": 0.56, "learning_rate": 1.68689651172133e-06, "logits/chosen": -0.7300392985343933, "logits/rejected": -0.7022305727005005, "logps/chosen": -270.3973083496094, "logps/rejected": -332.88818359375, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.2805372476577759, "rewards/margins": 11.586174011230469, "rewards/rejected": -11.86671257019043, "step": 1648 }, { "epoch": 0.56, "learning_rate": 1.6864947233378373e-06, "logits/chosen": -0.7271260619163513, "logits/rejected": -0.712488055229187, "logps/chosen": -173.20431518554688, "logps/rejected": -317.291748046875, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.8281212449073792, "rewards/margins": 13.3965425491333, "rewards/rejected": -14.224664688110352, "step": 1649 }, { "epoch": 0.56, "learning_rate": 1.6860927252396807e-06, "logits/chosen": -0.8200886845588684, "logits/rejected": -0.7755399942398071, "logps/chosen": -219.9267578125, "logps/rejected": -315.816162109375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.559719443321228, "rewards/margins": 13.88139533996582, "rewards/rejected": -14.441113471984863, "step": 1650 }, { "epoch": 0.56, "learning_rate": 1.6856905175496647e-06, "logits/chosen": -0.7687102556228638, "logits/rejected": -0.750404953956604, "logps/chosen": -102.06051635742188, "logps/rejected": -211.43984985351562, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": -0.8809714317321777, "rewards/margins": 10.193069458007812, "rewards/rejected": -11.074041366577148, "step": 1651 }, { "epoch": 0.56, "learning_rate": 1.6852881003906588e-06, "logits/chosen": -0.7792474627494812, "logits/rejected": -0.7552543878555298, "logps/chosen": -196.52301025390625, "logps/rejected": -303.1496887207031, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -0.21671657264232635, "rewards/margins": 12.84272575378418, "rewards/rejected": -13.059443473815918, "step": 1652 }, { "epoch": 0.56, "learning_rate": 1.6848854738855946e-06, "logits/chosen": -0.7135239839553833, "logits/rejected": -0.7016212344169617, "logps/chosen": -193.62139892578125, "logps/rejected": -334.3155517578125, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -0.7338831424713135, "rewards/margins": 14.78787612915039, "rewards/rejected": -15.521759033203125, "step": 1653 }, { "epoch": 0.56, "learning_rate": 1.6844826381574705e-06, "logits/chosen": -0.7588199973106384, "logits/rejected": -0.7315587401390076, "logps/chosen": -234.57064819335938, "logps/rejected": -319.9272766113281, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.2962455749511719, "rewards/margins": 11.26807689666748, "rewards/rejected": -11.564323425292969, "step": 1654 }, { "epoch": 0.56, "learning_rate": 1.6840795933293462e-06, "logits/chosen": -0.7351438403129578, "logits/rejected": -0.7037146687507629, "logps/chosen": -252.65151977539062, "logps/rejected": -423.84979248046875, "loss": 0.0601, "rewards/accuracies": 1.0, "rewards/chosen": 1.510398268699646, "rewards/margins": 18.002578735351562, "rewards/rejected": -16.49217987060547, "step": 1655 }, { "epoch": 0.57, "learning_rate": 1.6836763395243465e-06, "logits/chosen": -0.7231351733207703, "logits/rejected": -0.7191638946533203, "logps/chosen": -207.4827423095703, "logps/rejected": -350.5978088378906, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 0.8808619976043701, "rewards/margins": 13.545334815979004, "rewards/rejected": -12.664474487304688, "step": 1656 }, { "epoch": 0.57, "learning_rate": 1.6832728768656602e-06, "logits/chosen": -0.7913818359375, "logits/rejected": -0.762725830078125, "logps/chosen": -172.51104736328125, "logps/rejected": -328.890625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.8485439419746399, "rewards/margins": 13.41282844543457, "rewards/rejected": -14.261371612548828, "step": 1657 }, { "epoch": 0.57, "learning_rate": 1.6828692054765395e-06, "logits/chosen": -0.7704718112945557, "logits/rejected": -0.7475634813308716, "logps/chosen": -190.61497497558594, "logps/rejected": -291.7501525878906, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6147966384887695, "rewards/margins": 11.63309097290039, "rewards/rejected": -12.24788761138916, "step": 1658 }, { "epoch": 0.57, "learning_rate": 1.6824653254803002e-06, "logits/chosen": -0.8054676651954651, "logits/rejected": -0.7736284136772156, "logps/chosen": -219.8235321044922, "logps/rejected": -328.69232177734375, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.6229947805404663, "rewards/margins": 13.211470603942871, "rewards/rejected": -13.834465026855469, "step": 1659 }, { "epoch": 0.57, "learning_rate": 1.682061237000322e-06, "logits/chosen": -0.7541403770446777, "logits/rejected": -0.7150189876556396, "logps/chosen": -208.6708984375, "logps/rejected": -344.60321044921875, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.26845332980155945, "rewards/margins": 15.884841918945312, "rewards/rejected": -16.153295516967773, "step": 1660 }, { "epoch": 0.57, "learning_rate": 1.6816569401600486e-06, "logits/chosen": -0.656576931476593, "logits/rejected": -0.6417176723480225, "logps/chosen": -183.77195739746094, "logps/rejected": -295.2938537597656, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.31727275252342224, "rewards/margins": 11.874380111694336, "rewards/rejected": -12.191654205322266, "step": 1661 }, { "epoch": 0.57, "learning_rate": 1.681252435082987e-06, "logits/chosen": -0.7466003894805908, "logits/rejected": -0.7236153483390808, "logps/chosen": -230.27272033691406, "logps/rejected": -313.5864562988281, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -0.39451223611831665, "rewards/margins": 10.15325927734375, "rewards/rejected": -10.547770500183105, "step": 1662 }, { "epoch": 0.57, "learning_rate": 1.6808477218927084e-06, "logits/chosen": -0.7726539969444275, "logits/rejected": -0.7609131336212158, "logps/chosen": -246.1075897216797, "logps/rejected": -404.1507568359375, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -0.3464192748069763, "rewards/margins": 14.015256881713867, "rewards/rejected": -14.361677169799805, "step": 1663 }, { "epoch": 0.57, "learning_rate": 1.6804428007128465e-06, "logits/chosen": -0.7739159464836121, "logits/rejected": -0.7384783029556274, "logps/chosen": -247.3255615234375, "logps/rejected": -351.96429443359375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.3410942554473877, "rewards/margins": 13.183235168457031, "rewards/rejected": -13.52432918548584, "step": 1664 }, { "epoch": 0.57, "learning_rate": 1.680037671667099e-06, "logits/chosen": -0.72135990858078, "logits/rejected": -0.6976432204246521, "logps/chosen": -163.06341552734375, "logps/rejected": -266.1370544433594, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -0.6150668859481812, "rewards/margins": 12.963512420654297, "rewards/rejected": -13.57857894897461, "step": 1665 }, { "epoch": 0.57, "learning_rate": 1.6796323348792278e-06, "logits/chosen": -0.7739548087120056, "logits/rejected": -0.7562032341957092, "logps/chosen": -181.71484375, "logps/rejected": -293.2906494140625, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6617914438247681, "rewards/margins": 12.312514305114746, "rewards/rejected": -12.974305152893066, "step": 1666 }, { "epoch": 0.57, "learning_rate": 1.6792267904730575e-06, "logits/chosen": -0.8890407681465149, "logits/rejected": -0.8442140817642212, "logps/chosen": -219.54917907714844, "logps/rejected": -348.7392578125, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -1.6497756242752075, "rewards/margins": 14.107695579528809, "rewards/rejected": -15.757471084594727, "step": 1667 }, { "epoch": 0.57, "learning_rate": 1.678821038572476e-06, "logits/chosen": -0.766625702381134, "logits/rejected": -0.7370834946632385, "logps/chosen": -180.49639892578125, "logps/rejected": -257.6893310546875, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -0.08869144320487976, "rewards/margins": 11.627191543579102, "rewards/rejected": -11.715882301330566, "step": 1668 }, { "epoch": 0.57, "learning_rate": 1.6784150793014354e-06, "logits/chosen": -0.8083032965660095, "logits/rejected": -0.7842623591423035, "logps/chosen": -233.46946716308594, "logps/rejected": -325.6697998046875, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -0.33314624428749084, "rewards/margins": 11.008814811706543, "rewards/rejected": -11.341961860656738, "step": 1669 }, { "epoch": 0.57, "learning_rate": 1.6780089127839502e-06, "logits/chosen": -0.718280553817749, "logits/rejected": -0.7024245262145996, "logps/chosen": -184.74301147460938, "logps/rejected": -313.188720703125, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.6388788819313049, "rewards/margins": 12.161200523376465, "rewards/rejected": -12.800079345703125, "step": 1670 }, { "epoch": 0.57, "learning_rate": 1.677602539144099e-06, "logits/chosen": -0.6833667755126953, "logits/rejected": -0.6755442023277283, "logps/chosen": -196.25823974609375, "logps/rejected": -369.6131286621094, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": 0.4934980571269989, "rewards/margins": 14.02457046508789, "rewards/rejected": -13.531074523925781, "step": 1671 }, { "epoch": 0.57, "learning_rate": 1.6771959585060232e-06, "logits/chosen": -0.6772648096084595, "logits/rejected": -0.6310067176818848, "logps/chosen": -219.9515838623047, "logps/rejected": -348.92108154296875, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.6298667192459106, "rewards/margins": 16.063186645507812, "rewards/rejected": -16.69305419921875, "step": 1672 }, { "epoch": 0.57, "learning_rate": 1.6767891709939272e-06, "logits/chosen": -0.8547443747520447, "logits/rejected": -0.8182317018508911, "logps/chosen": -170.49880981445312, "logps/rejected": -226.24203491210938, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 0.8486621379852295, "rewards/margins": 10.77696418762207, "rewards/rejected": -9.928301811218262, "step": 1673 }, { "epoch": 0.57, "learning_rate": 1.6763821767320799e-06, "logits/chosen": -0.7949922680854797, "logits/rejected": -0.74112868309021, "logps/chosen": -253.91262817382812, "logps/rejected": -277.38079833984375, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": 1.2314790487289429, "rewards/margins": 12.7252779006958, "rewards/rejected": -11.493799209594727, "step": 1674 }, { "epoch": 0.57, "learning_rate": 1.6759749758448117e-06, "logits/chosen": -0.8164458870887756, "logits/rejected": -0.7578095197677612, "logps/chosen": -276.0245361328125, "logps/rejected": -437.0779113769531, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.23000666499137878, "rewards/margins": 16.985931396484375, "rewards/rejected": -17.215938568115234, "step": 1675 }, { "epoch": 0.57, "learning_rate": 1.6755675684565169e-06, "logits/chosen": -0.6350435614585876, "logits/rejected": -0.6063147783279419, "logps/chosen": -186.22760009765625, "logps/rejected": -284.3563232421875, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": -0.9717761278152466, "rewards/margins": 10.978363990783691, "rewards/rejected": -11.950139999389648, "step": 1676 }, { "epoch": 0.57, "learning_rate": 1.675159954691653e-06, "logits/chosen": -0.7518919110298157, "logits/rejected": -0.6960408091545105, "logps/chosen": -189.08216857910156, "logps/rejected": -274.8541564941406, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -0.34946149587631226, "rewards/margins": 11.9976167678833, "rewards/rejected": -12.347078323364258, "step": 1677 }, { "epoch": 0.57, "learning_rate": 1.6747521346747404e-06, "logits/chosen": -0.6674649715423584, "logits/rejected": -0.6410110592842102, "logps/chosen": -163.2681884765625, "logps/rejected": -228.73098754882812, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -1.5907485485076904, "rewards/margins": 7.281552314758301, "rewards/rejected": -8.87230110168457, "step": 1678 }, { "epoch": 0.57, "learning_rate": 1.6743441085303624e-06, "logits/chosen": -0.7393080592155457, "logits/rejected": -0.6888861060142517, "logps/chosen": -234.26388549804688, "logps/rejected": -330.180908203125, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -1.3952465057373047, "rewards/margins": 14.968008041381836, "rewards/rejected": -16.36325454711914, "step": 1679 }, { "epoch": 0.57, "learning_rate": 1.6739358763831654e-06, "logits/chosen": -0.7096680998802185, "logits/rejected": -0.6682446599006653, "logps/chosen": -211.77691650390625, "logps/rejected": -290.9571533203125, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.10221480578184128, "rewards/margins": 12.34013557434082, "rewards/rejected": -12.442349433898926, "step": 1680 }, { "epoch": 0.57, "learning_rate": 1.673527438357859e-06, "logits/chosen": -0.6979347467422485, "logits/rejected": -0.637416422367096, "logps/chosen": -224.8660888671875, "logps/rejected": -320.7806396484375, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 0.36036035418510437, "rewards/margins": 15.81357192993164, "rewards/rejected": -15.453211784362793, "step": 1681 }, { "epoch": 0.57, "learning_rate": 1.6731187945792148e-06, "logits/chosen": -0.7004508376121521, "logits/rejected": -0.6788731217384338, "logps/chosen": -191.9920654296875, "logps/rejected": -343.57525634765625, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -0.5035538077354431, "rewards/margins": 13.328638076782227, "rewards/rejected": -13.832192420959473, "step": 1682 }, { "epoch": 0.57, "learning_rate": 1.672709945172068e-06, "logits/chosen": -0.7140088677406311, "logits/rejected": -0.6961982250213623, "logps/chosen": -175.52769470214844, "logps/rejected": -252.28578186035156, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -0.6434664130210876, "rewards/margins": 10.441009521484375, "rewards/rejected": -11.084474563598633, "step": 1683 }, { "epoch": 0.57, "learning_rate": 1.6723008902613168e-06, "logits/chosen": -0.6900888681411743, "logits/rejected": -0.6634820699691772, "logps/chosen": -158.34207153320312, "logps/rejected": -260.635498046875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.5639619827270508, "rewards/margins": 10.519851684570312, "rewards/rejected": -11.08381462097168, "step": 1684 }, { "epoch": 0.58, "learning_rate": 1.6718916299719214e-06, "logits/chosen": -0.7553062438964844, "logits/rejected": -0.736265242099762, "logps/chosen": -211.53018188476562, "logps/rejected": -330.5611877441406, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.2881102561950684, "rewards/margins": 12.8878755569458, "rewards/rejected": -14.175987243652344, "step": 1685 }, { "epoch": 0.58, "learning_rate": 1.6714821644289054e-06, "logits/chosen": -0.6740593910217285, "logits/rejected": -0.6704998016357422, "logps/chosen": -158.95028686523438, "logps/rejected": -337.99627685546875, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -0.8043254017829895, "rewards/margins": 15.072783470153809, "rewards/rejected": -15.877108573913574, "step": 1686 }, { "epoch": 0.58, "learning_rate": 1.6710724937573549e-06, "logits/chosen": -0.6636052131652832, "logits/rejected": -0.6534518003463745, "logps/chosen": -203.35633850097656, "logps/rejected": -317.89349365234375, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 0.09195652604103088, "rewards/margins": 12.03331184387207, "rewards/rejected": -11.941354751586914, "step": 1687 }, { "epoch": 0.58, "learning_rate": 1.6706626180824183e-06, "logits/chosen": -0.726425290107727, "logits/rejected": -0.6822391152381897, "logps/chosen": -225.206787109375, "logps/rejected": -271.234375, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.24905547499656677, "rewards/margins": 10.694706916809082, "rewards/rejected": -10.943761825561523, "step": 1688 }, { "epoch": 0.58, "learning_rate": 1.6702525375293073e-06, "logits/chosen": -0.6474798321723938, "logits/rejected": -0.6184061765670776, "logps/chosen": -192.81475830078125, "logps/rejected": -296.1175231933594, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.2477599382400513, "rewards/margins": 12.283803939819336, "rewards/rejected": -13.531563758850098, "step": 1689 }, { "epoch": 0.58, "learning_rate": 1.669842252223296e-06, "logits/chosen": -0.6508229970932007, "logits/rejected": -0.5998287796974182, "logps/chosen": -225.5176544189453, "logps/rejected": -285.2539367675781, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 0.2830437421798706, "rewards/margins": 11.576282501220703, "rewards/rejected": -11.293238639831543, "step": 1690 }, { "epoch": 0.58, "learning_rate": 1.6694317622897205e-06, "logits/chosen": -0.5897535085678101, "logits/rejected": -0.5612432956695557, "logps/chosen": -171.7293701171875, "logps/rejected": -213.5333251953125, "loss": 0.0236, "rewards/accuracies": 0.9375, "rewards/chosen": -1.638816237449646, "rewards/margins": 6.827167510986328, "rewards/rejected": -8.465984344482422, "step": 1691 }, { "epoch": 0.58, "learning_rate": 1.66902106785398e-06, "logits/chosen": -0.5754356980323792, "logits/rejected": -0.5655592083930969, "logps/chosen": -160.39505004882812, "logps/rejected": -323.3074645996094, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.350157380104065, "rewards/margins": 13.498458862304688, "rewards/rejected": -14.848615646362305, "step": 1692 }, { "epoch": 0.58, "learning_rate": 1.668610169041536e-06, "logits/chosen": -0.6937431693077087, "logits/rejected": -0.6556819081306458, "logps/chosen": -191.11404418945312, "logps/rejected": -220.3096160888672, "loss": 0.0612, "rewards/accuracies": 1.0, "rewards/chosen": -0.16310246288776398, "rewards/margins": 8.689403533935547, "rewards/rejected": -8.852505683898926, "step": 1693 }, { "epoch": 0.58, "learning_rate": 1.6681990659779123e-06, "logits/chosen": -0.7348288297653198, "logits/rejected": -0.7472873330116272, "logps/chosen": -99.1307373046875, "logps/rejected": -281.7118835449219, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.9254021644592285, "rewards/margins": 13.657110214233398, "rewards/rejected": -15.582510948181152, "step": 1694 }, { "epoch": 0.58, "learning_rate": 1.6677877587886955e-06, "logits/chosen": -0.8057088851928711, "logits/rejected": -0.7881041169166565, "logps/chosen": -179.6177520751953, "logps/rejected": -265.32373046875, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4673822522163391, "rewards/margins": 10.983482360839844, "rewards/rejected": -11.4508638381958, "step": 1695 }, { "epoch": 0.58, "learning_rate": 1.6673762475995342e-06, "logits/chosen": -0.8367465138435364, "logits/rejected": -0.7851153016090393, "logps/chosen": -233.154541015625, "logps/rejected": -296.1542053222656, "loss": 0.0785, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1041429042816162, "rewards/margins": 13.777823448181152, "rewards/rejected": -14.881965637207031, "step": 1696 }, { "epoch": 0.58, "learning_rate": 1.6669645325361388e-06, "logits/chosen": -0.6654278635978699, "logits/rejected": -0.6539556980133057, "logps/chosen": -121.90303802490234, "logps/rejected": -223.6781768798828, "loss": 0.0153, "rewards/accuracies": 0.9375, "rewards/chosen": -0.540042519569397, "rewards/margins": 9.438529968261719, "rewards/rejected": -9.978572845458984, "step": 1697 }, { "epoch": 0.58, "learning_rate": 1.666552613724283e-06, "logits/chosen": -0.7000130414962769, "logits/rejected": -0.6771857142448425, "logps/chosen": -175.005126953125, "logps/rejected": -301.218505859375, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": -0.8515927195549011, "rewards/margins": 14.154431343078613, "rewards/rejected": -15.006026268005371, "step": 1698 }, { "epoch": 0.58, "learning_rate": 1.6661404912898024e-06, "logits/chosen": -0.688164234161377, "logits/rejected": -0.6727953553199768, "logps/chosen": -117.76161193847656, "logps/rejected": -225.81190490722656, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -1.5554696321487427, "rewards/margins": 9.778677940368652, "rewards/rejected": -11.334148406982422, "step": 1699 }, { "epoch": 0.58, "learning_rate": 1.6657281653585952e-06, "logits/chosen": -0.6727764010429382, "logits/rejected": -0.6569643616676331, "logps/chosen": -187.94976806640625, "logps/rejected": -307.34771728515625, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.8279867768287659, "rewards/margins": 12.038843154907227, "rewards/rejected": -12.866830825805664, "step": 1700 }, { "epoch": 0.58, "learning_rate": 1.6653156360566203e-06, "logits/chosen": -0.6923679709434509, "logits/rejected": -0.686487078666687, "logps/chosen": -237.835693359375, "logps/rejected": -333.6687316894531, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.510716438293457, "rewards/margins": 10.371360778808594, "rewards/rejected": -10.882078170776367, "step": 1701 }, { "epoch": 0.58, "learning_rate": 1.6649029035099e-06, "logits/chosen": -0.6802188754081726, "logits/rejected": -0.663191556930542, "logps/chosen": -234.92784118652344, "logps/rejected": -339.9120788574219, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": 0.2758311629295349, "rewards/margins": 12.61962890625, "rewards/rejected": -12.343796730041504, "step": 1702 }, { "epoch": 0.58, "learning_rate": 1.6644899678445188e-06, "logits/chosen": -0.6613546013832092, "logits/rejected": -0.6397277116775513, "logps/chosen": -162.73252868652344, "logps/rejected": -276.85357666015625, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -2.1143815517425537, "rewards/margins": 11.217633247375488, "rewards/rejected": -13.332015991210938, "step": 1703 }, { "epoch": 0.58, "learning_rate": 1.6640768291866225e-06, "logits/chosen": -0.6796828508377075, "logits/rejected": -0.648565948009491, "logps/chosen": -210.88621520996094, "logps/rejected": -366.56280517578125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.6106916666030884, "rewards/margins": 14.648300170898438, "rewards/rejected": -16.258991241455078, "step": 1704 }, { "epoch": 0.58, "learning_rate": 1.6636634876624194e-06, "logits/chosen": -0.777850329875946, "logits/rejected": -0.7417632937431335, "logps/chosen": -241.73878479003906, "logps/rejected": -371.92498779296875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.47902584075927734, "rewards/margins": 15.048306465148926, "rewards/rejected": -15.527332305908203, "step": 1705 }, { "epoch": 0.58, "learning_rate": 1.6632499433981794e-06, "logits/chosen": -0.7570542097091675, "logits/rejected": -0.7179144620895386, "logps/chosen": -223.31381225585938, "logps/rejected": -326.9748229980469, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7227945327758789, "rewards/margins": 15.519628524780273, "rewards/rejected": -16.24242401123047, "step": 1706 }, { "epoch": 0.58, "learning_rate": 1.6628361965202347e-06, "logits/chosen": -0.5589651465415955, "logits/rejected": -0.5380529165267944, "logps/chosen": -217.80581665039062, "logps/rejected": -318.1239929199219, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.3511478006839752, "rewards/margins": 12.107643127441406, "rewards/rejected": -12.45879077911377, "step": 1707 }, { "epoch": 0.58, "learning_rate": 1.6624222471549796e-06, "logits/chosen": -0.6771066784858704, "logits/rejected": -0.6385065913200378, "logps/chosen": -231.9700164794922, "logps/rejected": -355.0995178222656, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 0.0645829439163208, "rewards/margins": 15.370580673217773, "rewards/rejected": -15.305997848510742, "step": 1708 }, { "epoch": 0.58, "learning_rate": 1.6620080954288693e-06, "logits/chosen": -0.7055566906929016, "logits/rejected": -0.6610589027404785, "logps/chosen": -183.6772918701172, "logps/rejected": -269.371826171875, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": 0.40812385082244873, "rewards/margins": 10.755777359008789, "rewards/rejected": -10.347654342651367, "step": 1709 }, { "epoch": 0.58, "learning_rate": 1.661593741468422e-06, "logits/chosen": -0.7139325141906738, "logits/rejected": -0.6334975957870483, "logps/chosen": -270.4159240722656, "logps/rejected": -334.07489013671875, "loss": 0.0533, "rewards/accuracies": 0.9375, "rewards/chosen": 0.2517240345478058, "rewards/margins": 11.873671531677246, "rewards/rejected": -11.6219482421875, "step": 1710 }, { "epoch": 0.58, "learning_rate": 1.661179185400217e-06, "logits/chosen": -0.6842502355575562, "logits/rejected": -0.66696697473526, "logps/chosen": -184.53668212890625, "logps/rejected": -339.19549560546875, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -1.395539402961731, "rewards/margins": 13.299464225769043, "rewards/rejected": -14.695003509521484, "step": 1711 }, { "epoch": 0.58, "learning_rate": 1.660764427350895e-06, "logits/chosen": -0.7598471641540527, "logits/rejected": -0.7200608849525452, "logps/chosen": -211.33468627929688, "logps/rejected": -412.9658203125, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.6016887426376343, "rewards/margins": 18.451650619506836, "rewards/rejected": -20.05333709716797, "step": 1712 }, { "epoch": 0.58, "learning_rate": 1.6603494674471593e-06, "logits/chosen": -0.6950326561927795, "logits/rejected": -0.6655553579330444, "logps/chosen": -165.92242431640625, "logps/rejected": -271.0128173828125, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": -0.9187010526657104, "rewards/margins": 10.95024585723877, "rewards/rejected": -11.868947982788086, "step": 1713 }, { "epoch": 0.58, "learning_rate": 1.6599343058157745e-06, "logits/chosen": -0.6450177431106567, "logits/rejected": -0.625394880771637, "logps/chosen": -154.8619842529297, "logps/rejected": -292.74493408203125, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": -1.3890600204467773, "rewards/margins": 12.422538757324219, "rewards/rejected": -13.811598777770996, "step": 1714 }, { "epoch": 0.59, "learning_rate": 1.6595189425835664e-06, "logits/chosen": -0.5905261039733887, "logits/rejected": -0.5673084855079651, "logps/chosen": -192.92422485351562, "logps/rejected": -369.48504638671875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.9659144878387451, "rewards/margins": 15.134316444396973, "rewards/rejected": -16.100229263305664, "step": 1715 }, { "epoch": 0.59, "learning_rate": 1.6591033778774228e-06, "logits/chosen": -0.6960373520851135, "logits/rejected": -0.6819602847099304, "logps/chosen": -167.81097412109375, "logps/rejected": -346.99957275390625, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -1.5633721351623535, "rewards/margins": 15.949541091918945, "rewards/rejected": -17.51291275024414, "step": 1716 }, { "epoch": 0.59, "learning_rate": 1.6586876118242932e-06, "logits/chosen": -0.756721019744873, "logits/rejected": -0.7580384016036987, "logps/chosen": -179.73818969726562, "logps/rejected": -318.52264404296875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.0056936740875244, "rewards/margins": 13.038304328918457, "rewards/rejected": -14.043998718261719, "step": 1717 }, { "epoch": 0.59, "learning_rate": 1.6582716445511882e-06, "logits/chosen": -0.6973318457603455, "logits/rejected": -0.6802809238433838, "logps/chosen": -169.37620544433594, "logps/rejected": -347.2066955566406, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.9139719009399414, "rewards/margins": 16.897411346435547, "rewards/rejected": -17.811384201049805, "step": 1718 }, { "epoch": 0.59, "learning_rate": 1.6578554761851801e-06, "logits/chosen": -0.6629623770713806, "logits/rejected": -0.64870285987854, "logps/chosen": -184.29681396484375, "logps/rejected": -340.1089172363281, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -0.3307892382144928, "rewards/margins": 12.56788444519043, "rewards/rejected": -12.898674011230469, "step": 1719 }, { "epoch": 0.59, "learning_rate": 1.6574391068534028e-06, "logits/chosen": -0.632085382938385, "logits/rejected": -0.6098169684410095, "logps/chosen": -208.69593811035156, "logps/rejected": -294.5067138671875, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": -0.3798258304595947, "rewards/margins": 12.007015228271484, "rewards/rejected": -12.386841773986816, "step": 1720 }, { "epoch": 0.59, "learning_rate": 1.6570225366830512e-06, "logits/chosen": -0.6781051158905029, "logits/rejected": -0.6798679232597351, "logps/chosen": -151.17857360839844, "logps/rejected": -267.3885803222656, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.490712970495224, "rewards/margins": 11.87061882019043, "rewards/rejected": -11.379907608032227, "step": 1721 }, { "epoch": 0.59, "learning_rate": 1.6566057658013817e-06, "logits/chosen": -0.7132298946380615, "logits/rejected": -0.6447461247444153, "logps/chosen": -223.5684356689453, "logps/rejected": -286.0470886230469, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4025442898273468, "rewards/margins": 12.085824966430664, "rewards/rejected": -12.488369941711426, "step": 1722 }, { "epoch": 0.59, "learning_rate": 1.6561887943357123e-06, "logits/chosen": -0.7422062754631042, "logits/rejected": -0.7216408252716064, "logps/chosen": -188.41456604003906, "logps/rejected": -295.51898193359375, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.7367186546325684, "rewards/margins": 10.751224517822266, "rewards/rejected": -11.48794174194336, "step": 1723 }, { "epoch": 0.59, "learning_rate": 1.6557716224134214e-06, "logits/chosen": -0.759363055229187, "logits/rejected": -0.7387878894805908, "logps/chosen": -250.00164794921875, "logps/rejected": -368.6333923339844, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": 1.127076268196106, "rewards/margins": 13.124446868896484, "rewards/rejected": -11.997370719909668, "step": 1724 }, { "epoch": 0.59, "learning_rate": 1.6553542501619503e-06, "logits/chosen": -0.7664514780044556, "logits/rejected": -0.7503120303153992, "logps/chosen": -179.58834838867188, "logps/rejected": -289.9739685058594, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.9153579473495483, "rewards/margins": 12.166038513183594, "rewards/rejected": -13.08139705657959, "step": 1725 }, { "epoch": 0.59, "learning_rate": 1.6549366777087995e-06, "logits/chosen": -0.7393773794174194, "logits/rejected": -0.7187697291374207, "logps/chosen": -209.39382934570312, "logps/rejected": -328.72381591796875, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.8157694935798645, "rewards/margins": 12.577667236328125, "rewards/rejected": -13.393436431884766, "step": 1726 }, { "epoch": 0.59, "learning_rate": 1.6545189051815317e-06, "logits/chosen": -0.7510875463485718, "logits/rejected": -0.7241488099098206, "logps/chosen": -213.85211181640625, "logps/rejected": -306.7589111328125, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -0.2404576987028122, "rewards/margins": 10.922723770141602, "rewards/rejected": -11.16318130493164, "step": 1727 }, { "epoch": 0.59, "learning_rate": 1.6541009327077714e-06, "logits/chosen": -0.6938706040382385, "logits/rejected": -0.6498547196388245, "logps/chosen": -277.7762451171875, "logps/rejected": -383.9676513671875, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.746928334236145, "rewards/margins": 15.172480583190918, "rewards/rejected": -15.919408798217773, "step": 1728 }, { "epoch": 0.59, "learning_rate": 1.6536827604152025e-06, "logits/chosen": -0.6794137358665466, "logits/rejected": -0.6366716027259827, "logps/chosen": -218.82254028320312, "logps/rejected": -358.11090087890625, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -0.8344234824180603, "rewards/margins": 14.967464447021484, "rewards/rejected": -15.801887512207031, "step": 1729 }, { "epoch": 0.59, "learning_rate": 1.6532643884315719e-06, "logits/chosen": -0.8128645420074463, "logits/rejected": -0.7788275480270386, "logps/chosen": -216.45242309570312, "logps/rejected": -307.8891296386719, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.4152781963348389, "rewards/margins": 11.106237411499023, "rewards/rejected": -12.521514892578125, "step": 1730 }, { "epoch": 0.59, "learning_rate": 1.6528458168846853e-06, "logits/chosen": -0.7232426404953003, "logits/rejected": -0.7108601927757263, "logps/chosen": -264.0284423828125, "logps/rejected": -416.37744140625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.2880803942680359, "rewards/margins": 13.125947952270508, "rewards/rejected": -13.41402816772461, "step": 1731 }, { "epoch": 0.59, "learning_rate": 1.6524270459024115e-06, "logits/chosen": -0.62004154920578, "logits/rejected": -0.6045466065406799, "logps/chosen": -160.12374877929688, "logps/rejected": -272.0143127441406, "loss": 0.0834, "rewards/accuracies": 1.0, "rewards/chosen": 0.6667299866676331, "rewards/margins": 13.756254196166992, "rewards/rejected": -13.089523315429688, "step": 1732 }, { "epoch": 0.59, "learning_rate": 1.6520080756126786e-06, "logits/chosen": -0.6253198385238647, "logits/rejected": -0.5785864591598511, "logps/chosen": -157.52365112304688, "logps/rejected": -254.7277069091797, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.08056075125932693, "rewards/margins": 11.83799934387207, "rewards/rejected": -11.918560028076172, "step": 1733 }, { "epoch": 0.59, "learning_rate": 1.6515889061434768e-06, "logits/chosen": -0.5767019987106323, "logits/rejected": -0.5518288016319275, "logps/chosen": -211.65179443359375, "logps/rejected": -313.2957458496094, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": -0.3737837076187134, "rewards/margins": 9.361781120300293, "rewards/rejected": -9.735565185546875, "step": 1734 }, { "epoch": 0.59, "learning_rate": 1.6511695376228558e-06, "logits/chosen": -0.6210060119628906, "logits/rejected": -0.6048425436019897, "logps/chosen": -169.60523986816406, "logps/rejected": -283.0968933105469, "loss": 0.0241, "rewards/accuracies": 0.9375, "rewards/chosen": -1.544533133506775, "rewards/margins": 11.519484519958496, "rewards/rejected": -13.064018249511719, "step": 1735 }, { "epoch": 0.59, "learning_rate": 1.6507499701789278e-06, "logits/chosen": -0.6102010011672974, "logits/rejected": -0.6060441136360168, "logps/chosen": -166.99147033691406, "logps/rejected": -318.0325927734375, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -0.8951573371887207, "rewards/margins": 12.152375221252441, "rewards/rejected": -13.047531127929688, "step": 1736 }, { "epoch": 0.59, "learning_rate": 1.6503302039398642e-06, "logits/chosen": -0.6740356087684631, "logits/rejected": -0.6505767107009888, "logps/chosen": -183.99154663085938, "logps/rejected": -293.63995361328125, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -0.82532799243927, "rewards/margins": 12.620384216308594, "rewards/rejected": -13.44571304321289, "step": 1737 }, { "epoch": 0.59, "learning_rate": 1.6499102390338985e-06, "logits/chosen": -0.6566532254219055, "logits/rejected": -0.6423652172088623, "logps/chosen": -203.53411865234375, "logps/rejected": -307.4617919921875, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -1.0313721895217896, "rewards/margins": 10.807324409484863, "rewards/rejected": -11.838695526123047, "step": 1738 }, { "epoch": 0.59, "learning_rate": 1.6494900755893235e-06, "logits/chosen": -0.6920870542526245, "logits/rejected": -0.688909649848938, "logps/chosen": -208.59878540039062, "logps/rejected": -309.02960205078125, "loss": 0.0336, "rewards/accuracies": 0.9375, "rewards/chosen": -0.786460816860199, "rewards/margins": 11.702799797058105, "rewards/rejected": -12.489259719848633, "step": 1739 }, { "epoch": 0.59, "learning_rate": 1.6490697137344936e-06, "logits/chosen": -0.6309604048728943, "logits/rejected": -0.6297290921211243, "logps/chosen": -151.66253662109375, "logps/rejected": -287.3520812988281, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.9749884009361267, "rewards/margins": 12.657751083374023, "rewards/rejected": -13.632740020751953, "step": 1740 }, { "epoch": 0.59, "learning_rate": 1.6486491535978234e-06, "logits/chosen": -0.6522319316864014, "logits/rejected": -0.6454746127128601, "logps/chosen": -188.69384765625, "logps/rejected": -350.1791687011719, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": -0.8196195363998413, "rewards/margins": 12.90821647644043, "rewards/rejected": -13.727836608886719, "step": 1741 }, { "epoch": 0.59, "learning_rate": 1.6482283953077884e-06, "logits/chosen": -0.6600573658943176, "logits/rejected": -0.6246128082275391, "logps/chosen": -245.29156494140625, "logps/rejected": -341.13482666015625, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": 0.17846150696277618, "rewards/margins": 12.085481643676758, "rewards/rejected": -11.907020568847656, "step": 1742 }, { "epoch": 0.59, "learning_rate": 1.6478074389929244e-06, "logits/chosen": -0.6578768491744995, "logits/rejected": -0.6200316548347473, "logps/chosen": -148.6616973876953, "logps/rejected": -181.93209838867188, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.06346303224563599, "rewards/margins": 9.324663162231445, "rewards/rejected": -9.388126373291016, "step": 1743 }, { "epoch": 0.6, "learning_rate": 1.6473862847818276e-06, "logits/chosen": -0.6497825980186462, "logits/rejected": -0.589008092880249, "logps/chosen": -222.58384704589844, "logps/rejected": -366.1767578125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6002280116081238, "rewards/margins": 14.814566612243652, "rewards/rejected": -15.414794921875, "step": 1744 }, { "epoch": 0.6, "learning_rate": 1.6469649328031549e-06, "logits/chosen": -0.7004679441452026, "logits/rejected": -0.6690465211868286, "logps/chosen": -224.59512329101562, "logps/rejected": -316.2471618652344, "loss": 0.0245, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2699238061904907, "rewards/margins": 11.274223327636719, "rewards/rejected": -12.544147491455078, "step": 1745 }, { "epoch": 0.6, "learning_rate": 1.6465433831856236e-06, "logits/chosen": -0.6554700136184692, "logits/rejected": -0.6279727816581726, "logps/chosen": -193.0115203857422, "logps/rejected": -287.0308837890625, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.40653905272483826, "rewards/margins": 11.037227630615234, "rewards/rejected": -11.443767547607422, "step": 1746 }, { "epoch": 0.6, "learning_rate": 1.6461216360580115e-06, "logits/chosen": -0.6036203503608704, "logits/rejected": -0.572692334651947, "logps/chosen": -190.69419860839844, "logps/rejected": -272.3825378417969, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -0.855477511882782, "rewards/margins": 11.559785842895508, "rewards/rejected": -12.415261268615723, "step": 1747 }, { "epoch": 0.6, "learning_rate": 1.645699691549156e-06, "logits/chosen": -0.6355549693107605, "logits/rejected": -0.6287662982940674, "logps/chosen": -179.3010711669922, "logps/rejected": -345.3907470703125, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.5028789639472961, "rewards/margins": 14.188447952270508, "rewards/rejected": -14.691328048706055, "step": 1748 }, { "epoch": 0.6, "learning_rate": 1.6452775497879557e-06, "logits/chosen": -0.6669185757637024, "logits/rejected": -0.6371012926101685, "logps/chosen": -192.95909118652344, "logps/rejected": -299.088623046875, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -0.5449545383453369, "rewards/margins": 13.634110450744629, "rewards/rejected": -14.179065704345703, "step": 1749 }, { "epoch": 0.6, "learning_rate": 1.6448552109033688e-06, "logits/chosen": -0.640876293182373, "logits/rejected": -0.6162412166595459, "logps/chosen": -248.30239868164062, "logps/rejected": -372.072509765625, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -1.29751718044281, "rewards/margins": 14.800907135009766, "rewards/rejected": -16.09842300415039, "step": 1750 }, { "epoch": 0.6, "learning_rate": 1.6444326750244142e-06, "logits/chosen": -0.6506884694099426, "logits/rejected": -0.6102304458618164, "logps/chosen": -164.63623046875, "logps/rejected": -235.4125518798828, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.4867703914642334, "rewards/margins": 10.738134384155273, "rewards/rejected": -12.22490406036377, "step": 1751 }, { "epoch": 0.6, "learning_rate": 1.6440099422801707e-06, "logits/chosen": -0.6554126143455505, "logits/rejected": -0.6313040852546692, "logps/chosen": -180.2375946044922, "logps/rejected": -342.5199279785156, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.7700138688087463, "rewards/margins": 15.527518272399902, "rewards/rejected": -16.29753303527832, "step": 1752 }, { "epoch": 0.6, "learning_rate": 1.6435870127997775e-06, "logits/chosen": -0.6357132792472839, "logits/rejected": -0.620685875415802, "logps/chosen": -183.53335571289062, "logps/rejected": -355.9786682128906, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.40952107310295105, "rewards/margins": 16.57674789428711, "rewards/rejected": -16.986268997192383, "step": 1753 }, { "epoch": 0.6, "learning_rate": 1.6431638867124336e-06, "logits/chosen": -0.7083224058151245, "logits/rejected": -0.688766598701477, "logps/chosen": -218.37710571289062, "logps/rejected": -384.0654296875, "loss": 0.0958, "rewards/accuracies": 1.0, "rewards/chosen": -1.1393299102783203, "rewards/margins": 15.567792892456055, "rewards/rejected": -16.707122802734375, "step": 1754 }, { "epoch": 0.6, "learning_rate": 1.6427405641473979e-06, "logits/chosen": -0.7135285139083862, "logits/rejected": -0.6962069272994995, "logps/chosen": -243.61585998535156, "logps/rejected": -414.14898681640625, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -0.896210789680481, "rewards/margins": 15.133591651916504, "rewards/rejected": -16.029802322387695, "step": 1755 }, { "epoch": 0.6, "learning_rate": 1.6423170452339903e-06, "logits/chosen": -0.6694560647010803, "logits/rejected": -0.6631605625152588, "logps/chosen": -191.4815673828125, "logps/rejected": -376.27154541015625, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": -0.678517758846283, "rewards/margins": 14.499598503112793, "rewards/rejected": -15.178115844726562, "step": 1756 }, { "epoch": 0.6, "learning_rate": 1.6418933301015894e-06, "logits/chosen": -0.7039008736610413, "logits/rejected": -0.6833719611167908, "logps/chosen": -173.2949676513672, "logps/rejected": -291.51617431640625, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.036302536725997925, "rewards/margins": 11.492307662963867, "rewards/rejected": -11.528610229492188, "step": 1757 }, { "epoch": 0.6, "learning_rate": 1.6414694188796343e-06, "logits/chosen": -0.7112940549850464, "logits/rejected": -0.6856260895729065, "logps/chosen": -174.00209045410156, "logps/rejected": -321.0318603515625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.1639164686203003, "rewards/margins": 13.764425277709961, "rewards/rejected": -14.928343772888184, "step": 1758 }, { "epoch": 0.6, "learning_rate": 1.6410453116976241e-06, "logits/chosen": -0.6529361605644226, "logits/rejected": -0.6400169730186462, "logps/chosen": -215.07086181640625, "logps/rejected": -336.9606018066406, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -2.3053126335144043, "rewards/margins": 11.080191612243652, "rewards/rejected": -13.385503768920898, "step": 1759 }, { "epoch": 0.6, "learning_rate": 1.6406210086851181e-06, "logits/chosen": -0.5876055955886841, "logits/rejected": -0.5506234765052795, "logps/chosen": -170.33990478515625, "logps/rejected": -317.891845703125, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -1.631553053855896, "rewards/margins": 11.869873046875, "rewards/rejected": -13.501423835754395, "step": 1760 }, { "epoch": 0.6, "learning_rate": 1.640196509971735e-06, "logits/chosen": -0.7146506309509277, "logits/rejected": -0.7069774270057678, "logps/chosen": -214.65521240234375, "logps/rejected": -332.90570068359375, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -0.24148553609848022, "rewards/margins": 12.840994834899902, "rewards/rejected": -13.082480430603027, "step": 1761 }, { "epoch": 0.6, "learning_rate": 1.6397718156871524e-06, "logits/chosen": -0.7221274375915527, "logits/rejected": -0.7057849168777466, "logps/chosen": -172.1302490234375, "logps/rejected": -271.4527893066406, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": -0.677219569683075, "rewards/margins": 9.629316329956055, "rewards/rejected": -10.306535720825195, "step": 1762 }, { "epoch": 0.6, "learning_rate": 1.6393469259611092e-06, "logits/chosen": -0.6799906492233276, "logits/rejected": -0.6643944382667542, "logps/chosen": -226.87750244140625, "logps/rejected": -314.22015380859375, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": 0.16222800314426422, "rewards/margins": 9.473073959350586, "rewards/rejected": -9.310847282409668, "step": 1763 }, { "epoch": 0.6, "learning_rate": 1.6389218409234037e-06, "logits/chosen": -0.635887861251831, "logits/rejected": -0.6139234900474548, "logps/chosen": -177.57540893554688, "logps/rejected": -311.7269592285156, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.0062775611877441, "rewards/margins": 13.136860847473145, "rewards/rejected": -14.143138885498047, "step": 1764 }, { "epoch": 0.6, "learning_rate": 1.6384965607038927e-06, "logits/chosen": -0.7583673000335693, "logits/rejected": -0.7101497650146484, "logps/chosen": -217.26206970214844, "logps/rejected": -311.3603210449219, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.02795785665512085, "rewards/margins": 13.022047996520996, "rewards/rejected": -12.994091033935547, "step": 1765 }, { "epoch": 0.6, "learning_rate": 1.6380710854324943e-06, "logits/chosen": -0.6952612400054932, "logits/rejected": -0.6655001044273376, "logps/chosen": -174.17140197753906, "logps/rejected": -265.1739196777344, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -0.020383618772029877, "rewards/margins": 11.26616382598877, "rewards/rejected": -11.286547660827637, "step": 1766 }, { "epoch": 0.6, "learning_rate": 1.6376454152391844e-06, "logits/chosen": -0.6726490259170532, "logits/rejected": -0.6444317102432251, "logps/chosen": -240.02407836914062, "logps/rejected": -342.4490661621094, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.7588359713554382, "rewards/margins": 14.169514656066895, "rewards/rejected": -14.928351402282715, "step": 1767 }, { "epoch": 0.6, "learning_rate": 1.6372195502540001e-06, "logits/chosen": -0.6706165671348572, "logits/rejected": -0.6661666035652161, "logps/chosen": -232.46002197265625, "logps/rejected": -398.4287414550781, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -2.780754327774048, "rewards/margins": 13.510648727416992, "rewards/rejected": -16.29140281677246, "step": 1768 }, { "epoch": 0.6, "learning_rate": 1.6367934906070368e-06, "logits/chosen": -0.6900267004966736, "logits/rejected": -0.6560429334640503, "logps/chosen": -265.7603759765625, "logps/rejected": -384.4805908203125, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -0.4136061370372772, "rewards/margins": 12.173151969909668, "rewards/rejected": -12.586758613586426, "step": 1769 }, { "epoch": 0.6, "learning_rate": 1.6363672364284505e-06, "logits/chosen": -0.6610851883888245, "logits/rejected": -0.6281415224075317, "logps/chosen": -231.64306640625, "logps/rejected": -353.2839050292969, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.19145777821540833, "rewards/margins": 13.137043952941895, "rewards/rejected": -13.32850170135498, "step": 1770 }, { "epoch": 0.6, "learning_rate": 1.635940787848455e-06, "logits/chosen": -0.6586236357688904, "logits/rejected": -0.6140133142471313, "logps/chosen": -192.73239135742188, "logps/rejected": -276.14739990234375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0445530414581299, "rewards/margins": 12.800430297851562, "rewards/rejected": -13.84498405456543, "step": 1771 }, { "epoch": 0.6, "learning_rate": 1.6355141449973253e-06, "logits/chosen": -0.6988998651504517, "logits/rejected": -0.6719484925270081, "logps/chosen": -185.2490234375, "logps/rejected": -307.05303955078125, "loss": 0.06, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2813518047332764, "rewards/margins": 12.451654434204102, "rewards/rejected": -13.73300552368164, "step": 1772 }, { "epoch": 0.61, "learning_rate": 1.6350873080053947e-06, "logits/chosen": -0.7381428480148315, "logits/rejected": -0.7092503905296326, "logps/chosen": -195.72528076171875, "logps/rejected": -340.3776550292969, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.3938392400741577, "rewards/margins": 13.773727416992188, "rewards/rejected": -15.167566299438477, "step": 1773 }, { "epoch": 0.61, "learning_rate": 1.6346602770030555e-06, "logits/chosen": -0.7094921469688416, "logits/rejected": -0.6626166105270386, "logps/chosen": -245.82716369628906, "logps/rejected": -343.679443359375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.7107285261154175, "rewards/margins": 14.23176383972168, "rewards/rejected": -14.942492485046387, "step": 1774 }, { "epoch": 0.61, "learning_rate": 1.6342330521207606e-06, "logits/chosen": -0.6189486384391785, "logits/rejected": -0.559769332408905, "logps/chosen": -218.48265075683594, "logps/rejected": -315.69561767578125, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -1.4576966762542725, "rewards/margins": 11.364355087280273, "rewards/rejected": -12.822052001953125, "step": 1775 }, { "epoch": 0.61, "learning_rate": 1.6338056334890208e-06, "logits/chosen": -0.7353213429450989, "logits/rejected": -0.6967801451683044, "logps/chosen": -222.045166015625, "logps/rejected": -302.0635070800781, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.9956464767456055, "rewards/margins": 12.603067398071289, "rewards/rejected": -13.598714828491211, "step": 1776 }, { "epoch": 0.61, "learning_rate": 1.6333780212384064e-06, "logits/chosen": -0.6775169372558594, "logits/rejected": -0.6432597637176514, "logps/chosen": -177.75291442871094, "logps/rejected": -283.21905517578125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.6602239608764648, "rewards/margins": 13.291959762573242, "rewards/rejected": -14.952183723449707, "step": 1777 }, { "epoch": 0.61, "learning_rate": 1.6329502154995476e-06, "logits/chosen": -0.6959144473075867, "logits/rejected": -0.6692370772361755, "logps/chosen": -223.21371459960938, "logps/rejected": -275.0472412109375, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": -0.8412342071533203, "rewards/margins": 9.708598136901855, "rewards/rejected": -10.549833297729492, "step": 1778 }, { "epoch": 0.61, "learning_rate": 1.6325222164031328e-06, "logits/chosen": -0.6039522290229797, "logits/rejected": -0.5620238184928894, "logps/chosen": -279.4096984863281, "logps/rejected": -395.3747863769531, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": 0.9693872928619385, "rewards/margins": 16.824201583862305, "rewards/rejected": -15.854814529418945, "step": 1779 }, { "epoch": 0.61, "learning_rate": 1.6320940240799099e-06, "logits/chosen": -0.6778876781463623, "logits/rejected": -0.6478546857833862, "logps/chosen": -186.35809326171875, "logps/rejected": -300.64862060546875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 0.5640819668769836, "rewards/margins": 14.531658172607422, "rewards/rejected": -13.967575073242188, "step": 1780 }, { "epoch": 0.61, "learning_rate": 1.6316656386606855e-06, "logits/chosen": -0.6059401035308838, "logits/rejected": -0.5937089323997498, "logps/chosen": -172.31788635253906, "logps/rejected": -324.1508483886719, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.3072875440120697, "rewards/margins": 13.612297058105469, "rewards/rejected": -13.91958236694336, "step": 1781 }, { "epoch": 0.61, "learning_rate": 1.631237060276326e-06, "logits/chosen": -0.6624149084091187, "logits/rejected": -0.6495735049247742, "logps/chosen": -253.801513671875, "logps/rejected": -407.71099853515625, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.9656031727790833, "rewards/margins": 16.75552749633789, "rewards/rejected": -17.721134185791016, "step": 1782 }, { "epoch": 0.61, "learning_rate": 1.6308082890577554e-06, "logits/chosen": -0.646557092666626, "logits/rejected": -0.6068226099014282, "logps/chosen": -179.45367431640625, "logps/rejected": -267.6297912597656, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.9660810828208923, "rewards/margins": 11.468502044677734, "rewards/rejected": -12.43458366394043, "step": 1783 }, { "epoch": 0.61, "learning_rate": 1.6303793251359583e-06, "logits/chosen": -0.5965278744697571, "logits/rejected": -0.5494959950447083, "logps/chosen": -228.17465209960938, "logps/rejected": -305.0019226074219, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.8091340065002441, "rewards/margins": 12.996095657348633, "rewards/rejected": -13.805229187011719, "step": 1784 }, { "epoch": 0.61, "learning_rate": 1.6299501686419768e-06, "logits/chosen": -0.613320529460907, "logits/rejected": -0.5687969326972961, "logps/chosen": -155.97767639160156, "logps/rejected": -336.3689880371094, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -2.073960065841675, "rewards/margins": 15.1788969039917, "rewards/rejected": -17.252857208251953, "step": 1785 }, { "epoch": 0.61, "learning_rate": 1.629520819706912e-06, "logits/chosen": -0.7180323600769043, "logits/rejected": -0.6748465895652771, "logps/chosen": -173.3565673828125, "logps/rejected": -229.59420776367188, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.7450674772262573, "rewards/margins": 9.771926879882812, "rewards/rejected": -11.51699447631836, "step": 1786 }, { "epoch": 0.61, "learning_rate": 1.6290912784619245e-06, "logits/chosen": -0.6284857988357544, "logits/rejected": -0.6264145374298096, "logps/chosen": -167.0550537109375, "logps/rejected": -291.3396911621094, "loss": 0.0722, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6541363000869751, "rewards/margins": 11.09755802154541, "rewards/rejected": -11.751694679260254, "step": 1787 }, { "epoch": 0.61, "learning_rate": 1.628661545038233e-06, "logits/chosen": -0.7126348614692688, "logits/rejected": -0.7003728747367859, "logps/chosen": -191.4261016845703, "logps/rejected": -326.7235107421875, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -1.5120792388916016, "rewards/margins": 12.251861572265625, "rewards/rejected": -13.763940811157227, "step": 1788 }, { "epoch": 0.61, "learning_rate": 1.6282316195671152e-06, "logits/chosen": -0.624873697757721, "logits/rejected": -0.623201310634613, "logps/chosen": -206.3991241455078, "logps/rejected": -342.5497741699219, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.384319543838501, "rewards/margins": 12.57390308380127, "rewards/rejected": -13.958222389221191, "step": 1789 }, { "epoch": 0.61, "learning_rate": 1.6278015021799077e-06, "logits/chosen": -0.749750018119812, "logits/rejected": -0.6947013139724731, "logps/chosen": -251.42919921875, "logps/rejected": -295.91119384765625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.38478749990463257, "rewards/margins": 11.595708847045898, "rewards/rejected": -11.980497360229492, "step": 1790 }, { "epoch": 0.61, "learning_rate": 1.6273711930080048e-06, "logits/chosen": -0.6871147751808167, "logits/rejected": -0.6635637283325195, "logps/chosen": -253.46636962890625, "logps/rejected": -395.3065490722656, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -2.204185962677002, "rewards/margins": 12.711814880371094, "rewards/rejected": -14.916001319885254, "step": 1791 }, { "epoch": 0.61, "learning_rate": 1.6269406921828603e-06, "logits/chosen": -0.6863841414451599, "logits/rejected": -0.6517987847328186, "logps/chosen": -192.43690490722656, "logps/rejected": -293.0890808105469, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.40976187586784363, "rewards/margins": 12.610872268676758, "rewards/rejected": -13.020633697509766, "step": 1792 }, { "epoch": 0.61, "learning_rate": 1.6265099998359865e-06, "logits/chosen": -0.6695674061775208, "logits/rejected": -0.6082988381385803, "logps/chosen": -297.0324401855469, "logps/rejected": -321.51458740234375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.18651935458183289, "rewards/margins": 12.139087677001953, "rewards/rejected": -11.952569007873535, "step": 1793 }, { "epoch": 0.61, "learning_rate": 1.6260791160989538e-06, "logits/chosen": -0.6204223036766052, "logits/rejected": -0.6026820540428162, "logps/chosen": -181.2612762451172, "logps/rejected": -299.5462951660156, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.6436374187469482, "rewards/margins": 12.061037063598633, "rewards/rejected": -13.70467472076416, "step": 1794 }, { "epoch": 0.61, "learning_rate": 1.6256480411033912e-06, "logits/chosen": -0.6333902478218079, "logits/rejected": -0.5928817987442017, "logps/chosen": -168.6337432861328, "logps/rejected": -284.26483154296875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.7802963256835938, "rewards/margins": 13.655279159545898, "rewards/rejected": -14.435577392578125, "step": 1795 }, { "epoch": 0.61, "learning_rate": 1.625216774980986e-06, "logits/chosen": -0.7745504379272461, "logits/rejected": -0.7358354330062866, "logps/chosen": -285.8099365234375, "logps/rejected": -410.5581970214844, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": -0.15829208493232727, "rewards/margins": 16.42719078063965, "rewards/rejected": -16.585481643676758, "step": 1796 }, { "epoch": 0.61, "learning_rate": 1.6247853178634845e-06, "logits/chosen": -0.7022475004196167, "logits/rejected": -0.6670143008232117, "logps/chosen": -182.993896484375, "logps/rejected": -296.96490478515625, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -2.2920682430267334, "rewards/margins": 12.339441299438477, "rewards/rejected": -14.631509780883789, "step": 1797 }, { "epoch": 0.61, "learning_rate": 1.6243536698826903e-06, "logits/chosen": -0.6383678913116455, "logits/rejected": -0.5925711989402771, "logps/chosen": -253.17047119140625, "logps/rejected": -388.43731689453125, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -0.5958495140075684, "rewards/margins": 14.82687759399414, "rewards/rejected": -15.422727584838867, "step": 1798 }, { "epoch": 0.61, "learning_rate": 1.6239218311704666e-06, "logits/chosen": -0.558612048625946, "logits/rejected": -0.5045649409294128, "logps/chosen": -158.58705139160156, "logps/rejected": -262.873779296875, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -1.2136855125427246, "rewards/margins": 11.844575881958008, "rewards/rejected": -13.058258056640625, "step": 1799 }, { "epoch": 0.61, "learning_rate": 1.6234898018587336e-06, "logits/chosen": -0.6640135645866394, "logits/rejected": -0.6526447534561157, "logps/chosen": -171.37008666992188, "logps/rejected": -347.9856872558594, "loss": 0.0637, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0330407619476318, "rewards/margins": 15.089256286621094, "rewards/rejected": -16.122297286987305, "step": 1800 }, { "epoch": 0.61, "learning_rate": 1.6230575820794704e-06, "logits/chosen": -0.6574814319610596, "logits/rejected": -0.6220682263374329, "logps/chosen": -138.61134338378906, "logps/rejected": -258.7137756347656, "loss": 0.1033, "rewards/accuracies": 1.0, "rewards/chosen": -1.457804560661316, "rewards/margins": 11.826970100402832, "rewards/rejected": -13.284774780273438, "step": 1801 }, { "epoch": 0.62, "learning_rate": 1.622625171964714e-06, "logits/chosen": -0.6475169062614441, "logits/rejected": -0.6003554463386536, "logps/chosen": -242.73130798339844, "logps/rejected": -275.04241943359375, "loss": 0.075, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5831056833267212, "rewards/margins": 11.736329078674316, "rewards/rejected": -12.319435119628906, "step": 1802 }, { "epoch": 0.62, "learning_rate": 1.6221925716465605e-06, "logits/chosen": -0.6177420616149902, "logits/rejected": -0.5649593472480774, "logps/chosen": -238.43511962890625, "logps/rejected": -319.1045227050781, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.3641341924667358, "rewards/margins": 11.948675155639648, "rewards/rejected": -13.312810897827148, "step": 1803 }, { "epoch": 0.62, "learning_rate": 1.6217597812571628e-06, "logits/chosen": -0.6657342910766602, "logits/rejected": -0.650414764881134, "logps/chosen": -169.01235961914062, "logps/rejected": -366.7484436035156, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -2.2040274143218994, "rewards/margins": 15.74045181274414, "rewards/rejected": -17.94447898864746, "step": 1804 }, { "epoch": 0.62, "learning_rate": 1.6213268009287319e-06, "logits/chosen": -0.6534942984580994, "logits/rejected": -0.6322659850120544, "logps/chosen": -208.16305541992188, "logps/rejected": -339.25787353515625, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -2.411165714263916, "rewards/margins": 11.55018138885498, "rewards/rejected": -13.961346626281738, "step": 1805 }, { "epoch": 0.62, "learning_rate": 1.6208936307935384e-06, "logits/chosen": -0.5161964893341064, "logits/rejected": -0.5107226967811584, "logps/chosen": -119.50706481933594, "logps/rejected": -306.9190673828125, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -1.222684621810913, "rewards/margins": 12.319664001464844, "rewards/rejected": -13.542346954345703, "step": 1806 }, { "epoch": 0.62, "learning_rate": 1.6204602709839087e-06, "logits/chosen": -0.6902556419372559, "logits/rejected": -0.6685612797737122, "logps/chosen": -142.656494140625, "logps/rejected": -270.5250549316406, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.812274694442749, "rewards/margins": 11.263126373291016, "rewards/rejected": -13.075399398803711, "step": 1807 }, { "epoch": 0.62, "learning_rate": 1.620026721632229e-06, "logits/chosen": -0.5997219681739807, "logits/rejected": -0.571516752243042, "logps/chosen": -199.23788452148438, "logps/rejected": -324.8046569824219, "loss": 0.0344, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6787595748901367, "rewards/margins": 13.80886459350586, "rewards/rejected": -14.487625122070312, "step": 1808 }, { "epoch": 0.62, "learning_rate": 1.6195929828709424e-06, "logits/chosen": -0.6371773481369019, "logits/rejected": -0.6053807139396667, "logps/chosen": -94.19705200195312, "logps/rejected": -159.30877685546875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.3629724383354187, "rewards/margins": 8.961750984191895, "rewards/rejected": -9.324723243713379, "step": 1809 }, { "epoch": 0.62, "learning_rate": 1.6191590548325502e-06, "logits/chosen": -0.6081387996673584, "logits/rejected": -0.5801926851272583, "logps/chosen": -248.7206268310547, "logps/rejected": -342.1836242675781, "loss": 0.06, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6306337118148804, "rewards/margins": 12.371805191040039, "rewards/rejected": -14.002437591552734, "step": 1810 }, { "epoch": 0.62, "learning_rate": 1.618724937649611e-06, "logits/chosen": -0.6999755501747131, "logits/rejected": -0.6625587940216064, "logps/chosen": -249.8728485107422, "logps/rejected": -350.0882873535156, "loss": 0.0237, "rewards/accuracies": 0.9375, "rewards/chosen": -2.08856201171875, "rewards/margins": 13.2782564163208, "rewards/rejected": -15.366816520690918, "step": 1811 }, { "epoch": 0.62, "learning_rate": 1.6182906314547422e-06, "logits/chosen": -0.7888872027397156, "logits/rejected": -0.7705763578414917, "logps/chosen": -176.89804077148438, "logps/rejected": -348.533203125, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -1.236722707748413, "rewards/margins": 15.218755722045898, "rewards/rejected": -16.45547866821289, "step": 1812 }, { "epoch": 0.62, "learning_rate": 1.617856136380618e-06, "logits/chosen": -0.6979745626449585, "logits/rejected": -0.6785392761230469, "logps/chosen": -171.16574096679688, "logps/rejected": -326.59588623046875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.8720736503601074, "rewards/margins": 13.31988525390625, "rewards/rejected": -15.191959381103516, "step": 1813 }, { "epoch": 0.62, "learning_rate": 1.617421452559971e-06, "logits/chosen": -0.603821873664856, "logits/rejected": -0.5605407357215881, "logps/chosen": -287.97991943359375, "logps/rejected": -374.19927978515625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.3284556567668915, "rewards/margins": 14.86518669128418, "rewards/rejected": -14.53673267364502, "step": 1814 }, { "epoch": 0.62, "learning_rate": 1.6169865801255904e-06, "logits/chosen": -0.6424494981765747, "logits/rejected": -0.6041962504386902, "logps/chosen": -176.1598358154297, "logps/rejected": -287.9967956542969, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 0.4514785706996918, "rewards/margins": 13.087087631225586, "rewards/rejected": -12.63560962677002, "step": 1815 }, { "epoch": 0.62, "learning_rate": 1.6165515192103243e-06, "logits/chosen": -0.7728831171989441, "logits/rejected": -0.7316217422485352, "logps/chosen": -237.96484375, "logps/rejected": -320.0834655761719, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -1.1517300605773926, "rewards/margins": 12.491007804870605, "rewards/rejected": -13.64273738861084, "step": 1816 }, { "epoch": 0.62, "learning_rate": 1.6161162699470778e-06, "logits/chosen": -0.5344725847244263, "logits/rejected": -0.4921683371067047, "logps/chosen": -172.37710571289062, "logps/rejected": -320.02264404296875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.1260734796524048, "rewards/margins": 14.870485305786133, "rewards/rejected": -15.996559143066406, "step": 1817 }, { "epoch": 0.62, "learning_rate": 1.6156808324688137e-06, "logits/chosen": -0.5701106190681458, "logits/rejected": -0.5396299362182617, "logps/chosen": -197.40353393554688, "logps/rejected": -297.9709167480469, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -1.4906001091003418, "rewards/margins": 11.516251564025879, "rewards/rejected": -13.006853103637695, "step": 1818 }, { "epoch": 0.62, "learning_rate": 1.6152452069085514e-06, "logits/chosen": -0.6512477993965149, "logits/rejected": -0.643527090549469, "logps/chosen": -239.2899932861328, "logps/rejected": -375.7849426269531, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.28385335206985474, "rewards/margins": 15.445541381835938, "rewards/rejected": -15.729394912719727, "step": 1819 }, { "epoch": 0.62, "learning_rate": 1.6148093933993691e-06, "logits/chosen": -0.6473519802093506, "logits/rejected": -0.6021217703819275, "logps/chosen": -199.84515380859375, "logps/rejected": -319.7170104980469, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": -0.21050560474395752, "rewards/margins": 14.884039878845215, "rewards/rejected": -15.094545364379883, "step": 1820 }, { "epoch": 0.62, "learning_rate": 1.6143733920744019e-06, "logits/chosen": -0.6441411375999451, "logits/rejected": -0.6045198440551758, "logps/chosen": -234.4540252685547, "logps/rejected": -333.9874267578125, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": 0.44129860401153564, "rewards/margins": 14.615447998046875, "rewards/rejected": -14.174149513244629, "step": 1821 }, { "epoch": 0.62, "learning_rate": 1.6139372030668414e-06, "logits/chosen": -0.7172173261642456, "logits/rejected": -0.6872888803482056, "logps/chosen": -174.09158325195312, "logps/rejected": -307.00262451171875, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -1.3575326204299927, "rewards/margins": 12.04043197631836, "rewards/rejected": -13.397963523864746, "step": 1822 }, { "epoch": 0.62, "learning_rate": 1.6135008265099383e-06, "logits/chosen": -0.6983300447463989, "logits/rejected": -0.6715871691703796, "logps/chosen": -218.6987762451172, "logps/rejected": -347.2027587890625, "loss": 0.0286, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8382297158241272, "rewards/margins": 16.04376792907715, "rewards/rejected": -16.88199806213379, "step": 1823 }, { "epoch": 0.62, "learning_rate": 1.6130642625369992e-06, "logits/chosen": -0.6490907669067383, "logits/rejected": -0.6130737662315369, "logps/chosen": -155.4625244140625, "logps/rejected": -255.91937255859375, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -1.1963882446289062, "rewards/margins": 11.61773681640625, "rewards/rejected": -12.814123153686523, "step": 1824 }, { "epoch": 0.62, "learning_rate": 1.6126275112813883e-06, "logits/chosen": -0.6950079798698425, "logits/rejected": -0.684057354927063, "logps/chosen": -203.7401123046875, "logps/rejected": -372.9874572753906, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.5399253368377686, "rewards/margins": 14.598806381225586, "rewards/rejected": -16.13873291015625, "step": 1825 }, { "epoch": 0.62, "learning_rate": 1.612190572876527e-06, "logits/chosen": -0.7017862796783447, "logits/rejected": -0.6833415031433105, "logps/chosen": -221.21881103515625, "logps/rejected": -341.2772521972656, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -1.8047188520431519, "rewards/margins": 13.056900024414062, "rewards/rejected": -14.861618041992188, "step": 1826 }, { "epoch": 0.62, "learning_rate": 1.6117534474558943e-06, "logits/chosen": -0.653829038143158, "logits/rejected": -0.6105969548225403, "logps/chosen": -213.21817016601562, "logps/rejected": -332.63372802734375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.028149127960205, "rewards/margins": 13.934844970703125, "rewards/rejected": -14.962995529174805, "step": 1827 }, { "epoch": 0.62, "learning_rate": 1.6113161351530257e-06, "logits/chosen": -0.6583951115608215, "logits/rejected": -0.6289989352226257, "logps/chosen": -224.36439514160156, "logps/rejected": -382.5829162597656, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.9781370162963867, "rewards/margins": 16.2351016998291, "rewards/rejected": -17.213239669799805, "step": 1828 }, { "epoch": 0.62, "learning_rate": 1.6108786361015143e-06, "logits/chosen": -0.6679890155792236, "logits/rejected": -0.6341540813446045, "logps/chosen": -169.4267578125, "logps/rejected": -289.205078125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.6172977089881897, "rewards/margins": 15.435080528259277, "rewards/rejected": -16.052377700805664, "step": 1829 }, { "epoch": 0.62, "learning_rate": 1.6104409504350096e-06, "logits/chosen": -0.563491702079773, "logits/rejected": -0.5573616623878479, "logps/chosen": -95.38969421386719, "logps/rejected": -233.6699676513672, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.424480676651001, "rewards/margins": 11.139538764953613, "rewards/rejected": -12.564020156860352, "step": 1830 }, { "epoch": 0.62, "learning_rate": 1.6100030782872191e-06, "logits/chosen": -0.6442202925682068, "logits/rejected": -0.6407922506332397, "logps/chosen": -192.6269073486328, "logps/rejected": -326.3292541503906, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -1.876766562461853, "rewards/margins": 10.31524658203125, "rewards/rejected": -12.19201374053955, "step": 1831 }, { "epoch": 0.63, "learning_rate": 1.6095650197919062e-06, "logits/chosen": -0.5478729605674744, "logits/rejected": -0.5288069844245911, "logps/chosen": -139.59786987304688, "logps/rejected": -257.9109802246094, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -0.6229605674743652, "rewards/margins": 13.087865829467773, "rewards/rejected": -13.71082592010498, "step": 1832 }, { "epoch": 0.63, "learning_rate": 1.6091267750828921e-06, "logits/chosen": -0.5995845198631287, "logits/rejected": -0.5655940771102905, "logps/chosen": -221.9186248779297, "logps/rejected": -356.219482421875, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.989327609539032, "rewards/margins": 13.95888614654541, "rewards/rejected": -14.948213577270508, "step": 1833 }, { "epoch": 0.63, "learning_rate": 1.6086883442940543e-06, "logits/chosen": -0.6860987544059753, "logits/rejected": -0.6606544256210327, "logps/chosen": -230.6959686279297, "logps/rejected": -322.9530029296875, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 0.17783965170383453, "rewards/margins": 13.012285232543945, "rewards/rejected": -12.834444999694824, "step": 1834 }, { "epoch": 0.63, "learning_rate": 1.6082497275593272e-06, "logits/chosen": -0.6716412305831909, "logits/rejected": -0.6453685164451599, "logps/chosen": -264.1575622558594, "logps/rejected": -407.78955078125, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.12097057700157166, "rewards/margins": 15.36257553100586, "rewards/rejected": -15.483546257019043, "step": 1835 }, { "epoch": 0.63, "learning_rate": 1.6078109250127025e-06, "logits/chosen": -0.6776800155639648, "logits/rejected": -0.6364433765411377, "logps/chosen": -187.64865112304688, "logps/rejected": -260.7000732421875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.8129858374595642, "rewards/margins": 10.893050193786621, "rewards/rejected": -11.706036567687988, "step": 1836 }, { "epoch": 0.63, "learning_rate": 1.6073719367882284e-06, "logits/chosen": -0.5715484619140625, "logits/rejected": -0.5399234890937805, "logps/chosen": -194.6008758544922, "logps/rejected": -298.052978515625, "loss": 0.0316, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4913041591644287, "rewards/margins": 13.352323532104492, "rewards/rejected": -14.8436279296875, "step": 1837 }, { "epoch": 0.63, "learning_rate": 1.6069327630200094e-06, "logits/chosen": -0.6727150082588196, "logits/rejected": -0.6437751650810242, "logps/chosen": -231.79995727539062, "logps/rejected": -298.8944396972656, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -2.1101324558258057, "rewards/margins": 10.227684020996094, "rewards/rejected": -12.337818145751953, "step": 1838 }, { "epoch": 0.63, "learning_rate": 1.6064934038422069e-06, "logits/chosen": -0.6884828805923462, "logits/rejected": -0.6458860635757446, "logps/chosen": -188.80462646484375, "logps/rejected": -280.5212097167969, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": 0.2893243432044983, "rewards/margins": 12.38642406463623, "rewards/rejected": -12.097099304199219, "step": 1839 }, { "epoch": 0.63, "learning_rate": 1.6060538593890397e-06, "logits/chosen": -0.5699988007545471, "logits/rejected": -0.5328794121742249, "logps/chosen": -166.92788696289062, "logps/rejected": -271.0169677734375, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.9051927328109741, "rewards/margins": 13.65290641784668, "rewards/rejected": -14.558099746704102, "step": 1840 }, { "epoch": 0.63, "learning_rate": 1.6056141297947819e-06, "logits/chosen": -0.6747637987136841, "logits/rejected": -0.6181460618972778, "logps/chosen": -183.85348510742188, "logps/rejected": -255.8982391357422, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -0.9685618877410889, "rewards/margins": 10.142524719238281, "rewards/rejected": -11.11108684539795, "step": 1841 }, { "epoch": 0.63, "learning_rate": 1.6051742151937652e-06, "logits/chosen": -0.6217928528785706, "logits/rejected": -0.574935257434845, "logps/chosen": -199.59896850585938, "logps/rejected": -316.0766296386719, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -1.7538954019546509, "rewards/margins": 14.222577095031738, "rewards/rejected": -15.976472854614258, "step": 1842 }, { "epoch": 0.63, "learning_rate": 1.6047341157203772e-06, "logits/chosen": -0.7710475325584412, "logits/rejected": -0.7465260028839111, "logps/chosen": -236.05909729003906, "logps/rejected": -363.4889831542969, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.4375074505805969, "rewards/margins": 14.61097526550293, "rewards/rejected": -15.048481941223145, "step": 1843 }, { "epoch": 0.63, "learning_rate": 1.6042938315090626e-06, "logits/chosen": -0.5878826379776001, "logits/rejected": -0.5580358505249023, "logps/chosen": -153.491943359375, "logps/rejected": -229.53746032714844, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.3257274627685547, "rewards/margins": 11.394538879394531, "rewards/rejected": -11.720267295837402, "step": 1844 }, { "epoch": 0.63, "learning_rate": 1.6038533626943216e-06, "logits/chosen": -0.5771725177764893, "logits/rejected": -0.5548776984214783, "logps/chosen": -140.0635986328125, "logps/rejected": -240.41612243652344, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8520775437355042, "rewards/margins": 12.237287521362305, "rewards/rejected": -13.089364051818848, "step": 1845 }, { "epoch": 0.63, "learning_rate": 1.6034127094107119e-06, "logits/chosen": -0.6637371778488159, "logits/rejected": -0.6420668959617615, "logps/chosen": -216.7363739013672, "logps/rejected": -322.749267578125, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.753509283065796, "rewards/margins": 12.005168914794922, "rewards/rejected": -13.75867748260498, "step": 1846 }, { "epoch": 0.63, "learning_rate": 1.6029718717928468e-06, "logits/chosen": -0.5925689339637756, "logits/rejected": -0.5591711401939392, "logps/chosen": -218.67465209960938, "logps/rejected": -362.79095458984375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.35216549038887024, "rewards/margins": 15.858723640441895, "rewards/rejected": -16.210887908935547, "step": 1847 }, { "epoch": 0.63, "learning_rate": 1.602530849975396e-06, "logits/chosen": -0.5817137956619263, "logits/rejected": -0.5222780704498291, "logps/chosen": -230.32772827148438, "logps/rejected": -277.70025634765625, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.18956628441810608, "rewards/margins": 12.774261474609375, "rewards/rejected": -12.963827133178711, "step": 1848 }, { "epoch": 0.63, "learning_rate": 1.6020896440930857e-06, "logits/chosen": -0.601885199546814, "logits/rejected": -0.5755065083503723, "logps/chosen": -203.78744506835938, "logps/rejected": -318.68817138671875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.1594921201467514, "rewards/margins": 13.123649597167969, "rewards/rejected": -13.283141136169434, "step": 1849 }, { "epoch": 0.63, "learning_rate": 1.6016482542806982e-06, "logits/chosen": -0.6333526372909546, "logits/rejected": -0.607063889503479, "logps/chosen": -249.33596801757812, "logps/rejected": -393.77630615234375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.7429273724555969, "rewards/margins": 15.241568565368652, "rewards/rejected": -15.984496116638184, "step": 1850 }, { "epoch": 0.63, "learning_rate": 1.6012066806730715e-06, "logits/chosen": -0.6868420243263245, "logits/rejected": -0.6456884145736694, "logps/chosen": -266.2129211425781, "logps/rejected": -361.00042724609375, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.5184305906295776, "rewards/margins": 14.177024841308594, "rewards/rejected": -14.695454597473145, "step": 1851 }, { "epoch": 0.63, "learning_rate": 1.600764923405101e-06, "logits/chosen": -0.6166717410087585, "logits/rejected": -0.5791974067687988, "logps/chosen": -242.6670684814453, "logps/rejected": -365.4920349121094, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.6197919845581055, "rewards/margins": 13.027192115783691, "rewards/rejected": -14.64698314666748, "step": 1852 }, { "epoch": 0.63, "learning_rate": 1.6003229826117372e-06, "logits/chosen": -0.5674204230308533, "logits/rejected": -0.5520009398460388, "logps/chosen": -183.5094451904297, "logps/rejected": -306.091552734375, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -1.4165514707565308, "rewards/margins": 11.498269081115723, "rewards/rejected": -12.914822578430176, "step": 1853 }, { "epoch": 0.63, "learning_rate": 1.5998808584279868e-06, "logits/chosen": -0.5850228667259216, "logits/rejected": -0.5779159069061279, "logps/chosen": -166.91085815429688, "logps/rejected": -314.3648681640625, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.1701023578643799, "rewards/margins": 13.532638549804688, "rewards/rejected": -14.702741622924805, "step": 1854 }, { "epoch": 0.63, "learning_rate": 1.5994385509889128e-06, "logits/chosen": -0.6075174808502197, "logits/rejected": -0.6040363311767578, "logps/chosen": -256.34503173828125, "logps/rejected": -402.72760009765625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.1643040031194687, "rewards/margins": 13.51612663269043, "rewards/rejected": -13.680429458618164, "step": 1855 }, { "epoch": 0.63, "learning_rate": 1.5989960604296337e-06, "logits/chosen": -0.6003439426422119, "logits/rejected": -0.5925348997116089, "logps/chosen": -162.7651824951172, "logps/rejected": -299.3253173828125, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -0.9925919771194458, "rewards/margins": 12.998278617858887, "rewards/rejected": -13.990870475769043, "step": 1856 }, { "epoch": 0.63, "learning_rate": 1.5985533868853245e-06, "logits/chosen": -0.6617946028709412, "logits/rejected": -0.6347702145576477, "logps/chosen": -283.2048645019531, "logps/rejected": -427.5603942871094, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -0.18711698055267334, "rewards/margins": 16.5300235748291, "rewards/rejected": -16.717140197753906, "step": 1857 }, { "epoch": 0.63, "learning_rate": 1.5981105304912159e-06, "logits/chosen": -0.7675827145576477, "logits/rejected": -0.7367682456970215, "logps/chosen": -213.34658813476562, "logps/rejected": -372.9371337890625, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.143496036529541, "rewards/margins": 14.251105308532715, "rewards/rejected": -15.394599914550781, "step": 1858 }, { "epoch": 0.63, "learning_rate": 1.5976674913825945e-06, "logits/chosen": -0.5607532262802124, "logits/rejected": -0.507413387298584, "logps/chosen": -207.5968475341797, "logps/rejected": -288.7896728515625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0214670896530151, "rewards/margins": 14.098154067993164, "rewards/rejected": -15.119621276855469, "step": 1859 }, { "epoch": 0.63, "learning_rate": 1.5972242696948023e-06, "logits/chosen": -0.5255025029182434, "logits/rejected": -0.5170977711677551, "logps/chosen": -186.0392303466797, "logps/rejected": -364.39752197265625, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -1.5183405876159668, "rewards/margins": 15.311734199523926, "rewards/rejected": -16.830074310302734, "step": 1860 }, { "epoch": 0.64, "learning_rate": 1.5967808655632374e-06, "logits/chosen": -0.6231938004493713, "logits/rejected": -0.6153779029846191, "logps/chosen": -211.39724731445312, "logps/rejected": -359.98663330078125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.579946756362915, "rewards/margins": 13.346406936645508, "rewards/rejected": -14.926353454589844, "step": 1861 }, { "epoch": 0.64, "learning_rate": 1.596337279123354e-06, "logits/chosen": -0.5881773233413696, "logits/rejected": -0.5670499205589294, "logps/chosen": -151.65673828125, "logps/rejected": -262.28302001953125, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.8201072216033936, "rewards/margins": 11.539266586303711, "rewards/rejected": -13.359374046325684, "step": 1862 }, { "epoch": 0.64, "learning_rate": 1.5958935105106616e-06, "logits/chosen": -0.6570792198181152, "logits/rejected": -0.6640037894248962, "logps/chosen": -209.60523986816406, "logps/rejected": -374.6654052734375, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.6842007637023926, "rewards/margins": 12.391311645507812, "rewards/rejected": -14.07551097869873, "step": 1863 }, { "epoch": 0.64, "learning_rate": 1.595449559860725e-06, "logits/chosen": -0.5962007641792297, "logits/rejected": -0.5778887867927551, "logps/chosen": -155.98385620117188, "logps/rejected": -279.3828125, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.7378211617469788, "rewards/margins": 12.81017780303955, "rewards/rejected": -13.54800033569336, "step": 1864 }, { "epoch": 0.64, "learning_rate": 1.5950054273091654e-06, "logits/chosen": -0.6009472608566284, "logits/rejected": -0.6014498472213745, "logps/chosen": -195.39227294921875, "logps/rejected": -351.06793212890625, "loss": 0.0219, "rewards/accuracies": 0.9375, "rewards/chosen": -2.32077693939209, "rewards/margins": 13.685827255249023, "rewards/rejected": -16.006603240966797, "step": 1865 }, { "epoch": 0.64, "learning_rate": 1.5945611129916587e-06, "logits/chosen": -0.5014371275901794, "logits/rejected": -0.43531760573387146, "logps/chosen": -236.85255432128906, "logps/rejected": -241.72305297851562, "loss": 0.0334, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8220421075820923, "rewards/margins": 11.214284896850586, "rewards/rejected": -12.03632640838623, "step": 1866 }, { "epoch": 0.64, "learning_rate": 1.5941166170439374e-06, "logits/chosen": -0.6588895320892334, "logits/rejected": -0.6209096312522888, "logps/chosen": -221.84178161621094, "logps/rejected": -283.8438720703125, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -1.8420062065124512, "rewards/margins": 10.773329734802246, "rewards/rejected": -12.615335464477539, "step": 1867 }, { "epoch": 0.64, "learning_rate": 1.5936719396017888e-06, "logits/chosen": -0.6099976301193237, "logits/rejected": -0.5851601362228394, "logps/chosen": -193.61990356445312, "logps/rejected": -314.8055725097656, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 0.39022818207740784, "rewards/margins": 14.939887046813965, "rewards/rejected": -14.549657821655273, "step": 1868 }, { "epoch": 0.64, "learning_rate": 1.593227080801055e-06, "logits/chosen": -0.6297848224639893, "logits/rejected": -0.6169610023498535, "logps/chosen": -202.52792358398438, "logps/rejected": -343.5909423828125, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -0.08437608182430267, "rewards/margins": 14.7249116897583, "rewards/rejected": -14.809286117553711, "step": 1869 }, { "epoch": 0.64, "learning_rate": 1.5927820407776352e-06, "logits/chosen": -0.5807130336761475, "logits/rejected": -0.548587441444397, "logps/chosen": -248.89735412597656, "logps/rejected": -353.2359313964844, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.864434003829956, "rewards/margins": 13.15933609008789, "rewards/rejected": -14.023770332336426, "step": 1870 }, { "epoch": 0.64, "learning_rate": 1.5923368196674823e-06, "logits/chosen": -0.5729086995124817, "logits/rejected": -0.580865204334259, "logps/chosen": -164.98007202148438, "logps/rejected": -321.9933776855469, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.5604052543640137, "rewards/margins": 12.18294620513916, "rewards/rejected": -13.743350982666016, "step": 1871 }, { "epoch": 0.64, "learning_rate": 1.5918914176066052e-06, "logits/chosen": -0.5543191432952881, "logits/rejected": -0.5158621072769165, "logps/chosen": -196.70565795898438, "logps/rejected": -295.40924072265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1507848501205444, "rewards/margins": 13.005002975463867, "rewards/rejected": -14.155787467956543, "step": 1872 }, { "epoch": 0.64, "learning_rate": 1.5914458347310686e-06, "logits/chosen": -0.6003579497337341, "logits/rejected": -0.564155638217926, "logps/chosen": -259.4253845214844, "logps/rejected": -349.07867431640625, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -1.1376330852508545, "rewards/margins": 14.78913402557373, "rewards/rejected": -15.926767349243164, "step": 1873 }, { "epoch": 0.64, "learning_rate": 1.5910000711769916e-06, "logits/chosen": -0.5806615352630615, "logits/rejected": -0.5687137842178345, "logps/chosen": -196.45407104492188, "logps/rejected": -354.6111755371094, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.0708022117614746, "rewards/margins": 12.788103103637695, "rewards/rejected": -13.858906745910645, "step": 1874 }, { "epoch": 0.64, "learning_rate": 1.5905541270805486e-06, "logits/chosen": -0.5670149922370911, "logits/rejected": -0.557425856590271, "logps/chosen": -224.69435119628906, "logps/rejected": -430.5008850097656, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.7629189491271973, "rewards/margins": 17.063871383666992, "rewards/rejected": -18.826791763305664, "step": 1875 }, { "epoch": 0.64, "learning_rate": 1.59010800257797e-06, "logits/chosen": -0.5893014073371887, "logits/rejected": -0.5813995003700256, "logps/chosen": -170.86965942382812, "logps/rejected": -313.7492980957031, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.14546123147010803, "rewards/margins": 13.97314167022705, "rewards/rejected": -14.118602752685547, "step": 1876 }, { "epoch": 0.64, "learning_rate": 1.5896616978055398e-06, "logits/chosen": -0.6578770279884338, "logits/rejected": -0.6288890242576599, "logps/chosen": -272.0355224609375, "logps/rejected": -417.9839782714844, "loss": 0.0251, "rewards/accuracies": 0.9375, "rewards/chosen": 0.17942559719085693, "rewards/margins": 16.359664916992188, "rewards/rejected": -16.180240631103516, "step": 1877 }, { "epoch": 0.64, "learning_rate": 1.5892152128995988e-06, "logits/chosen": -0.5742455720901489, "logits/rejected": -0.5347045660018921, "logps/chosen": -199.74612426757812, "logps/rejected": -318.2051086425781, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.19388145208358765, "rewards/margins": 13.415610313415527, "rewards/rejected": -13.609491348266602, "step": 1878 }, { "epoch": 0.64, "learning_rate": 1.5887685479965413e-06, "logits/chosen": -0.5974829792976379, "logits/rejected": -0.5865409970283508, "logps/chosen": -160.09783935546875, "logps/rejected": -284.7896728515625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.560816764831543, "rewards/margins": 11.15382194519043, "rewards/rejected": -13.714637756347656, "step": 1879 }, { "epoch": 0.64, "learning_rate": 1.588321703232818e-06, "logits/chosen": -0.5874870419502258, "logits/rejected": -0.5517985224723816, "logps/chosen": -211.67037963867188, "logps/rejected": -322.4521484375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.030310869216919, "rewards/margins": 13.517183303833008, "rewards/rejected": -15.547492980957031, "step": 1880 }, { "epoch": 0.64, "learning_rate": 1.5878746787449332e-06, "logits/chosen": -0.5420972108840942, "logits/rejected": -0.5023515820503235, "logps/chosen": -217.0877227783203, "logps/rejected": -405.6159362792969, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -2.4968321323394775, "rewards/margins": 17.800613403320312, "rewards/rejected": -20.297447204589844, "step": 1881 }, { "epoch": 0.64, "learning_rate": 1.5874274746694467e-06, "logits/chosen": -0.5421740412712097, "logits/rejected": -0.5178191065788269, "logps/chosen": -257.1073303222656, "logps/rejected": -407.9979553222656, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -1.4335194826126099, "rewards/margins": 15.300966262817383, "rewards/rejected": -16.73448371887207, "step": 1882 }, { "epoch": 0.64, "learning_rate": 1.5869800911429735e-06, "logits/chosen": -0.5518007278442383, "logits/rejected": -0.5034643411636353, "logps/chosen": -226.62315368652344, "logps/rejected": -343.7767333984375, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -1.2936021089553833, "rewards/margins": 14.981887817382812, "rewards/rejected": -16.275489807128906, "step": 1883 }, { "epoch": 0.64, "learning_rate": 1.5865325283021828e-06, "logits/chosen": -0.6152445077896118, "logits/rejected": -0.5662600994110107, "logps/chosen": -217.56655883789062, "logps/rejected": -286.449462890625, "loss": 0.0806, "rewards/accuracies": 1.0, "rewards/chosen": -0.9754086136817932, "rewards/margins": 10.826590538024902, "rewards/rejected": -11.802000045776367, "step": 1884 }, { "epoch": 0.64, "learning_rate": 1.5860847862837995e-06, "logits/chosen": -0.538110613822937, "logits/rejected": -0.5143043398857117, "logps/chosen": -198.581787109375, "logps/rejected": -361.72528076171875, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -2.1420023441314697, "rewards/margins": 14.974530220031738, "rewards/rejected": -17.116531372070312, "step": 1885 }, { "epoch": 0.64, "learning_rate": 1.5856368652246017e-06, "logits/chosen": -0.5313919186592102, "logits/rejected": -0.5217207670211792, "logps/chosen": -183.99957275390625, "logps/rejected": -316.49169921875, "loss": 0.0418, "rewards/accuracies": 0.9375, "rewards/chosen": -0.799959123134613, "rewards/margins": 13.12647533416748, "rewards/rejected": -13.926433563232422, "step": 1886 }, { "epoch": 0.64, "learning_rate": 1.5851887652614235e-06, "logits/chosen": -0.42924368381500244, "logits/rejected": -0.4162362217903137, "logps/chosen": -113.7322769165039, "logps/rejected": -249.170166015625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.6414917707443237, "rewards/margins": 12.606019020080566, "rewards/rejected": -14.247509002685547, "step": 1887 }, { "epoch": 0.64, "learning_rate": 1.5847404865311536e-06, "logits/chosen": -0.547601044178009, "logits/rejected": -0.5327886939048767, "logps/chosen": -165.94073486328125, "logps/rejected": -302.4586181640625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.0011210441589355, "rewards/margins": 12.299217224121094, "rewards/rejected": -13.300338745117188, "step": 1888 }, { "epoch": 0.64, "learning_rate": 1.5842920291707345e-06, "logits/chosen": -0.6303300261497498, "logits/rejected": -0.6089810729026794, "logps/chosen": -155.14749145507812, "logps/rejected": -273.7999572753906, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.41361069679260254, "rewards/margins": 13.59427261352539, "rewards/rejected": -14.007883071899414, "step": 1889 }, { "epoch": 0.65, "learning_rate": 1.583843393317164e-06, "logits/chosen": -0.48629602789878845, "logits/rejected": -0.48113277554512024, "logps/chosen": -142.89739990234375, "logps/rejected": -220.17071533203125, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.5763257145881653, "rewards/margins": 9.432842254638672, "rewards/rejected": -10.00916862487793, "step": 1890 }, { "epoch": 0.65, "learning_rate": 1.583394579107494e-06, "logits/chosen": -0.5730093121528625, "logits/rejected": -0.5692026019096375, "logps/chosen": -119.66017150878906, "logps/rejected": -264.01715087890625, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.7185156345367432, "rewards/margins": 12.124391555786133, "rewards/rejected": -13.84290599822998, "step": 1891 }, { "epoch": 0.65, "learning_rate": 1.582945586678831e-06, "logits/chosen": -0.5413352847099304, "logits/rejected": -0.5389790534973145, "logps/chosen": -199.2470245361328, "logps/rejected": -332.4158630371094, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.9893308877944946, "rewards/margins": 12.094797134399414, "rewards/rejected": -14.084127426147461, "step": 1892 }, { "epoch": 0.65, "learning_rate": 1.5824964161683362e-06, "logits/chosen": -0.523550808429718, "logits/rejected": -0.493649959564209, "logps/chosen": -211.58160400390625, "logps/rejected": -321.22332763671875, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.9291720390319824, "rewards/margins": 12.952496528625488, "rewards/rejected": -14.881668090820312, "step": 1893 }, { "epoch": 0.65, "learning_rate": 1.5820470677132246e-06, "logits/chosen": -0.5484462976455688, "logits/rejected": -0.5399672389030457, "logps/chosen": -169.90501403808594, "logps/rejected": -292.7143249511719, "loss": 0.0807, "rewards/accuracies": 0.9375, "rewards/chosen": 0.04170294851064682, "rewards/margins": 9.657512664794922, "rewards/rejected": -9.61581039428711, "step": 1894 }, { "epoch": 0.65, "learning_rate": 1.5815975414507667e-06, "logits/chosen": -0.6329919695854187, "logits/rejected": -0.5832763910293579, "logps/chosen": -285.8906555175781, "logps/rejected": -402.16326904296875, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.3967257738113403, "rewards/margins": 14.704490661621094, "rewards/rejected": -16.101215362548828, "step": 1895 }, { "epoch": 0.65, "learning_rate": 1.5811478375182859e-06, "logits/chosen": -0.601353645324707, "logits/rejected": -0.5916205048561096, "logps/chosen": -217.28330993652344, "logps/rejected": -402.0509033203125, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": -1.1240019798278809, "rewards/margins": 14.409107208251953, "rewards/rejected": -15.533109664916992, "step": 1896 }, { "epoch": 0.65, "learning_rate": 1.5806979560531608e-06, "logits/chosen": -0.5020997524261475, "logits/rejected": -0.4986448585987091, "logps/chosen": -143.56649780273438, "logps/rejected": -289.55010986328125, "loss": 0.0283, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4630787372589111, "rewards/margins": 11.60366439819336, "rewards/rejected": -13.066743850708008, "step": 1897 }, { "epoch": 0.65, "learning_rate": 1.580247897192824e-06, "logits/chosen": -0.6226702332496643, "logits/rejected": -0.5864759683609009, "logps/chosen": -236.61883544921875, "logps/rejected": -333.12823486328125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.031335711479187, "rewards/margins": 12.862783432006836, "rewards/rejected": -13.894118309020996, "step": 1898 }, { "epoch": 0.65, "learning_rate": 1.579797661074762e-06, "logits/chosen": -0.5195577144622803, "logits/rejected": -0.4742254614830017, "logps/chosen": -258.21685791015625, "logps/rejected": -333.97821044921875, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.2384945154190063, "rewards/margins": 13.108970642089844, "rewards/rejected": -14.347465515136719, "step": 1899 }, { "epoch": 0.65, "learning_rate": 1.579347247836516e-06, "logits/chosen": -0.6812607645988464, "logits/rejected": -0.6533586382865906, "logps/chosen": -171.48822021484375, "logps/rejected": -247.5332489013672, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.7051788568496704, "rewards/margins": 9.678133010864258, "rewards/rejected": -11.38331127166748, "step": 1900 }, { "epoch": 0.65, "learning_rate": 1.5788966576156808e-06, "logits/chosen": -0.5841655135154724, "logits/rejected": -0.5617212653160095, "logps/chosen": -224.59242248535156, "logps/rejected": -361.2674560546875, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -2.8733654022216797, "rewards/margins": 12.346025466918945, "rewards/rejected": -15.219390869140625, "step": 1901 }, { "epoch": 0.65, "learning_rate": 1.5784458905499059e-06, "logits/chosen": -0.6402921676635742, "logits/rejected": -0.6036795973777771, "logps/chosen": -225.796142578125, "logps/rejected": -314.87701416015625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.5124404430389404, "rewards/margins": 12.72683334350586, "rewards/rejected": -14.239273071289062, "step": 1902 }, { "epoch": 0.65, "learning_rate": 1.577994946776894e-06, "logits/chosen": -0.6473708748817444, "logits/rejected": -0.6068711280822754, "logps/chosen": -153.0575408935547, "logps/rejected": -233.7794189453125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.7165471315383911, "rewards/margins": 9.88819694519043, "rewards/rejected": -10.604744911193848, "step": 1903 }, { "epoch": 0.65, "learning_rate": 1.5775438264344026e-06, "logits/chosen": -0.60455322265625, "logits/rejected": -0.5853511691093445, "logps/chosen": -187.50680541992188, "logps/rejected": -344.4598693847656, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.8542728424072266, "rewards/margins": 14.55531120300293, "rewards/rejected": -16.409582138061523, "step": 1904 }, { "epoch": 0.65, "learning_rate": 1.5770925296602423e-06, "logits/chosen": -0.7499858140945435, "logits/rejected": -0.7342225909233093, "logps/chosen": -216.79368591308594, "logps/rejected": -377.8565979003906, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": -0.7750450372695923, "rewards/margins": 15.603919982910156, "rewards/rejected": -16.378965377807617, "step": 1905 }, { "epoch": 0.65, "learning_rate": 1.5766410565922784e-06, "logits/chosen": -0.5352760553359985, "logits/rejected": -0.5146178007125854, "logps/chosen": -73.79720306396484, "logps/rejected": -144.70091247558594, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -0.4148814082145691, "rewards/margins": 9.081521987915039, "rewards/rejected": -9.496402740478516, "step": 1906 }, { "epoch": 0.65, "learning_rate": 1.5761894073684295e-06, "logits/chosen": -0.6186084151268005, "logits/rejected": -0.5888910293579102, "logps/chosen": -204.69384765625, "logps/rejected": -311.82550048828125, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -0.5073121786117554, "rewards/margins": 11.767288208007812, "rewards/rejected": -12.2746000289917, "step": 1907 }, { "epoch": 0.65, "learning_rate": 1.5757375821266678e-06, "logits/chosen": -0.6135984659194946, "logits/rejected": -0.586296558380127, "logps/chosen": -155.92857360839844, "logps/rejected": -231.38742065429688, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.275749921798706, "rewards/margins": 8.82099723815918, "rewards/rejected": -10.096747398376465, "step": 1908 }, { "epoch": 0.65, "learning_rate": 1.5752855810050204e-06, "logits/chosen": -0.6325722336769104, "logits/rejected": -0.6058209538459778, "logps/chosen": -216.10134887695312, "logps/rejected": -306.8981628417969, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.3474876880645752, "rewards/margins": 9.902090072631836, "rewards/rejected": -11.249577522277832, "step": 1909 }, { "epoch": 0.65, "learning_rate": 1.5748334041415671e-06, "logits/chosen": -0.6226819157600403, "logits/rejected": -0.6050423979759216, "logps/chosen": -228.89576721191406, "logps/rejected": -389.39093017578125, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -1.7584266662597656, "rewards/margins": 13.964336395263672, "rewards/rejected": -15.722763061523438, "step": 1910 }, { "epoch": 0.65, "learning_rate": 1.574381051674442e-06, "logits/chosen": -0.5913137197494507, "logits/rejected": -0.5598888397216797, "logps/chosen": -166.999755859375, "logps/rejected": -320.21002197265625, "loss": 0.034, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3685221672058105, "rewards/margins": 13.269547462463379, "rewards/rejected": -14.638070106506348, "step": 1911 }, { "epoch": 0.65, "learning_rate": 1.573928523741832e-06, "logits/chosen": -0.5738010406494141, "logits/rejected": -0.5773450136184692, "logps/chosen": -149.27108764648438, "logps/rejected": -294.10272216796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5124383568763733, "rewards/margins": 12.391834259033203, "rewards/rejected": -12.904273986816406, "step": 1912 }, { "epoch": 0.65, "learning_rate": 1.5734758204819785e-06, "logits/chosen": -0.6516256332397461, "logits/rejected": -0.614344596862793, "logps/chosen": -119.95535278320312, "logps/rejected": -251.466796875, "loss": 0.0766, "rewards/accuracies": 1.0, "rewards/chosen": -0.8927204608917236, "rewards/margins": 14.599248886108398, "rewards/rejected": -15.491969108581543, "step": 1913 }, { "epoch": 0.65, "learning_rate": 1.573022942033176e-06, "logits/chosen": -0.6279579997062683, "logits/rejected": -0.6034230589866638, "logps/chosen": -155.8906707763672, "logps/rejected": -291.5640869140625, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.8196458220481873, "rewards/margins": 13.880019187927246, "rewards/rejected": -14.699663162231445, "step": 1914 }, { "epoch": 0.65, "learning_rate": 1.5725698885337724e-06, "logits/chosen": -0.6078457236289978, "logits/rejected": -0.5995045900344849, "logps/chosen": -179.44979858398438, "logps/rejected": -284.1784362792969, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -1.4934600591659546, "rewards/margins": 10.244845390319824, "rewards/rejected": -11.738306045532227, "step": 1915 }, { "epoch": 0.65, "learning_rate": 1.5721166601221695e-06, "logits/chosen": -0.502415120601654, "logits/rejected": -0.49844956398010254, "logps/chosen": -196.34413146972656, "logps/rejected": -317.4997863769531, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -1.0459591150283813, "rewards/margins": 11.58290958404541, "rewards/rejected": -12.628868103027344, "step": 1916 }, { "epoch": 0.65, "learning_rate": 1.5716632569368225e-06, "logits/chosen": -0.546951174736023, "logits/rejected": -0.5261477828025818, "logps/chosen": -180.7596435546875, "logps/rejected": -318.56787109375, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": -0.790442943572998, "rewards/margins": 14.542848587036133, "rewards/rejected": -15.333292007446289, "step": 1917 }, { "epoch": 0.65, "learning_rate": 1.5712096791162398e-06, "logits/chosen": -0.6125050783157349, "logits/rejected": -0.6008146405220032, "logps/chosen": -202.43397521972656, "logps/rejected": -411.3439025878906, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8081592321395874, "rewards/margins": 16.96303939819336, "rewards/rejected": -17.771198272705078, "step": 1918 }, { "epoch": 0.65, "learning_rate": 1.5707559267989825e-06, "logits/chosen": -0.7671182751655579, "logits/rejected": -0.7436532378196716, "logps/chosen": -243.37693786621094, "logps/rejected": -337.0749816894531, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -0.6565426588058472, "rewards/margins": 10.970964431762695, "rewards/rejected": -11.627507209777832, "step": 1919 }, { "epoch": 0.66, "learning_rate": 1.5703020001236665e-06, "logits/chosen": -0.575982391834259, "logits/rejected": -0.56671541929245, "logps/chosen": -204.04116821289062, "logps/rejected": -355.7106628417969, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.076041579246521, "rewards/margins": 13.717292785644531, "rewards/rejected": -14.793333053588867, "step": 1920 }, { "epoch": 0.66, "learning_rate": 1.5698478992289594e-06, "logits/chosen": -0.6679733395576477, "logits/rejected": -0.6413601636886597, "logps/chosen": -245.14781188964844, "logps/rejected": -374.6229248046875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.4638445377349854, "rewards/margins": 14.3632173538208, "rewards/rejected": -15.827061653137207, "step": 1921 }, { "epoch": 0.66, "learning_rate": 1.5693936242535836e-06, "logits/chosen": -0.601310670375824, "logits/rejected": -0.5896716117858887, "logps/chosen": -220.46307373046875, "logps/rejected": -351.88616943359375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.16900280117988586, "rewards/margins": 15.615863800048828, "rewards/rejected": -15.784867286682129, "step": 1922 }, { "epoch": 0.66, "learning_rate": 1.5689391753363132e-06, "logits/chosen": -0.6060522794723511, "logits/rejected": -0.5610723495483398, "logps/chosen": -177.22653198242188, "logps/rejected": -272.8711242675781, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.475989818572998, "rewards/margins": 11.917935371398926, "rewards/rejected": -13.393925666809082, "step": 1923 }, { "epoch": 0.66, "learning_rate": 1.5684845526159765e-06, "logits/chosen": -0.6251778602600098, "logits/rejected": -0.6143495440483093, "logps/chosen": -194.73345947265625, "logps/rejected": -324.76605224609375, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -1.3138089179992676, "rewards/margins": 13.974750518798828, "rewards/rejected": -15.288558006286621, "step": 1924 }, { "epoch": 0.66, "learning_rate": 1.5680297562314539e-06, "logits/chosen": -0.6164482235908508, "logits/rejected": -0.6072350144386292, "logps/chosen": -248.9562225341797, "logps/rejected": -428.85552978515625, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -0.31532907485961914, "rewards/margins": 16.60386848449707, "rewards/rejected": -16.919198989868164, "step": 1925 }, { "epoch": 0.66, "learning_rate": 1.5675747863216799e-06, "logits/chosen": -0.5880219340324402, "logits/rejected": -0.5641760230064392, "logps/chosen": -162.2661895751953, "logps/rejected": -309.72833251953125, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.03901495039463043, "rewards/margins": 13.901692390441895, "rewards/rejected": -13.940707206726074, "step": 1926 }, { "epoch": 0.66, "learning_rate": 1.5671196430256414e-06, "logits/chosen": -0.6003931760787964, "logits/rejected": -0.5801406502723694, "logps/chosen": -167.35293579101562, "logps/rejected": -255.50277709960938, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -0.7228078842163086, "rewards/margins": 9.418307304382324, "rewards/rejected": -10.141115188598633, "step": 1927 }, { "epoch": 0.66, "learning_rate": 1.5666643264823784e-06, "logits/chosen": -0.6731566786766052, "logits/rejected": -0.6148266792297363, "logps/chosen": -288.9949951171875, "logps/rejected": -287.87408447265625, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -0.44593989849090576, "rewards/margins": 10.175773620605469, "rewards/rejected": -10.621713638305664, "step": 1928 }, { "epoch": 0.66, "learning_rate": 1.5662088368309838e-06, "logits/chosen": -0.5289969444274902, "logits/rejected": -0.4830957055091858, "logps/chosen": -246.46551513671875, "logps/rejected": -386.4679870605469, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.2305185794830322, "rewards/margins": 16.770936965942383, "rewards/rejected": -18.001455307006836, "step": 1929 }, { "epoch": 0.66, "learning_rate": 1.5657531742106034e-06, "logits/chosen": -0.6016548871994019, "logits/rejected": -0.5789222121238708, "logps/chosen": -167.60171508789062, "logps/rejected": -285.1607971191406, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 0.0027615204453468323, "rewards/margins": 14.830880165100098, "rewards/rejected": -14.828119277954102, "step": 1930 }, { "epoch": 0.66, "learning_rate": 1.5652973387604358e-06, "logits/chosen": -0.5313141942024231, "logits/rejected": -0.49033305048942566, "logps/chosen": -240.99923706054688, "logps/rejected": -283.13055419921875, "loss": 0.073, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4954190254211426, "rewards/margins": 11.609498977661133, "rewards/rejected": -12.104917526245117, "step": 1931 }, { "epoch": 0.66, "learning_rate": 1.5648413306197323e-06, "logits/chosen": -0.7075019478797913, "logits/rejected": -0.6532899141311646, "logps/chosen": -230.4801483154297, "logps/rejected": -354.0132141113281, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.7331584692001343, "rewards/margins": 14.63575267791748, "rewards/rejected": -15.36890983581543, "step": 1932 }, { "epoch": 0.66, "learning_rate": 1.5643851499277978e-06, "logits/chosen": -0.6416433453559875, "logits/rejected": -0.6209161281585693, "logps/chosen": -245.25955200195312, "logps/rejected": -413.9372253417969, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": 0.8946864008903503, "rewards/margins": 17.340160369873047, "rewards/rejected": -16.44547462463379, "step": 1933 }, { "epoch": 0.66, "learning_rate": 1.5639287968239882e-06, "logits/chosen": -0.6441377401351929, "logits/rejected": -0.6265475749969482, "logps/chosen": -178.96371459960938, "logps/rejected": -306.9024658203125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.25754231214523315, "rewards/margins": 12.48370361328125, "rewards/rejected": -12.741244316101074, "step": 1934 }, { "epoch": 0.66, "learning_rate": 1.5634722714477137e-06, "logits/chosen": -0.6256247758865356, "logits/rejected": -0.5866363644599915, "logps/chosen": -200.76087951660156, "logps/rejected": -316.318603515625, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -0.8247613310813904, "rewards/margins": 13.786396026611328, "rewards/rejected": -14.61115837097168, "step": 1935 }, { "epoch": 0.66, "learning_rate": 1.5630155739384362e-06, "logits/chosen": -0.6299317479133606, "logits/rejected": -0.597504198551178, "logps/chosen": -163.75218200683594, "logps/rejected": -322.34930419921875, "loss": 0.0896, "rewards/accuracies": 1.0, "rewards/chosen": -0.43491488695144653, "rewards/margins": 13.961812973022461, "rewards/rejected": -14.396728515625, "step": 1936 }, { "epoch": 0.66, "learning_rate": 1.5625587044356712e-06, "logits/chosen": -0.6800290942192078, "logits/rejected": -0.6426875591278076, "logps/chosen": -177.40379333496094, "logps/rejected": -229.4544677734375, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -0.4838295578956604, "rewards/margins": 9.90683650970459, "rewards/rejected": -10.390666007995605, "step": 1937 }, { "epoch": 0.66, "learning_rate": 1.562101663078985e-06, "logits/chosen": -0.651409924030304, "logits/rejected": -0.6320689916610718, "logps/chosen": -167.02476501464844, "logps/rejected": -302.60272216796875, "loss": 0.0255, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9510982036590576, "rewards/margins": 13.15908145904541, "rewards/rejected": -14.11017894744873, "step": 1938 }, { "epoch": 0.66, "learning_rate": 1.561644450007998e-06, "logits/chosen": -0.7387998104095459, "logits/rejected": -0.7107054591178894, "logps/chosen": -271.0892028808594, "logps/rejected": -396.38140869140625, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -0.2320612668991089, "rewards/margins": 14.820494651794434, "rewards/rejected": -15.052555084228516, "step": 1939 }, { "epoch": 0.66, "learning_rate": 1.5611870653623825e-06, "logits/chosen": -0.6325262784957886, "logits/rejected": -0.602647602558136, "logps/chosen": -160.55862426757812, "logps/rejected": -237.2437744140625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.1564074754714966, "rewards/margins": 10.728219032287598, "rewards/rejected": -11.884627342224121, "step": 1940 }, { "epoch": 0.66, "learning_rate": 1.5607295092818626e-06, "logits/chosen": -0.5255295634269714, "logits/rejected": -0.49976420402526855, "logps/chosen": -214.69436645507812, "logps/rejected": -286.4230041503906, "loss": 0.0412, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4337952136993408, "rewards/margins": 10.042490005493164, "rewards/rejected": -11.476284980773926, "step": 1941 }, { "epoch": 0.66, "learning_rate": 1.5602717819062166e-06, "logits/chosen": -0.5783224701881409, "logits/rejected": -0.5498647093772888, "logps/chosen": -182.0894012451172, "logps/rejected": -305.5000305175781, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": -1.2524325847625732, "rewards/margins": 12.785489082336426, "rewards/rejected": -14.037919998168945, "step": 1942 }, { "epoch": 0.66, "learning_rate": 1.5598138833752726e-06, "logits/chosen": -0.6971690058708191, "logits/rejected": -0.679839015007019, "logps/chosen": -252.3790283203125, "logps/rejected": -406.7791442871094, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": 0.42532098293304443, "rewards/margins": 15.98044204711914, "rewards/rejected": -15.555120468139648, "step": 1943 }, { "epoch": 0.66, "learning_rate": 1.5593558138289132e-06, "logits/chosen": -0.6103931069374084, "logits/rejected": -0.5993943214416504, "logps/chosen": -223.101806640625, "logps/rejected": -335.3033752441406, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.6576220989227295, "rewards/margins": 12.879915237426758, "rewards/rejected": -14.537538528442383, "step": 1944 }, { "epoch": 0.66, "learning_rate": 1.5588975734070715e-06, "logits/chosen": -0.6754387021064758, "logits/rejected": -0.6241924166679382, "logps/chosen": -275.4754943847656, "logps/rejected": -329.2555847167969, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.9645211100578308, "rewards/margins": 12.57507038116455, "rewards/rejected": -13.539591789245605, "step": 1945 }, { "epoch": 0.66, "learning_rate": 1.5584391622497343e-06, "logits/chosen": -0.6899770498275757, "logits/rejected": -0.6691863536834717, "logps/chosen": -171.33853149414062, "logps/rejected": -312.4205017089844, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 0.3220869302749634, "rewards/margins": 12.77564525604248, "rewards/rejected": -12.453559875488281, "step": 1946 }, { "epoch": 0.66, "learning_rate": 1.5579805804969398e-06, "logits/chosen": -0.6612421870231628, "logits/rejected": -0.6042534708976746, "logps/chosen": -200.8264923095703, "logps/rejected": -324.8067321777344, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": 0.059241265058517456, "rewards/margins": 14.538921356201172, "rewards/rejected": -14.479681015014648, "step": 1947 }, { "epoch": 0.66, "learning_rate": 1.557521828288778e-06, "logits/chosen": -0.5613462328910828, "logits/rejected": -0.55231112241745, "logps/chosen": -186.135986328125, "logps/rejected": -309.9522399902344, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -1.149613857269287, "rewards/margins": 10.366698265075684, "rewards/rejected": -11.516312599182129, "step": 1948 }, { "epoch": 0.67, "learning_rate": 1.5570629057653918e-06, "logits/chosen": -0.6495351791381836, "logits/rejected": -0.6091495156288147, "logps/chosen": -208.1773223876953, "logps/rejected": -333.5632019042969, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -1.6939620971679688, "rewards/margins": 12.293557167053223, "rewards/rejected": -13.987520217895508, "step": 1949 }, { "epoch": 0.67, "learning_rate": 1.5566038130669753e-06, "logits/chosen": -0.6300444602966309, "logits/rejected": -0.6109575629234314, "logps/chosen": -194.74208068847656, "logps/rejected": -294.1912841796875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.4015014171600342, "rewards/margins": 13.109785079956055, "rewards/rejected": -13.511287689208984, "step": 1950 }, { "epoch": 0.67, "learning_rate": 1.5561445503337755e-06, "logits/chosen": -0.8046060800552368, "logits/rejected": -0.7740018367767334, "logps/chosen": -215.02049255371094, "logps/rejected": -299.82684326171875, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -0.41738319396972656, "rewards/margins": 11.036569595336914, "rewards/rejected": -11.453951835632324, "step": 1951 }, { "epoch": 0.67, "learning_rate": 1.5556851177060906e-06, "logits/chosen": -0.6199461221694946, "logits/rejected": -0.5932126641273499, "logps/chosen": -147.38027954101562, "logps/rejected": -222.98779296875, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": -1.5878642797470093, "rewards/margins": 9.591015815734863, "rewards/rejected": -11.17888069152832, "step": 1952 }, { "epoch": 0.67, "learning_rate": 1.5552255153242708e-06, "logits/chosen": -0.5967533588409424, "logits/rejected": -0.5746690034866333, "logps/chosen": -192.70523071289062, "logps/rejected": -308.8408508300781, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.17503604292869568, "rewards/margins": 12.902458190917969, "rewards/rejected": -13.077493667602539, "step": 1953 }, { "epoch": 0.67, "learning_rate": 1.5547657433287183e-06, "logits/chosen": -0.6720724701881409, "logits/rejected": -0.6333319544792175, "logps/chosen": -245.41909790039062, "logps/rejected": -379.7298889160156, "loss": 0.0198, "rewards/accuracies": 0.9375, "rewards/chosen": 0.4262852668762207, "rewards/margins": 16.015060424804688, "rewards/rejected": -15.588775634765625, "step": 1954 }, { "epoch": 0.67, "learning_rate": 1.554305801859887e-06, "logits/chosen": -0.7020925879478455, "logits/rejected": -0.6754506826400757, "logps/chosen": -206.79798889160156, "logps/rejected": -339.4305725097656, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.146801233291626, "rewards/margins": 14.262166976928711, "rewards/rejected": -15.408968925476074, "step": 1955 }, { "epoch": 0.67, "learning_rate": 1.553845691058283e-06, "logits/chosen": -0.6388554573059082, "logits/rejected": -0.6264482140541077, "logps/chosen": -202.05169677734375, "logps/rejected": -339.5093994140625, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 0.08741432428359985, "rewards/margins": 13.566267013549805, "rewards/rejected": -13.478854179382324, "step": 1956 }, { "epoch": 0.67, "learning_rate": 1.5533854110644636e-06, "logits/chosen": -0.607015073299408, "logits/rejected": -0.5769416093826294, "logps/chosen": -198.23861694335938, "logps/rejected": -282.8687744140625, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": 0.49283677339553833, "rewards/margins": 12.856372833251953, "rewards/rejected": -12.363536834716797, "step": 1957 }, { "epoch": 0.67, "learning_rate": 1.5529249620190376e-06, "logits/chosen": -0.5844073295593262, "logits/rejected": -0.5274362564086914, "logps/chosen": -159.78770446777344, "logps/rejected": -234.81411743164062, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7816275954246521, "rewards/margins": 10.931779861450195, "rewards/rejected": -11.713407516479492, "step": 1958 }, { "epoch": 0.67, "learning_rate": 1.5524643440626669e-06, "logits/chosen": -0.6568698287010193, "logits/rejected": -0.6099608540534973, "logps/chosen": -209.58253479003906, "logps/rejected": -259.16937255859375, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.3613826036453247, "rewards/margins": 10.449037551879883, "rewards/rejected": -11.810420036315918, "step": 1959 }, { "epoch": 0.67, "learning_rate": 1.5520035573360627e-06, "logits/chosen": -0.6009365320205688, "logits/rejected": -0.5797829031944275, "logps/chosen": -213.2639923095703, "logps/rejected": -349.7462158203125, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": -1.1514201164245605, "rewards/margins": 13.569164276123047, "rewards/rejected": -14.720584869384766, "step": 1960 }, { "epoch": 0.67, "learning_rate": 1.5515426019799897e-06, "logits/chosen": -0.5418373346328735, "logits/rejected": -0.5355930924415588, "logps/chosen": -180.19163513183594, "logps/rejected": -302.9786376953125, "loss": 0.047, "rewards/accuracies": 0.875, "rewards/chosen": -0.805526852607727, "rewards/margins": 12.158257484436035, "rewards/rejected": -12.963787078857422, "step": 1961 }, { "epoch": 0.67, "learning_rate": 1.5510814781352638e-06, "logits/chosen": -0.6548394560813904, "logits/rejected": -0.6432350277900696, "logps/chosen": -166.04063415527344, "logps/rejected": -310.30902099609375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.1617987155914307, "rewards/margins": 13.167619705200195, "rewards/rejected": -14.32942008972168, "step": 1962 }, { "epoch": 0.67, "learning_rate": 1.5506201859427507e-06, "logits/chosen": -0.5613518953323364, "logits/rejected": -0.5563343167304993, "logps/chosen": -184.04246520996094, "logps/rejected": -309.7335205078125, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -2.4437341690063477, "rewards/margins": 12.169459342956543, "rewards/rejected": -14.613192558288574, "step": 1963 }, { "epoch": 0.67, "learning_rate": 1.5501587255433696e-06, "logits/chosen": -0.5946441888809204, "logits/rejected": -0.5700687170028687, "logps/chosen": -247.66616821289062, "logps/rejected": -389.3730773925781, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -0.9447771906852722, "rewards/margins": 17.562623977661133, "rewards/rejected": -18.507400512695312, "step": 1964 }, { "epoch": 0.67, "learning_rate": 1.5496970970780904e-06, "logits/chosen": -0.6105471253395081, "logits/rejected": -0.5507136583328247, "logps/chosen": -267.3283386230469, "logps/rejected": -348.56842041015625, "loss": 0.0217, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5412967205047607, "rewards/margins": 13.333580017089844, "rewards/rejected": -14.8748779296875, "step": 1965 }, { "epoch": 0.67, "learning_rate": 1.5492353006879342e-06, "logits/chosen": -0.6608947515487671, "logits/rejected": -0.6433747410774231, "logps/chosen": -195.0243377685547, "logps/rejected": -337.19708251953125, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.0986199378967285, "rewards/margins": 14.553254127502441, "rewards/rejected": -15.651874542236328, "step": 1966 }, { "epoch": 0.67, "learning_rate": 1.548773336513973e-06, "logits/chosen": -0.6832243204116821, "logits/rejected": -0.679989218711853, "logps/chosen": -265.8759765625, "logps/rejected": -436.82257080078125, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 0.05540866404771805, "rewards/margins": 16.889081954956055, "rewards/rejected": -16.83367156982422, "step": 1967 }, { "epoch": 0.67, "learning_rate": 1.5483112046973307e-06, "logits/chosen": -0.5293093919754028, "logits/rejected": -0.5356664657592773, "logps/chosen": -192.4166259765625, "logps/rejected": -332.02264404296875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.9079108238220215, "rewards/margins": 11.339296340942383, "rewards/rejected": -13.247207641601562, "step": 1968 }, { "epoch": 0.67, "learning_rate": 1.5478489053791824e-06, "logits/chosen": -0.515248715877533, "logits/rejected": -0.5009140372276306, "logps/chosen": -108.43135070800781, "logps/rejected": -264.7403259277344, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.39967164397239685, "rewards/margins": 13.402660369873047, "rewards/rejected": -13.80233097076416, "step": 1969 }, { "epoch": 0.67, "learning_rate": 1.5473864387007539e-06, "logits/chosen": -0.6413459777832031, "logits/rejected": -0.6380268931388855, "logps/chosen": -226.4080047607422, "logps/rejected": -379.6352844238281, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.3562536239624023, "rewards/margins": 15.07400894165039, "rewards/rejected": -16.430261611938477, "step": 1970 }, { "epoch": 0.67, "learning_rate": 1.5469238048033226e-06, "logits/chosen": -0.572295606136322, "logits/rejected": -0.5606327056884766, "logps/chosen": -177.11573791503906, "logps/rejected": -304.4671630859375, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -1.372010350227356, "rewards/margins": 12.740011215209961, "rewards/rejected": -14.112021446228027, "step": 1971 }, { "epoch": 0.67, "learning_rate": 1.5464610038282166e-06, "logits/chosen": -0.6135634183883667, "logits/rejected": -0.5852428674697876, "logps/chosen": -181.8133087158203, "logps/rejected": -326.0145568847656, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.3681612014770508, "rewards/margins": 14.964305877685547, "rewards/rejected": -16.33246612548828, "step": 1972 }, { "epoch": 0.67, "learning_rate": 1.545998035916815e-06, "logits/chosen": -0.5907737016677856, "logits/rejected": -0.5830157995223999, "logps/chosen": -156.39303588867188, "logps/rejected": -332.6138000488281, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.07751701027154922, "rewards/margins": 12.481836318969727, "rewards/rejected": -12.559353828430176, "step": 1973 }, { "epoch": 0.67, "learning_rate": 1.5455349012105486e-06, "logits/chosen": -0.6855978965759277, "logits/rejected": -0.6836135387420654, "logps/chosen": -158.4136962890625, "logps/rejected": -317.25738525390625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.12258100509643555, "rewards/margins": 12.711713790893555, "rewards/rejected": -12.589133262634277, "step": 1974 }, { "epoch": 0.67, "learning_rate": 1.5450715998508982e-06, "logits/chosen": -0.6392734050750732, "logits/rejected": -0.6237840056419373, "logps/chosen": -141.24481201171875, "logps/rejected": -223.18179321289062, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.49547886848449707, "rewards/margins": 10.347084045410156, "rewards/rejected": -10.842561721801758, "step": 1975 }, { "epoch": 0.67, "learning_rate": 1.5446081319793966e-06, "logits/chosen": -0.6896092891693115, "logits/rejected": -0.6761747002601624, "logps/chosen": -204.44198608398438, "logps/rejected": -352.456298828125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.7563166618347168, "rewards/margins": 14.866964340209961, "rewards/rejected": -16.62328338623047, "step": 1976 }, { "epoch": 0.67, "learning_rate": 1.5441444977376263e-06, "logits/chosen": -0.5879216194152832, "logits/rejected": -0.550193190574646, "logps/chosen": -215.53294372558594, "logps/rejected": -307.47393798828125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -2.70575213432312, "rewards/margins": 11.655311584472656, "rewards/rejected": -14.361063957214355, "step": 1977 }, { "epoch": 0.68, "learning_rate": 1.5436806972672212e-06, "logits/chosen": -0.6414633393287659, "logits/rejected": -0.6450580954551697, "logps/chosen": -202.47328186035156, "logps/rejected": -369.0565185546875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.25761881470680237, "rewards/margins": 14.448685646057129, "rewards/rejected": -14.706304550170898, "step": 1978 }, { "epoch": 0.68, "learning_rate": 1.543216730709866e-06, "logits/chosen": -0.5768697261810303, "logits/rejected": -0.5673550367355347, "logps/chosen": -178.18157958984375, "logps/rejected": -353.0260314941406, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -1.2121001482009888, "rewards/margins": 12.424789428710938, "rewards/rejected": -13.636889457702637, "step": 1979 }, { "epoch": 0.68, "learning_rate": 1.5427525982072961e-06, "logits/chosen": -0.5937886238098145, "logits/rejected": -0.5774860382080078, "logps/chosen": -192.7985382080078, "logps/rejected": -298.8994140625, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.23893451690673828, "rewards/margins": 12.022961616516113, "rewards/rejected": -12.261897087097168, "step": 1980 }, { "epoch": 0.68, "learning_rate": 1.5422882999012975e-06, "logits/chosen": -0.5866549611091614, "logits/rejected": -0.544466495513916, "logps/chosen": -187.62957763671875, "logps/rejected": -238.74696350097656, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -0.38228461146354675, "rewards/margins": 11.290982246398926, "rewards/rejected": -11.673267364501953, "step": 1981 }, { "epoch": 0.68, "learning_rate": 1.5418238359337075e-06, "logits/chosen": -0.6204692721366882, "logits/rejected": -0.6088110208511353, "logps/chosen": -295.2710876464844, "logps/rejected": -462.59454345703125, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.6516939997673035, "rewards/margins": 17.10616683959961, "rewards/rejected": -17.75786018371582, "step": 1982 }, { "epoch": 0.68, "learning_rate": 1.5413592064464126e-06, "logits/chosen": -0.5161440372467041, "logits/rejected": -0.4784018099308014, "logps/chosen": -169.5770263671875, "logps/rejected": -286.6343078613281, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.8314815759658813, "rewards/margins": 13.171304702758789, "rewards/rejected": -14.002786636352539, "step": 1983 }, { "epoch": 0.68, "learning_rate": 1.540894411581351e-06, "logits/chosen": -0.560177206993103, "logits/rejected": -0.505555272102356, "logps/chosen": -212.22247314453125, "logps/rejected": -260.43426513671875, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -1.1368801593780518, "rewards/margins": 12.62476634979248, "rewards/rejected": -13.761648178100586, "step": 1984 }, { "epoch": 0.68, "learning_rate": 1.5404294514805113e-06, "logits/chosen": -0.4846230149269104, "logits/rejected": -0.474069744348526, "logps/chosen": -161.3939208984375, "logps/rejected": -248.93898010253906, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.0316944122314453, "rewards/margins": 10.300036430358887, "rewards/rejected": -11.331730842590332, "step": 1985 }, { "epoch": 0.68, "learning_rate": 1.5399643262859323e-06, "logits/chosen": -0.5828310251235962, "logits/rejected": -0.5566099882125854, "logps/chosen": -187.21119689941406, "logps/rejected": -308.6590576171875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.4569149017333984, "rewards/margins": 11.527081489562988, "rewards/rejected": -12.983996391296387, "step": 1986 }, { "epoch": 0.68, "learning_rate": 1.5394990361397034e-06, "logits/chosen": -0.6857179403305054, "logits/rejected": -0.6330738663673401, "logps/chosen": -230.33316040039062, "logps/rejected": -322.3265380859375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.4433883428573608, "rewards/margins": 13.640491485595703, "rewards/rejected": -15.083879470825195, "step": 1987 }, { "epoch": 0.68, "learning_rate": 1.5390335811839639e-06, "logits/chosen": -0.6993080973625183, "logits/rejected": -0.6385532021522522, "logps/chosen": -234.98582458496094, "logps/rejected": -245.98045349121094, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 1.004736065864563, "rewards/margins": 10.578418731689453, "rewards/rejected": -9.57368278503418, "step": 1988 }, { "epoch": 0.68, "learning_rate": 1.5385679615609043e-06, "logits/chosen": -0.6141374111175537, "logits/rejected": -0.5783082246780396, "logps/chosen": -179.12075805664062, "logps/rejected": -314.1458435058594, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.2832608222961426, "rewards/margins": 13.527189254760742, "rewards/rejected": -15.810450553894043, "step": 1989 }, { "epoch": 0.68, "learning_rate": 1.5381021774127648e-06, "logits/chosen": -0.5589599013328552, "logits/rejected": -0.5302790403366089, "logps/chosen": -204.471923828125, "logps/rejected": -338.820068359375, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.8350077867507935, "rewards/margins": 12.761434555053711, "rewards/rejected": -13.596443176269531, "step": 1990 }, { "epoch": 0.68, "learning_rate": 1.5376362288818363e-06, "logits/chosen": -0.6321222186088562, "logits/rejected": -0.6233676075935364, "logps/chosen": -253.85997009277344, "logps/rejected": -431.8516845703125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.5649476051330566, "rewards/margins": 15.35597038269043, "rewards/rejected": -17.92091941833496, "step": 1991 }, { "epoch": 0.68, "learning_rate": 1.5371701161104591e-06, "logits/chosen": -0.4811760485172272, "logits/rejected": -0.4575061798095703, "logps/chosen": -189.0250244140625, "logps/rejected": -311.84625244140625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.6995887756347656, "rewards/margins": 14.862627029418945, "rewards/rejected": -15.562215805053711, "step": 1992 }, { "epoch": 0.68, "learning_rate": 1.5367038392410246e-06, "logits/chosen": -0.5732390880584717, "logits/rejected": -0.5755289196968079, "logps/chosen": -119.9129409790039, "logps/rejected": -280.9701843261719, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5919491648674011, "rewards/margins": 13.838155746459961, "rewards/rejected": -14.430105209350586, "step": 1993 }, { "epoch": 0.68, "learning_rate": 1.536237398415974e-06, "logits/chosen": -0.56004399061203, "logits/rejected": -0.5409168601036072, "logps/chosen": -289.1009521484375, "logps/rejected": -455.3426818847656, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.41263875365257263, "rewards/margins": 16.444656372070312, "rewards/rejected": -16.85729217529297, "step": 1994 }, { "epoch": 0.68, "learning_rate": 1.5357707937777984e-06, "logits/chosen": -0.5787034034729004, "logits/rejected": -0.5364908576011658, "logps/chosen": -274.0715026855469, "logps/rejected": -387.75396728515625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.8390706777572632, "rewards/margins": 13.332645416259766, "rewards/rejected": -14.171716690063477, "step": 1995 }, { "epoch": 0.68, "learning_rate": 1.5353040254690392e-06, "logits/chosen": -0.6397708058357239, "logits/rejected": -0.6251770257949829, "logps/chosen": -228.6563262939453, "logps/rejected": -354.07781982421875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.7983678579330444, "rewards/margins": 12.277859687805176, "rewards/rejected": -13.076226234436035, "step": 1996 }, { "epoch": 0.68, "learning_rate": 1.5348370936322873e-06, "logits/chosen": -0.5847566723823547, "logits/rejected": -0.5855842232704163, "logps/chosen": -146.07125854492188, "logps/rejected": -287.61737060546875, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.309357762336731, "rewards/margins": 11.724381446838379, "rewards/rejected": -13.033740043640137, "step": 1997 }, { "epoch": 0.68, "learning_rate": 1.5343699984101845e-06, "logits/chosen": -0.5655943155288696, "logits/rejected": -0.5263355374336243, "logps/chosen": -252.90533447265625, "logps/rejected": -312.3736267089844, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.999870777130127, "rewards/margins": 11.277655601501465, "rewards/rejected": -12.27752685546875, "step": 1998 }, { "epoch": 0.68, "learning_rate": 1.5339027399454217e-06, "logits/chosen": -0.49397286772727966, "logits/rejected": -0.46224480867385864, "logps/chosen": -182.7061309814453, "logps/rejected": -246.0167999267578, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -1.1926636695861816, "rewards/margins": 9.942785263061523, "rewards/rejected": -11.135448455810547, "step": 1999 }, { "epoch": 0.68, "learning_rate": 1.5334353183807396e-06, "logits/chosen": -0.53053218126297, "logits/rejected": -0.5127534866333008, "logps/chosen": -162.11614990234375, "logps/rejected": -324.3226318359375, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.6967304348945618, "rewards/margins": 15.773177146911621, "rewards/rejected": -16.469907760620117, "step": 2000 }, { "epoch": 0.68, "learning_rate": 1.5329677338589292e-06, "logits/chosen": -0.6191670894622803, "logits/rejected": -0.5652368068695068, "logps/chosen": -216.77882385253906, "logps/rejected": -338.0050964355469, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -2.0943071842193604, "rewards/margins": 13.394308090209961, "rewards/rejected": -15.488614082336426, "step": 2001 }, { "epoch": 0.68, "learning_rate": 1.5324999865228314e-06, "logits/chosen": -0.5051791667938232, "logits/rejected": -0.4709170460700989, "logps/chosen": -150.15696716308594, "logps/rejected": -236.32777404785156, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 0.2999570965766907, "rewards/margins": 12.415840148925781, "rewards/rejected": -12.115882873535156, "step": 2002 }, { "epoch": 0.68, "learning_rate": 1.5320320765153365e-06, "logits/chosen": -0.49325791001319885, "logits/rejected": -0.45350027084350586, "logps/chosen": -237.94253540039062, "logps/rejected": -345.49334716796875, "loss": 0.0601, "rewards/accuracies": 1.0, "rewards/chosen": 0.04649461805820465, "rewards/margins": 14.996533393859863, "rewards/rejected": -14.95003890991211, "step": 2003 }, { "epoch": 0.68, "learning_rate": 1.5315640039793844e-06, "logits/chosen": -0.498147577047348, "logits/rejected": -0.49193793535232544, "logps/chosen": -187.23912048339844, "logps/rejected": -277.1463928222656, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.452833652496338, "rewards/margins": 10.516162872314453, "rewards/rejected": -11.968996047973633, "step": 2004 }, { "epoch": 0.68, "learning_rate": 1.5310957690579646e-06, "logits/chosen": -0.5061562657356262, "logits/rejected": -0.4641948640346527, "logps/chosen": -207.0508575439453, "logps/rejected": -290.3516845703125, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": -1.1855700016021729, "rewards/margins": 11.563150405883789, "rewards/rejected": -12.7487211227417, "step": 2005 }, { "epoch": 0.68, "learning_rate": 1.5306273718941168e-06, "logits/chosen": -0.5973036289215088, "logits/rejected": -0.5584259629249573, "logps/chosen": -211.4164276123047, "logps/rejected": -292.09759521484375, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -1.1526237726211548, "rewards/margins": 13.013713836669922, "rewards/rejected": -14.166338920593262, "step": 2006 }, { "epoch": 0.68, "learning_rate": 1.5301588126309297e-06, "logits/chosen": -0.5464292168617249, "logits/rejected": -0.5122910141944885, "logps/chosen": -196.36489868164062, "logps/rejected": -312.98162841796875, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -1.7096033096313477, "rewards/margins": 13.070318222045898, "rewards/rejected": -14.779923439025879, "step": 2007 }, { "epoch": 0.69, "learning_rate": 1.5296900914115416e-06, "logits/chosen": -0.6346040964126587, "logits/rejected": -0.6102772355079651, "logps/chosen": -156.0939483642578, "logps/rejected": -245.65863037109375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.7309579849243164, "rewards/margins": 11.396395683288574, "rewards/rejected": -12.127352714538574, "step": 2008 }, { "epoch": 0.69, "learning_rate": 1.5292212083791408e-06, "logits/chosen": -0.496894896030426, "logits/rejected": -0.48901382088661194, "logps/chosen": -200.0525665283203, "logps/rejected": -348.2105407714844, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.914900541305542, "rewards/margins": 13.127992630004883, "rewards/rejected": -15.04289436340332, "step": 2009 }, { "epoch": 0.69, "learning_rate": 1.5287521636769639e-06, "logits/chosen": -0.5775795578956604, "logits/rejected": -0.5663731694221497, "logps/chosen": -161.881103515625, "logps/rejected": -293.569580078125, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -1.5935816764831543, "rewards/margins": 11.727190971374512, "rewards/rejected": -13.320772171020508, "step": 2010 }, { "epoch": 0.69, "learning_rate": 1.5282829574482979e-06, "logits/chosen": -0.5730246305465698, "logits/rejected": -0.5568118095397949, "logps/chosen": -197.12709045410156, "logps/rejected": -313.0490417480469, "loss": 0.086, "rewards/accuracies": 1.0, "rewards/chosen": -1.132088541984558, "rewards/margins": 12.343036651611328, "rewards/rejected": -13.475125312805176, "step": 2011 }, { "epoch": 0.69, "learning_rate": 1.5278135898364792e-06, "logits/chosen": -0.5285500288009644, "logits/rejected": -0.519469141960144, "logps/chosen": -156.5060272216797, "logps/rejected": -255.8651580810547, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -2.2470524311065674, "rewards/margins": 9.326314926147461, "rewards/rejected": -11.573368072509766, "step": 2012 }, { "epoch": 0.69, "learning_rate": 1.5273440609848926e-06, "logits/chosen": -0.605587363243103, "logits/rejected": -0.5888116955757141, "logps/chosen": -181.81993103027344, "logps/rejected": -347.0660400390625, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -0.7517759203910828, "rewards/margins": 13.669754028320312, "rewards/rejected": -14.421529769897461, "step": 2013 }, { "epoch": 0.69, "learning_rate": 1.5268743710369728e-06, "logits/chosen": -0.5430377721786499, "logits/rejected": -0.5103164315223694, "logps/chosen": -268.5646057128906, "logps/rejected": -367.8023376464844, "loss": 0.0397, "rewards/accuracies": 1.0, "rewards/chosen": -0.33888083696365356, "rewards/margins": 12.117342948913574, "rewards/rejected": -12.45622444152832, "step": 2014 }, { "epoch": 0.69, "learning_rate": 1.5264045201362037e-06, "logits/chosen": -0.4510698616504669, "logits/rejected": -0.39768052101135254, "logps/chosen": -220.82894897460938, "logps/rejected": -306.06341552734375, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.5872662663459778, "rewards/margins": 12.485069274902344, "rewards/rejected": -13.072336196899414, "step": 2015 }, { "epoch": 0.69, "learning_rate": 1.5259345084261185e-06, "logits/chosen": -0.4655246138572693, "logits/rejected": -0.45615896582603455, "logps/chosen": -201.86172485351562, "logps/rejected": -319.3332214355469, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -2.1851418018341064, "rewards/margins": 12.484410285949707, "rewards/rejected": -14.669551849365234, "step": 2016 }, { "epoch": 0.69, "learning_rate": 1.5254643360502986e-06, "logits/chosen": -0.5888970494270325, "logits/rejected": -0.5632923245429993, "logps/chosen": -147.91400146484375, "logps/rejected": -305.9006042480469, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -2.3722500801086426, "rewards/margins": 13.823603630065918, "rewards/rejected": -16.19585418701172, "step": 2017 }, { "epoch": 0.69, "learning_rate": 1.524994003152376e-06, "logits/chosen": -0.5612016916275024, "logits/rejected": -0.5412881374359131, "logps/chosen": -165.386962890625, "logps/rejected": -326.19232177734375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.3966684341430664, "rewards/margins": 16.45924186706543, "rewards/rejected": -17.855911254882812, "step": 2018 }, { "epoch": 0.69, "learning_rate": 1.52452350987603e-06, "logits/chosen": -0.5845295786857605, "logits/rejected": -0.5745260715484619, "logps/chosen": -198.64186096191406, "logps/rejected": -315.22955322265625, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.16719728708267212, "rewards/margins": 12.007102012634277, "rewards/rejected": -12.174299240112305, "step": 2019 }, { "epoch": 0.69, "learning_rate": 1.5240528563649905e-06, "logits/chosen": -0.5142773985862732, "logits/rejected": -0.5053766965866089, "logps/chosen": -163.83909606933594, "logps/rejected": -312.9483947753906, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.9947868585586548, "rewards/margins": 12.267655372619629, "rewards/rejected": -14.262443542480469, "step": 2020 }, { "epoch": 0.69, "learning_rate": 1.5235820427630359e-06, "logits/chosen": -0.5031763911247253, "logits/rejected": -0.46027034521102905, "logps/chosen": -277.4839172363281, "logps/rejected": -407.8612365722656, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.5807909965515137, "rewards/margins": 15.731470108032227, "rewards/rejected": -17.3122615814209, "step": 2021 }, { "epoch": 0.69, "learning_rate": 1.5231110692139924e-06, "logits/chosen": -0.607952892780304, "logits/rejected": -0.6008445024490356, "logps/chosen": -164.55392456054688, "logps/rejected": -278.38616943359375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.5437155961990356, "rewards/margins": 11.843989372253418, "rewards/rejected": -13.387706756591797, "step": 2022 }, { "epoch": 0.69, "learning_rate": 1.5226399358617368e-06, "logits/chosen": -0.4579826891422272, "logits/rejected": -0.4173662066459656, "logps/chosen": -164.0971221923828, "logps/rejected": -268.0314636230469, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9572528600692749, "rewards/margins": 10.696074485778809, "rewards/rejected": -11.653327941894531, "step": 2023 }, { "epoch": 0.69, "learning_rate": 1.5221686428501928e-06, "logits/chosen": -0.6420993804931641, "logits/rejected": -0.5913334488868713, "logps/chosen": -277.8172607421875, "logps/rejected": -361.2994689941406, "loss": 0.1078, "rewards/accuracies": 1.0, "rewards/chosen": -0.14451530575752258, "rewards/margins": 16.152084350585938, "rewards/rejected": -16.296600341796875, "step": 2024 }, { "epoch": 0.69, "learning_rate": 1.5216971903233349e-06, "logits/chosen": -0.61092209815979, "logits/rejected": -0.5681262016296387, "logps/chosen": -182.30026245117188, "logps/rejected": -282.5867919921875, "loss": 0.0473, "rewards/accuracies": 1.0, "rewards/chosen": -1.558017611503601, "rewards/margins": 12.599210739135742, "rewards/rejected": -14.157228469848633, "step": 2025 }, { "epoch": 0.69, "learning_rate": 1.5212255784251847e-06, "logits/chosen": -0.6113483905792236, "logits/rejected": -0.5766159892082214, "logps/chosen": -184.4314727783203, "logps/rejected": -300.5614318847656, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.697356104850769, "rewards/margins": 12.531900405883789, "rewards/rejected": -14.229257583618164, "step": 2026 }, { "epoch": 0.69, "learning_rate": 1.5207538072998134e-06, "logits/chosen": -0.5789797902107239, "logits/rejected": -0.5557816028594971, "logps/chosen": -213.74533081054688, "logps/rejected": -336.911376953125, "loss": 0.0626, "rewards/accuracies": 1.0, "rewards/chosen": -0.6470258235931396, "rewards/margins": 14.748435020446777, "rewards/rejected": -15.395461082458496, "step": 2027 }, { "epoch": 0.69, "learning_rate": 1.5202818770913405e-06, "logits/chosen": -0.571139931678772, "logits/rejected": -0.5372803211212158, "logps/chosen": -215.61508178710938, "logps/rejected": -330.0345153808594, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -0.17568671703338623, "rewards/margins": 13.845352172851562, "rewards/rejected": -14.021038055419922, "step": 2028 }, { "epoch": 0.69, "learning_rate": 1.5198097879439344e-06, "logits/chosen": -0.5168466567993164, "logits/rejected": -0.48117581009864807, "logps/chosen": -206.1190185546875, "logps/rejected": -307.0007629394531, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -1.1358579397201538, "rewards/margins": 13.281927108764648, "rewards/rejected": -14.41778564453125, "step": 2029 }, { "epoch": 0.69, "learning_rate": 1.5193375400018116e-06, "logits/chosen": -0.5909090042114258, "logits/rejected": -0.5606480836868286, "logps/chosen": -238.83953857421875, "logps/rejected": -317.6593322753906, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.1955515742301941, "rewards/margins": 12.831032752990723, "rewards/rejected": -13.026583671569824, "step": 2030 }, { "epoch": 0.69, "learning_rate": 1.518865133409237e-06, "logits/chosen": -0.571763277053833, "logits/rejected": -0.5646483302116394, "logps/chosen": -204.0753173828125, "logps/rejected": -346.783447265625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.616446852684021, "rewards/margins": 11.99544906616211, "rewards/rejected": -12.611895561218262, "step": 2031 }, { "epoch": 0.69, "learning_rate": 1.5183925683105251e-06, "logits/chosen": -0.627008855342865, "logits/rejected": -0.5506458878517151, "logps/chosen": -240.3117218017578, "logps/rejected": -267.72802734375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.2538607120513916, "rewards/margins": 12.309940338134766, "rewards/rejected": -13.563800811767578, "step": 2032 }, { "epoch": 0.69, "learning_rate": 1.5179198448500373e-06, "logits/chosen": -0.5345319509506226, "logits/rejected": -0.5158758759498596, "logps/chosen": -187.02047729492188, "logps/rejected": -306.167724609375, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.6348987817764282, "rewards/margins": 11.73796558380127, "rewards/rejected": -12.37286376953125, "step": 2033 }, { "epoch": 0.69, "learning_rate": 1.5174469631721843e-06, "logits/chosen": -0.517194390296936, "logits/rejected": -0.5069980025291443, "logps/chosen": -188.38304138183594, "logps/rejected": -316.9607238769531, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.2183177471160889, "rewards/margins": 12.197360038757324, "rewards/rejected": -13.415678024291992, "step": 2034 }, { "epoch": 0.69, "learning_rate": 1.516973923421425e-06, "logits/chosen": -0.5762287378311157, "logits/rejected": -0.5441647171974182, "logps/chosen": -216.70803833007812, "logps/rejected": -321.8620910644531, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.3726942539215088, "rewards/margins": 13.787717819213867, "rewards/rejected": -15.16041374206543, "step": 2035 }, { "epoch": 0.69, "learning_rate": 1.5165007257422666e-06, "logits/chosen": -0.5387497544288635, "logits/rejected": -0.49474838376045227, "logps/chosen": -256.5857849121094, "logps/rejected": -364.71514892578125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.8978761434555054, "rewards/margins": 15.959890365600586, "rewards/rejected": -16.857769012451172, "step": 2036 }, { "epoch": 0.7, "learning_rate": 1.5160273702792644e-06, "logits/chosen": -0.5733202695846558, "logits/rejected": -0.5537273287773132, "logps/chosen": -197.387939453125, "logps/rejected": -354.2930908203125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.085516929626465, "rewards/margins": 15.255136489868164, "rewards/rejected": -17.340652465820312, "step": 2037 }, { "epoch": 0.7, "learning_rate": 1.5155538571770216e-06, "logits/chosen": -0.598174512386322, "logits/rejected": -0.5591245889663696, "logps/chosen": -209.65467834472656, "logps/rejected": -299.67279052734375, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -0.5555301904678345, "rewards/margins": 14.184423446655273, "rewards/rejected": -14.739953994750977, "step": 2038 }, { "epoch": 0.7, "learning_rate": 1.5150801865801905e-06, "logits/chosen": -0.5307649374008179, "logits/rejected": -0.49260032176971436, "logps/chosen": -172.96224975585938, "logps/rejected": -243.49278259277344, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.5265146493911743, "rewards/margins": 10.659200668334961, "rewards/rejected": -12.185715675354004, "step": 2039 }, { "epoch": 0.7, "learning_rate": 1.5146063586334706e-06, "logits/chosen": -0.4368223249912262, "logits/rejected": -0.43093982338905334, "logps/chosen": -167.7604522705078, "logps/rejected": -265.107421875, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.047843337059021, "rewards/margins": 9.585797309875488, "rewards/rejected": -10.63364028930664, "step": 2040 }, { "epoch": 0.7, "learning_rate": 1.5141323734816099e-06, "logits/chosen": -0.5821275115013123, "logits/rejected": -0.5671823620796204, "logps/chosen": -148.71697998046875, "logps/rejected": -348.4552307128906, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.8384435176849365, "rewards/margins": 14.757176399230957, "rewards/rejected": -15.595621109008789, "step": 2041 }, { "epoch": 0.7, "learning_rate": 1.5136582312694043e-06, "logits/chosen": -0.5739021301269531, "logits/rejected": -0.5511860847473145, "logps/chosen": -173.84808349609375, "logps/rejected": -288.4396057128906, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.6371456384658813, "rewards/margins": 11.809759140014648, "rewards/rejected": -12.446905136108398, "step": 2042 }, { "epoch": 0.7, "learning_rate": 1.5131839321416977e-06, "logits/chosen": -0.5406240820884705, "logits/rejected": -0.5022215247154236, "logps/chosen": -199.3888702392578, "logps/rejected": -365.243408203125, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": 0.013942994177341461, "rewards/margins": 17.051010131835938, "rewards/rejected": -17.037067413330078, "step": 2043 }, { "epoch": 0.7, "learning_rate": 1.5127094762433816e-06, "logits/chosen": -0.43418511748313904, "logits/rejected": -0.3935820758342743, "logps/chosen": -206.03421020507812, "logps/rejected": -318.9962158203125, "loss": 0.0299, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2124760150909424, "rewards/margins": 13.585909843444824, "rewards/rejected": -15.798386573791504, "step": 2044 }, { "epoch": 0.7, "learning_rate": 1.5122348637193966e-06, "logits/chosen": -0.6018531918525696, "logits/rejected": -0.5669140219688416, "logps/chosen": -259.9991149902344, "logps/rejected": -386.74737548828125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.7129173874855042, "rewards/margins": 15.708025932312012, "rewards/rejected": -16.420944213867188, "step": 2045 }, { "epoch": 0.7, "learning_rate": 1.511760094714729e-06, "logits/chosen": -0.4971896708011627, "logits/rejected": -0.46233075857162476, "logps/chosen": -191.33596801757812, "logps/rejected": -283.206787109375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.9468548893928528, "rewards/margins": 13.32969856262207, "rewards/rejected": -14.276552200317383, "step": 2046 }, { "epoch": 0.7, "learning_rate": 1.5112851693744158e-06, "logits/chosen": -0.6170181632041931, "logits/rejected": -0.5943012237548828, "logps/chosen": -205.23004150390625, "logps/rejected": -307.3113098144531, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.7886630296707153, "rewards/margins": 11.795305252075195, "rewards/rejected": -12.583967208862305, "step": 2047 }, { "epoch": 0.7, "learning_rate": 1.5108100878435386e-06, "logits/chosen": -0.6182915568351746, "logits/rejected": -0.5928004384040833, "logps/chosen": -227.44677734375, "logps/rejected": -351.0623779296875, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": -0.9327389597892761, "rewards/margins": 13.095782279968262, "rewards/rejected": -14.028521537780762, "step": 2048 }, { "epoch": 0.7, "learning_rate": 1.5103348502672293e-06, "logits/chosen": -0.5292080640792847, "logits/rejected": -0.4697883129119873, "logps/chosen": -192.86126708984375, "logps/rejected": -290.66644287109375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.5254133343696594, "rewards/margins": 15.07408618927002, "rewards/rejected": -14.548672676086426, "step": 2049 }, { "epoch": 0.7, "learning_rate": 1.5098594567906655e-06, "logits/chosen": -0.5311728119850159, "logits/rejected": -0.5346858501434326, "logps/chosen": -174.6626739501953, "logps/rejected": -338.5954895019531, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": -0.6534835696220398, "rewards/margins": 12.676594734191895, "rewards/rejected": -13.330077171325684, "step": 2050 }, { "epoch": 0.7, "learning_rate": 1.5093839075590742e-06, "logits/chosen": -0.6858766078948975, "logits/rejected": -0.6526615023612976, "logps/chosen": -225.32196044921875, "logps/rejected": -318.27569580078125, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.7807451486587524, "rewards/margins": 11.770197868347168, "rewards/rejected": -12.550943374633789, "step": 2051 }, { "epoch": 0.7, "learning_rate": 1.5089082027177289e-06, "logits/chosen": -0.5884453654289246, "logits/rejected": -0.5730290412902832, "logps/chosen": -205.44296264648438, "logps/rejected": -339.5841369628906, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -1.508734107017517, "rewards/margins": 13.241935729980469, "rewards/rejected": -14.750669479370117, "step": 2052 }, { "epoch": 0.7, "learning_rate": 1.5084323424119507e-06, "logits/chosen": -0.5468263626098633, "logits/rejected": -0.5250105857849121, "logps/chosen": -233.72280883789062, "logps/rejected": -344.9111328125, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -0.26769545674324036, "rewards/margins": 13.291556358337402, "rewards/rejected": -13.559252738952637, "step": 2053 }, { "epoch": 0.7, "learning_rate": 1.5079563267871083e-06, "logits/chosen": -0.660434901714325, "logits/rejected": -0.6265596151351929, "logps/chosen": -248.8340301513672, "logps/rejected": -398.73126220703125, "loss": 0.0363, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6406698226928711, "rewards/margins": 14.796074867248535, "rewards/rejected": -15.436744689941406, "step": 2054 }, { "epoch": 0.7, "learning_rate": 1.507480155988618e-06, "logits/chosen": -0.523834228515625, "logits/rejected": -0.5211417078971863, "logps/chosen": -215.23562622070312, "logps/rejected": -357.14837646484375, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -2.1772050857543945, "rewards/margins": 13.552024841308594, "rewards/rejected": -15.729228973388672, "step": 2055 }, { "epoch": 0.7, "learning_rate": 1.5070038301619437e-06, "logits/chosen": -0.566680371761322, "logits/rejected": -0.5701434016227722, "logps/chosen": -159.05892944335938, "logps/rejected": -306.2895202636719, "loss": 0.0995, "rewards/accuracies": 1.0, "rewards/chosen": -1.3290231227874756, "rewards/margins": 12.180088996887207, "rewards/rejected": -13.509111404418945, "step": 2056 }, { "epoch": 0.7, "learning_rate": 1.5065273494525954e-06, "logits/chosen": -0.5936430096626282, "logits/rejected": -0.579179584980011, "logps/chosen": -191.63912963867188, "logps/rejected": -355.0992126464844, "loss": 0.0352, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7404829263687134, "rewards/margins": 15.458370208740234, "rewards/rejected": -16.1988525390625, "step": 2057 }, { "epoch": 0.7, "learning_rate": 1.506050714006133e-06, "logits/chosen": -0.5513507127761841, "logits/rejected": -0.5324780941009521, "logps/chosen": -212.23728942871094, "logps/rejected": -311.72552490234375, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -0.28780803084373474, "rewards/margins": 13.173173904418945, "rewards/rejected": -13.460980415344238, "step": 2058 }, { "epoch": 0.7, "learning_rate": 1.5055739239681601e-06, "logits/chosen": -0.5175256729125977, "logits/rejected": -0.5047991871833801, "logps/chosen": -181.545654296875, "logps/rejected": -306.9863586425781, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.13394604623317719, "rewards/margins": 12.448282241821289, "rewards/rejected": -12.582229614257812, "step": 2059 }, { "epoch": 0.7, "learning_rate": 1.5050969794843313e-06, "logits/chosen": -0.6025569438934326, "logits/rejected": -0.5965052247047424, "logps/chosen": -156.3590850830078, "logps/rejected": -250.48526000976562, "loss": 0.0299, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2899185419082642, "rewards/margins": 10.584571838378906, "rewards/rejected": -11.874490737915039, "step": 2060 }, { "epoch": 0.7, "learning_rate": 1.5046198807003458e-06, "logits/chosen": -0.6415776610374451, "logits/rejected": -0.5944771766662598, "logps/chosen": -190.31385803222656, "logps/rejected": -251.06776428222656, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -0.9677563905715942, "rewards/margins": 12.346137046813965, "rewards/rejected": -13.31389331817627, "step": 2061 }, { "epoch": 0.7, "learning_rate": 1.5041426277619503e-06, "logits/chosen": -0.5482490062713623, "logits/rejected": -0.5476288199424744, "logps/chosen": -65.51890563964844, "logps/rejected": -171.4830322265625, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -0.9033252000808716, "rewards/margins": 8.763236999511719, "rewards/rejected": -9.6665620803833, "step": 2062 }, { "epoch": 0.7, "learning_rate": 1.5036652208149392e-06, "logits/chosen": -0.5798502564430237, "logits/rejected": -0.5334081053733826, "logps/chosen": -208.95347595214844, "logps/rejected": -348.21405029296875, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -1.6306250095367432, "rewards/margins": 15.405961036682129, "rewards/rejected": -17.03658676147461, "step": 2063 }, { "epoch": 0.7, "learning_rate": 1.503187660005154e-06, "logits/chosen": -0.5874243974685669, "logits/rejected": -0.5872796177864075, "logps/chosen": -123.31575012207031, "logps/rejected": -281.503662109375, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -1.4041790962219238, "rewards/margins": 12.784440994262695, "rewards/rejected": -14.188619613647461, "step": 2064 }, { "epoch": 0.7, "learning_rate": 1.5027099454784828e-06, "logits/chosen": -0.5357478857040405, "logits/rejected": -0.5044038891792297, "logps/chosen": -198.861572265625, "logps/rejected": -307.2325744628906, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 0.3739904761314392, "rewards/margins": 14.47353744506836, "rewards/rejected": -14.099546432495117, "step": 2065 }, { "epoch": 0.71, "learning_rate": 1.502232077380861e-06, "logits/chosen": -0.633539080619812, "logits/rejected": -0.6002005338668823, "logps/chosen": -220.4051513671875, "logps/rejected": -340.3505859375, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": 0.5060713887214661, "rewards/margins": 16.34796905517578, "rewards/rejected": -15.841898918151855, "step": 2066 }, { "epoch": 0.71, "learning_rate": 1.5017540558582706e-06, "logits/chosen": -0.5147985816001892, "logits/rejected": -0.509042501449585, "logps/chosen": -151.82272338867188, "logps/rejected": -284.4278259277344, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -1.1591346263885498, "rewards/margins": 11.872520446777344, "rewards/rejected": -13.031655311584473, "step": 2067 }, { "epoch": 0.71, "learning_rate": 1.5012758810567404e-06, "logits/chosen": -0.5950242280960083, "logits/rejected": -0.5987311005592346, "logps/chosen": -225.92120361328125, "logps/rejected": -413.30914306640625, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -1.1969819068908691, "rewards/margins": 14.543704986572266, "rewards/rejected": -15.740686416625977, "step": 2068 }, { "epoch": 0.71, "learning_rate": 1.500797553122346e-06, "logits/chosen": -0.5997520685195923, "logits/rejected": -0.5578483939170837, "logps/chosen": -281.8834533691406, "logps/rejected": -340.09954833984375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.25389373302459717, "rewards/margins": 14.23189640045166, "rewards/rejected": -14.485790252685547, "step": 2069 }, { "epoch": 0.71, "learning_rate": 1.5003190722012108e-06, "logits/chosen": -0.6626520156860352, "logits/rejected": -0.6315970420837402, "logps/chosen": -196.23475646972656, "logps/rejected": -319.29217529296875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.902649462223053, "rewards/margins": 12.181406021118164, "rewards/rejected": -13.08405590057373, "step": 2070 }, { "epoch": 0.71, "learning_rate": 1.4998404384395031e-06, "logits/chosen": -0.4918622076511383, "logits/rejected": -0.4736103117465973, "logps/chosen": -208.2089385986328, "logps/rejected": -375.9645080566406, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -2.0011940002441406, "rewards/margins": 15.647217750549316, "rewards/rejected": -17.648412704467773, "step": 2071 }, { "epoch": 0.71, "learning_rate": 1.4993616519834395e-06, "logits/chosen": -0.5989003777503967, "logits/rejected": -0.5920656323432922, "logps/chosen": -197.17767333984375, "logps/rejected": -338.50042724609375, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.8219606876373291, "rewards/margins": 11.59839916229248, "rewards/rejected": -12.420358657836914, "step": 2072 }, { "epoch": 0.71, "learning_rate": 1.4988827129792827e-06, "logits/chosen": -0.5586848258972168, "logits/rejected": -0.5411122441291809, "logps/chosen": -206.7830810546875, "logps/rejected": -341.77532958984375, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -1.5421831607818604, "rewards/margins": 14.326262474060059, "rewards/rejected": -15.86844539642334, "step": 2073 }, { "epoch": 0.71, "learning_rate": 1.4984036215733418e-06, "logits/chosen": -0.585626482963562, "logits/rejected": -0.5468451380729675, "logps/chosen": -241.59278869628906, "logps/rejected": -331.450927734375, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 0.08668404072523117, "rewards/margins": 14.436369895935059, "rewards/rejected": -14.349684715270996, "step": 2074 }, { "epoch": 0.71, "learning_rate": 1.4979243779119725e-06, "logits/chosen": -0.5495830774307251, "logits/rejected": -0.5182611346244812, "logps/chosen": -220.01654052734375, "logps/rejected": -346.5329284667969, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.8023176193237305, "rewards/margins": 14.699907302856445, "rewards/rejected": -15.502224922180176, "step": 2075 }, { "epoch": 0.71, "learning_rate": 1.4974449821415775e-06, "logits/chosen": -0.6187906265258789, "logits/rejected": -0.6122561693191528, "logps/chosen": -234.135986328125, "logps/rejected": -379.68499755859375, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": -2.3174102306365967, "rewards/margins": 13.284377098083496, "rewards/rejected": -15.601786613464355, "step": 2076 }, { "epoch": 0.71, "learning_rate": 1.496965434408605e-06, "logits/chosen": -0.5343340039253235, "logits/rejected": -0.5157061219215393, "logps/chosen": -174.52691650390625, "logps/rejected": -315.52557373046875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.044230341911316, "rewards/margins": 11.912093162536621, "rewards/rejected": -12.956323623657227, "step": 2077 }, { "epoch": 0.71, "learning_rate": 1.4964857348595507e-06, "logits/chosen": -0.6281740069389343, "logits/rejected": -0.5959730744361877, "logps/chosen": -227.19334411621094, "logps/rejected": -350.6722412109375, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.4452427625656128, "rewards/margins": 13.865306854248047, "rewards/rejected": -14.31054973602295, "step": 2078 }, { "epoch": 0.71, "learning_rate": 1.4960058836409562e-06, "logits/chosen": -0.46609458327293396, "logits/rejected": -0.42590564489364624, "logps/chosen": -211.9001007080078, "logps/rejected": -298.5115051269531, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.3506181240081787, "rewards/margins": 12.101242065429688, "rewards/rejected": -13.451861381530762, "step": 2079 }, { "epoch": 0.71, "learning_rate": 1.4955258808994094e-06, "logits/chosen": -0.45578739047050476, "logits/rejected": -0.42882323265075684, "logps/chosen": -204.8718719482422, "logps/rejected": -394.23638916015625, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": -1.7250196933746338, "rewards/margins": 18.20765495300293, "rewards/rejected": -19.932676315307617, "step": 2080 }, { "epoch": 0.71, "learning_rate": 1.495045726781544e-06, "logits/chosen": -0.6404184699058533, "logits/rejected": -0.6277850270271301, "logps/chosen": -198.18417358398438, "logps/rejected": -343.22015380859375, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -2.0014398097991943, "rewards/margins": 13.57553482055664, "rewards/rejected": -15.576974868774414, "step": 2081 }, { "epoch": 0.71, "learning_rate": 1.4945654214340412e-06, "logits/chosen": -0.5665796995162964, "logits/rejected": -0.5575661659240723, "logps/chosen": -203.3894500732422, "logps/rejected": -287.0931091308594, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.34298765659332275, "rewards/margins": 10.248624801635742, "rewards/rejected": -10.591611862182617, "step": 2082 }, { "epoch": 0.71, "learning_rate": 1.494084965003627e-06, "logits/chosen": -0.5407591462135315, "logits/rejected": -0.5113416910171509, "logps/chosen": -240.79385375976562, "logps/rejected": -396.6626892089844, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -1.7893849611282349, "rewards/margins": 17.43865203857422, "rewards/rejected": -19.228036880493164, "step": 2083 }, { "epoch": 0.71, "learning_rate": 1.4936043576370747e-06, "logits/chosen": -0.5554946064949036, "logits/rejected": -0.5371156930923462, "logps/chosen": -152.22256469726562, "logps/rejected": -272.16656494140625, "loss": 0.0349, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4614126682281494, "rewards/margins": 11.768229484558105, "rewards/rejected": -13.229642868041992, "step": 2084 }, { "epoch": 0.71, "learning_rate": 1.4931235994812031e-06, "logits/chosen": -0.5326744318008423, "logits/rejected": -0.5024693012237549, "logps/chosen": -193.53643798828125, "logps/rejected": -319.5364990234375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.9511629343032837, "rewards/margins": 12.915971755981445, "rewards/rejected": -13.867135047912598, "step": 2085 }, { "epoch": 0.71, "learning_rate": 1.4926426906828769e-06, "logits/chosen": -0.5359357595443726, "logits/rejected": -0.5023651123046875, "logps/chosen": -215.14830017089844, "logps/rejected": -302.9642028808594, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.338423490524292, "rewards/margins": 11.991005897521973, "rewards/rejected": -12.329428672790527, "step": 2086 }, { "epoch": 0.71, "learning_rate": 1.492161631389007e-06, "logits/chosen": -0.5192973613739014, "logits/rejected": -0.5049665570259094, "logps/chosen": -229.46713256835938, "logps/rejected": -427.5558776855469, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -2.252953052520752, "rewards/margins": 14.845139503479004, "rewards/rejected": -17.098094940185547, "step": 2087 }, { "epoch": 0.71, "learning_rate": 1.4916804217465514e-06, "logits/chosen": -0.5593317747116089, "logits/rejected": -0.538516640663147, "logps/chosen": -214.26585388183594, "logps/rejected": -293.0240783691406, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -0.28556686639785767, "rewards/margins": 11.73216438293457, "rewards/rejected": -12.017730712890625, "step": 2088 }, { "epoch": 0.71, "learning_rate": 1.491199061902512e-06, "logits/chosen": -0.49289730191230774, "logits/rejected": -0.45079305768013, "logps/chosen": -202.11569213867188, "logps/rejected": -258.81475830078125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.5332577228546143, "rewards/margins": 10.964186668395996, "rewards/rejected": -11.497444152832031, "step": 2089 }, { "epoch": 0.71, "learning_rate": 1.490717552003938e-06, "logits/chosen": -0.5273037552833557, "logits/rejected": -0.5050402283668518, "logps/chosen": -242.02598571777344, "logps/rejected": -389.6156005859375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.0906400978565216, "rewards/margins": 15.385790824890137, "rewards/rejected": -15.295150756835938, "step": 2090 }, { "epoch": 0.71, "learning_rate": 1.4902358921979238e-06, "logits/chosen": -0.49329280853271484, "logits/rejected": -0.48699334263801575, "logps/chosen": -136.48524475097656, "logps/rejected": -289.4364318847656, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -2.2296454906463623, "rewards/margins": 12.867684364318848, "rewards/rejected": -15.097330093383789, "step": 2091 }, { "epoch": 0.71, "learning_rate": 1.4897540826316098e-06, "logits/chosen": -0.5592651963233948, "logits/rejected": -0.5462970733642578, "logps/chosen": -235.84852600097656, "logps/rejected": -406.2212219238281, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.13913613557815552, "rewards/margins": 18.069137573242188, "rewards/rejected": -18.208274841308594, "step": 2092 }, { "epoch": 0.71, "learning_rate": 1.4892721234521823e-06, "logits/chosen": -0.5494101643562317, "logits/rejected": -0.5210115313529968, "logps/chosen": -248.75856018066406, "logps/rejected": -347.9306335449219, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.2104072570800781, "rewards/margins": 12.785892486572266, "rewards/rejected": -13.996299743652344, "step": 2093 }, { "epoch": 0.71, "learning_rate": 1.4887900148068735e-06, "logits/chosen": -0.46642082929611206, "logits/rejected": -0.4557056725025177, "logps/chosen": -194.96128845214844, "logps/rejected": -302.5574035644531, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -0.9995325803756714, "rewards/margins": 11.733417510986328, "rewards/rejected": -12.732950210571289, "step": 2094 }, { "epoch": 0.72, "learning_rate": 1.4883077568429606e-06, "logits/chosen": -0.5260825753211975, "logits/rejected": -0.5009585022926331, "logps/chosen": -200.84803771972656, "logps/rejected": -297.4532470703125, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 0.40801677107810974, "rewards/margins": 14.294469833374023, "rewards/rejected": -13.886452674865723, "step": 2095 }, { "epoch": 0.72, "learning_rate": 1.4878253497077662e-06, "logits/chosen": -0.5289362072944641, "logits/rejected": -0.5148253440856934, "logps/chosen": -196.81394958496094, "logps/rejected": -319.3758850097656, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -1.1488051414489746, "rewards/margins": 13.435734748840332, "rewards/rejected": -14.584539413452148, "step": 2096 }, { "epoch": 0.72, "learning_rate": 1.4873427935486601e-06, "logits/chosen": -0.5291875004768372, "logits/rejected": -0.5185699462890625, "logps/chosen": -220.22132873535156, "logps/rejected": -359.3702087402344, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -0.5635082721710205, "rewards/margins": 13.139010429382324, "rewards/rejected": -13.702518463134766, "step": 2097 }, { "epoch": 0.72, "learning_rate": 1.486860088513056e-06, "logits/chosen": -0.5880460143089294, "logits/rejected": -0.5582677125930786, "logps/chosen": -253.15496826171875, "logps/rejected": -405.87353515625, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7640405297279358, "rewards/margins": 16.054651260375977, "rewards/rejected": -16.81869125366211, "step": 2098 }, { "epoch": 0.72, "learning_rate": 1.4863772347484138e-06, "logits/chosen": -0.6094346046447754, "logits/rejected": -0.5854777693748474, "logps/chosen": -240.99610900878906, "logps/rejected": -359.0250549316406, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": 0.7099964618682861, "rewards/margins": 14.587591171264648, "rewards/rejected": -13.877595901489258, "step": 2099 }, { "epoch": 0.72, "learning_rate": 1.4858942324022382e-06, "logits/chosen": -0.5198549032211304, "logits/rejected": -0.4902588725090027, "logps/chosen": -234.0254364013672, "logps/rejected": -332.7565002441406, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.21555712819099426, "rewards/margins": 14.240153312683105, "rewards/rejected": -14.455710411071777, "step": 2100 }, { "epoch": 0.72, "learning_rate": 1.4854110816220803e-06, "logits/chosen": -0.5310806632041931, "logits/rejected": -0.4951741099357605, "logps/chosen": -214.4441680908203, "logps/rejected": -352.15966796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.76165771484375, "rewards/margins": 13.919490814208984, "rewards/rejected": -15.681148529052734, "step": 2101 }, { "epoch": 0.72, "learning_rate": 1.4849277825555358e-06, "logits/chosen": -0.49599626660346985, "logits/rejected": -0.48880735039711, "logps/chosen": -139.89781188964844, "logps/rejected": -325.1949768066406, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -1.6210792064666748, "rewards/margins": 13.687191009521484, "rewards/rejected": -15.308270454406738, "step": 2102 }, { "epoch": 0.72, "learning_rate": 1.484444335350246e-06, "logits/chosen": -0.5301926136016846, "logits/rejected": -0.5133556723594666, "logps/chosen": -166.2943115234375, "logps/rejected": -276.6848449707031, "loss": 0.0313, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5149390697479248, "rewards/margins": 11.412796974182129, "rewards/rejected": -11.927736282348633, "step": 2103 }, { "epoch": 0.72, "learning_rate": 1.483960740153897e-06, "logits/chosen": -0.5704965591430664, "logits/rejected": -0.5265814065933228, "logps/chosen": -250.38800048828125, "logps/rejected": -357.78948974609375, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": 1.0575872659683228, "rewards/margins": 16.449783325195312, "rewards/rejected": -15.392197608947754, "step": 2104 }, { "epoch": 0.72, "learning_rate": 1.4834769971142207e-06, "logits/chosen": -0.6187666058540344, "logits/rejected": -0.5827295184135437, "logps/chosen": -216.1579132080078, "logps/rejected": -352.7438659667969, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -2.4422903060913086, "rewards/margins": 13.399147033691406, "rewards/rejected": -15.841438293457031, "step": 2105 }, { "epoch": 0.72, "learning_rate": 1.4829931063789939e-06, "logits/chosen": -0.5971725583076477, "logits/rejected": -0.5738574266433716, "logps/chosen": -234.90594482421875, "logps/rejected": -376.03485107421875, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.6548030972480774, "rewards/margins": 14.34145736694336, "rewards/rejected": -14.996262550354004, "step": 2106 }, { "epoch": 0.72, "learning_rate": 1.4825090680960386e-06, "logits/chosen": -0.5295378565788269, "logits/rejected": -0.524318277835846, "logps/chosen": -181.9986572265625, "logps/rejected": -323.04345703125, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.29155856370925903, "rewards/margins": 14.422247886657715, "rewards/rejected": -14.71380615234375, "step": 2107 }, { "epoch": 0.72, "learning_rate": 1.482024882413222e-06, "logits/chosen": -0.5640272498130798, "logits/rejected": -0.5226319432258606, "logps/chosen": -196.27285766601562, "logps/rejected": -295.8553771972656, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.5787426233291626, "rewards/margins": 13.238396644592285, "rewards/rejected": -14.817138671875, "step": 2108 }, { "epoch": 0.72, "learning_rate": 1.4815405494784556e-06, "logits/chosen": -0.5986145734786987, "logits/rejected": -0.5659112930297852, "logps/chosen": -222.65130615234375, "logps/rejected": -324.95452880859375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.7447770833969116, "rewards/margins": 11.475918769836426, "rewards/rejected": -13.220697402954102, "step": 2109 }, { "epoch": 0.72, "learning_rate": 1.4810560694396968e-06, "logits/chosen": -0.5439188480377197, "logits/rejected": -0.5084174871444702, "logps/chosen": -197.86416625976562, "logps/rejected": -336.78759765625, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.8578190803527832, "rewards/margins": 14.928234100341797, "rewards/rejected": -15.786054611206055, "step": 2110 }, { "epoch": 0.72, "learning_rate": 1.4805714424449475e-06, "logits/chosen": -0.5889976620674133, "logits/rejected": -0.5592036247253418, "logps/chosen": -200.74444580078125, "logps/rejected": -357.0712585449219, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -0.6002931594848633, "rewards/margins": 15.767507553100586, "rewards/rejected": -16.367801666259766, "step": 2111 }, { "epoch": 0.72, "learning_rate": 1.4800866686422546e-06, "logits/chosen": -0.5221719741821289, "logits/rejected": -0.4640756845474243, "logps/chosen": -229.39930725097656, "logps/rejected": -249.79656982421875, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -0.5914101600646973, "rewards/margins": 11.305974960327148, "rewards/rejected": -11.89738655090332, "step": 2112 }, { "epoch": 0.72, "learning_rate": 1.4796017481797099e-06, "logits/chosen": -0.5655083060264587, "logits/rejected": -0.5563005805015564, "logps/chosen": -169.87078857421875, "logps/rejected": -332.67498779296875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.3994274139404297, "rewards/margins": 13.904035568237305, "rewards/rejected": -15.303462982177734, "step": 2113 }, { "epoch": 0.72, "learning_rate": 1.4791166812054495e-06, "logits/chosen": -0.5366082191467285, "logits/rejected": -0.5260128974914551, "logps/chosen": -165.73606872558594, "logps/rejected": -292.7401428222656, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -1.794534683227539, "rewards/margins": 11.28215503692627, "rewards/rejected": -13.076688766479492, "step": 2114 }, { "epoch": 0.72, "learning_rate": 1.4786314678676553e-06, "logits/chosen": -0.49396756291389465, "logits/rejected": -0.4796522855758667, "logps/chosen": -208.88055419921875, "logps/rejected": -342.10888671875, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -0.587968111038208, "rewards/margins": 15.209527015686035, "rewards/rejected": -15.797494888305664, "step": 2115 }, { "epoch": 0.72, "learning_rate": 1.4781461083145525e-06, "logits/chosen": -0.5714166164398193, "logits/rejected": -0.5517063736915588, "logps/chosen": -194.2834930419922, "logps/rejected": -340.0782165527344, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.6741461753845215, "rewards/margins": 14.259139060974121, "rewards/rejected": -15.9332857131958, "step": 2116 }, { "epoch": 0.72, "learning_rate": 1.4776606026944123e-06, "logits/chosen": -0.4770314395427704, "logits/rejected": -0.45151787996292114, "logps/chosen": -192.1939697265625, "logps/rejected": -316.770751953125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.2457671165466309, "rewards/margins": 14.728096961975098, "rewards/rejected": -15.97386360168457, "step": 2117 }, { "epoch": 0.72, "learning_rate": 1.4771749511555503e-06, "logits/chosen": -0.5302048325538635, "logits/rejected": -0.5223270654678345, "logps/chosen": -208.89544677734375, "logps/rejected": -385.846435546875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.6799757480621338, "rewards/margins": 14.345224380493164, "rewards/rejected": -16.02519989013672, "step": 2118 }, { "epoch": 0.72, "learning_rate": 1.4766891538463254e-06, "logits/chosen": -0.6265913844108582, "logits/rejected": -0.5926414728164673, "logps/chosen": -208.69021606445312, "logps/rejected": -288.82470703125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 1.1760246753692627, "rewards/margins": 14.360123634338379, "rewards/rejected": -13.184099197387695, "step": 2119 }, { "epoch": 0.72, "learning_rate": 1.4762032109151426e-06, "logits/chosen": -0.5777971744537354, "logits/rejected": -0.507455050945282, "logps/chosen": -326.49212646484375, "logps/rejected": -390.83441162109375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.2535521388053894, "rewards/margins": 15.54841423034668, "rewards/rejected": -15.801966667175293, "step": 2120 }, { "epoch": 0.72, "learning_rate": 1.4757171225104506e-06, "logits/chosen": -0.4679383933544159, "logits/rejected": -0.4443593919277191, "logps/chosen": -188.14381408691406, "logps/rejected": -388.416015625, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -1.6501961946487427, "rewards/margins": 16.01373291015625, "rewards/rejected": -17.663928985595703, "step": 2121 }, { "epoch": 0.72, "learning_rate": 1.4752308887807426e-06, "logits/chosen": -0.5635315775871277, "logits/rejected": -0.535396933555603, "logps/chosen": -184.656005859375, "logps/rejected": -318.2832336425781, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5734652280807495, "rewards/margins": 12.248271942138672, "rewards/rejected": -12.821736335754395, "step": 2122 }, { "epoch": 0.72, "learning_rate": 1.4747445098745565e-06, "logits/chosen": -0.502349317073822, "logits/rejected": -0.46882566809654236, "logps/chosen": -194.22573852539062, "logps/rejected": -254.9212188720703, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 0.8027542233467102, "rewards/margins": 11.49123477935791, "rewards/rejected": -10.688480377197266, "step": 2123 }, { "epoch": 0.72, "learning_rate": 1.4742579859404742e-06, "logits/chosen": -0.5049186944961548, "logits/rejected": -0.49442312121391296, "logps/chosen": -202.27853393554688, "logps/rejected": -352.0916442871094, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8184126615524292, "rewards/margins": 13.238961219787598, "rewards/rejected": -14.057374000549316, "step": 2124 }, { "epoch": 0.73, "learning_rate": 1.4737713171271223e-06, "logits/chosen": -0.511119544506073, "logits/rejected": -0.4796014428138733, "logps/chosen": -204.2801055908203, "logps/rejected": -390.1246643066406, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.7067986130714417, "rewards/margins": 17.65205192565918, "rewards/rejected": -18.358850479125977, "step": 2125 }, { "epoch": 0.73, "learning_rate": 1.4732845035831707e-06, "logits/chosen": -0.4436621367931366, "logits/rejected": -0.45251575112342834, "logps/chosen": -199.47824096679688, "logps/rejected": -402.6374206542969, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -1.3723143339157104, "rewards/margins": 16.641569137573242, "rewards/rejected": -18.013883590698242, "step": 2126 }, { "epoch": 0.73, "learning_rate": 1.4727975454573355e-06, "logits/chosen": -0.5306724309921265, "logits/rejected": -0.48945170640945435, "logps/chosen": -210.5169219970703, "logps/rejected": -289.9395446777344, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -0.21987703442573547, "rewards/margins": 12.845787048339844, "rewards/rejected": -13.065665245056152, "step": 2127 }, { "epoch": 0.73, "learning_rate": 1.4723104428983744e-06, "logits/chosen": -0.5833815932273865, "logits/rejected": -0.5539604425430298, "logps/chosen": -195.51927185058594, "logps/rejected": -308.39794921875, "loss": 0.0257, "rewards/accuracies": 0.9375, "rewards/chosen": -1.373946189880371, "rewards/margins": 13.150374412536621, "rewards/rejected": -14.524319648742676, "step": 2128 }, { "epoch": 0.73, "learning_rate": 1.471823196055091e-06, "logits/chosen": -0.46304646134376526, "logits/rejected": -0.44205620884895325, "logps/chosen": -189.63075256347656, "logps/rejected": -302.5694580078125, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.801155686378479, "rewards/margins": 10.859408378601074, "rewards/rejected": -11.660564422607422, "step": 2129 }, { "epoch": 0.73, "learning_rate": 1.4713358050763327e-06, "logits/chosen": -0.6474698185920715, "logits/rejected": -0.6107973456382751, "logps/chosen": -226.36070251464844, "logps/rejected": -329.2972412109375, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -0.40814900398254395, "rewards/margins": 14.413599967956543, "rewards/rejected": -14.821749687194824, "step": 2130 }, { "epoch": 0.73, "learning_rate": 1.4708482701109903e-06, "logits/chosen": -0.4549654722213745, "logits/rejected": -0.4563315212726593, "logps/chosen": -113.67901611328125, "logps/rejected": -276.3797607421875, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.45966994762420654, "rewards/margins": 13.556221961975098, "rewards/rejected": -14.015892028808594, "step": 2131 }, { "epoch": 0.73, "learning_rate": 1.4703605913079994e-06, "logits/chosen": -0.5003857612609863, "logits/rejected": -0.5109429955482483, "logps/chosen": -109.85760498046875, "logps/rejected": -227.42286682128906, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0108426809310913, "rewards/margins": 10.363609313964844, "rewards/rejected": -11.374451637268066, "step": 2132 }, { "epoch": 0.73, "learning_rate": 1.469872768816339e-06, "logits/chosen": -0.583911657333374, "logits/rejected": -0.5583324432373047, "logps/chosen": -223.38201904296875, "logps/rejected": -332.7571105957031, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.00981605052948, "rewards/margins": 12.468812942504883, "rewards/rejected": -13.478628158569336, "step": 2133 }, { "epoch": 0.73, "learning_rate": 1.469384802785032e-06, "logits/chosen": -0.6149716377258301, "logits/rejected": -0.5877591371536255, "logps/chosen": -242.69615173339844, "logps/rejected": -425.63690185546875, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -0.0053992122411727905, "rewards/margins": 17.996593475341797, "rewards/rejected": -18.001991271972656, "step": 2134 }, { "epoch": 0.73, "learning_rate": 1.4688966933631455e-06, "logits/chosen": -0.4926324486732483, "logits/rejected": -0.476175993680954, "logps/chosen": -216.98834228515625, "logps/rejected": -369.9511413574219, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -1.321060299873352, "rewards/margins": 15.242453575134277, "rewards/rejected": -16.563512802124023, "step": 2135 }, { "epoch": 0.73, "learning_rate": 1.4684084406997901e-06, "logits/chosen": -0.5388035774230957, "logits/rejected": -0.5146637558937073, "logps/chosen": -199.5388641357422, "logps/rejected": -304.6642761230469, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -1.2704297304153442, "rewards/margins": 11.310762405395508, "rewards/rejected": -12.581192970275879, "step": 2136 }, { "epoch": 0.73, "learning_rate": 1.4679200449441203e-06, "logits/chosen": -0.5372567176818848, "logits/rejected": -0.5112097263336182, "logps/chosen": -212.0394287109375, "logps/rejected": -307.2156677246094, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.27061301469802856, "rewards/margins": 11.579400062561035, "rewards/rejected": -11.85001277923584, "step": 2137 }, { "epoch": 0.73, "learning_rate": 1.467431506245334e-06, "logits/chosen": -0.47847315669059753, "logits/rejected": -0.4564013183116913, "logps/chosen": -222.19374084472656, "logps/rejected": -287.5140075683594, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -1.5724949836730957, "rewards/margins": 10.082239151000977, "rewards/rejected": -11.65473461151123, "step": 2138 }, { "epoch": 0.73, "learning_rate": 1.4669428247526735e-06, "logits/chosen": -0.5242766737937927, "logits/rejected": -0.49242663383483887, "logps/chosen": -167.36367797851562, "logps/rejected": -264.3036804199219, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.8600003719329834, "rewards/margins": 11.439445495605469, "rewards/rejected": -12.299445152282715, "step": 2139 }, { "epoch": 0.73, "learning_rate": 1.4664540006154235e-06, "logits/chosen": -0.5266361832618713, "logits/rejected": -0.5195395350456238, "logps/chosen": -232.46328735351562, "logps/rejected": -337.7623596191406, "loss": 0.0434, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6214665174484253, "rewards/margins": 14.282791137695312, "rewards/rejected": -13.661323547363281, "step": 2140 }, { "epoch": 0.73, "learning_rate": 1.4659650339829139e-06, "logits/chosen": -0.43382346630096436, "logits/rejected": -0.391426146030426, "logps/chosen": -233.7979736328125, "logps/rejected": -353.61236572265625, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": -1.075453519821167, "rewards/margins": 14.334650039672852, "rewards/rejected": -15.410102844238281, "step": 2141 }, { "epoch": 0.73, "learning_rate": 1.465475925004517e-06, "logits/chosen": -0.5557999014854431, "logits/rejected": -0.5036340951919556, "logps/chosen": -292.574951171875, "logps/rejected": -304.1201477050781, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -1.0504021644592285, "rewards/margins": 10.667512893676758, "rewards/rejected": -11.717915534973145, "step": 2142 }, { "epoch": 0.73, "learning_rate": 1.4649866738296486e-06, "logits/chosen": -0.5083447098731995, "logits/rejected": -0.5069292783737183, "logps/chosen": -196.60824584960938, "logps/rejected": -339.59259033203125, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.5715467929840088, "rewards/margins": 11.796512603759766, "rewards/rejected": -12.368060111999512, "step": 2143 }, { "epoch": 0.73, "learning_rate": 1.4644972806077682e-06, "logits/chosen": -0.506237268447876, "logits/rejected": -0.46542903780937195, "logps/chosen": -285.1434631347656, "logps/rejected": -454.5724792480469, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 0.23319050669670105, "rewards/margins": 18.7344913482666, "rewards/rejected": -18.501300811767578, "step": 2144 }, { "epoch": 0.73, "learning_rate": 1.4640077454883788e-06, "logits/chosen": -0.4997346103191376, "logits/rejected": -0.4863850772380829, "logps/chosen": -185.35887145996094, "logps/rejected": -339.97088623046875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.6861664056777954, "rewards/margins": 14.987630844116211, "rewards/rejected": -15.673796653747559, "step": 2145 }, { "epoch": 0.73, "learning_rate": 1.4635180686210267e-06, "logits/chosen": -0.4965716600418091, "logits/rejected": -0.475848913192749, "logps/chosen": -228.88636779785156, "logps/rejected": -354.9420471191406, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": -1.5194647312164307, "rewards/margins": 11.81346321105957, "rewards/rejected": -13.332929611206055, "step": 2146 }, { "epoch": 0.73, "learning_rate": 1.4630282501553014e-06, "logits/chosen": -0.5033618211746216, "logits/rejected": -0.45291364192962646, "logps/chosen": -193.3283233642578, "logps/rejected": -187.61846923828125, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": 0.31354185938835144, "rewards/margins": 8.721590042114258, "rewards/rejected": -8.408047676086426, "step": 2147 }, { "epoch": 0.73, "learning_rate": 1.4625382902408354e-06, "logits/chosen": -0.5141810774803162, "logits/rejected": -0.49224382638931274, "logps/chosen": -189.78050231933594, "logps/rejected": -344.4352111816406, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.776747465133667, "rewards/margins": 14.455177307128906, "rewards/rejected": -15.231925010681152, "step": 2148 }, { "epoch": 0.73, "learning_rate": 1.4620481890273049e-06, "logits/chosen": -0.5449850559234619, "logits/rejected": -0.4900166988372803, "logps/chosen": -248.3175811767578, "logps/rejected": -340.47052001953125, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": 0.19563144445419312, "rewards/margins": 15.576876640319824, "rewards/rejected": -15.381247520446777, "step": 2149 }, { "epoch": 0.73, "learning_rate": 1.461557946664429e-06, "logits/chosen": -0.5204941034317017, "logits/rejected": -0.47555258870124817, "logps/chosen": -215.99932861328125, "logps/rejected": -385.767333984375, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.6366040706634521, "rewards/margins": 15.555441856384277, "rewards/rejected": -17.192045211791992, "step": 2150 }, { "epoch": 0.73, "learning_rate": 1.4610675633019703e-06, "logits/chosen": -0.561072587966919, "logits/rejected": -0.5453841686248779, "logps/chosen": -189.00830078125, "logps/rejected": -324.5400085449219, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 0.06710686534643173, "rewards/margins": 14.611133575439453, "rewards/rejected": -14.544027328491211, "step": 2151 }, { "epoch": 0.73, "learning_rate": 1.4605770390897337e-06, "logits/chosen": -0.5482082366943359, "logits/rejected": -0.5295065641403198, "logps/chosen": -311.8646545410156, "logps/rejected": -474.5324401855469, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -0.3655201196670532, "rewards/margins": 16.044837951660156, "rewards/rejected": -16.410358428955078, "step": 2152 }, { "epoch": 0.73, "learning_rate": 1.4600863741775675e-06, "logits/chosen": -0.5763909816741943, "logits/rejected": -0.5140304565429688, "logps/chosen": -277.1844177246094, "logps/rejected": -362.98114013671875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": 0.6449042558670044, "rewards/margins": 14.976887702941895, "rewards/rejected": -14.33198356628418, "step": 2153 }, { "epoch": 0.74, "learning_rate": 1.4595955687153638e-06, "logits/chosen": -0.43241333961486816, "logits/rejected": -0.4079540967941284, "logps/chosen": -176.03346252441406, "logps/rejected": -236.2036895751953, "loss": 0.1484, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0391269326210022, "rewards/margins": 10.492956161499023, "rewards/rejected": -10.532082557678223, "step": 2154 }, { "epoch": 0.74, "learning_rate": 1.4591046228530562e-06, "logits/chosen": -0.5038211941719055, "logits/rejected": -0.4746561348438263, "logps/chosen": -202.42532348632812, "logps/rejected": -315.9511413574219, "loss": 0.0516, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7018346190452576, "rewards/margins": 13.924234390258789, "rewards/rejected": -14.626070022583008, "step": 2155 }, { "epoch": 0.74, "learning_rate": 1.4586135367406222e-06, "logits/chosen": -0.5994734168052673, "logits/rejected": -0.5800643563270569, "logps/chosen": -188.99354553222656, "logps/rejected": -262.89678955078125, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.8467806577682495, "rewards/margins": 10.037674903869629, "rewards/rejected": -10.884454727172852, "step": 2156 }, { "epoch": 0.74, "learning_rate": 1.4581223105280817e-06, "logits/chosen": -0.5340209603309631, "logits/rejected": -0.48281824588775635, "logps/chosen": -215.43028259277344, "logps/rejected": -311.3991394042969, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.31382474303245544, "rewards/margins": 11.255640983581543, "rewards/rejected": -11.569466590881348, "step": 2157 }, { "epoch": 0.74, "learning_rate": 1.4576309443654975e-06, "logits/chosen": -0.6047146320343018, "logits/rejected": -0.5499386191368103, "logps/chosen": -233.46470642089844, "logps/rejected": -403.74530029296875, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -2.0086405277252197, "rewards/margins": 16.47583770751953, "rewards/rejected": -18.484479904174805, "step": 2158 }, { "epoch": 0.74, "learning_rate": 1.4571394384029752e-06, "logits/chosen": -0.5080256462097168, "logits/rejected": -0.47995609045028687, "logps/chosen": -244.66180419921875, "logps/rejected": -345.9827880859375, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -1.6956056356430054, "rewards/margins": 12.821127891540527, "rewards/rejected": -14.516733169555664, "step": 2159 }, { "epoch": 0.74, "learning_rate": 1.456647792790663e-06, "logits/chosen": -0.6200403571128845, "logits/rejected": -0.606336236000061, "logps/chosen": -216.35025024414062, "logps/rejected": -405.476318359375, "loss": 0.0499, "rewards/accuracies": 1.0, "rewards/chosen": 0.24729274213314056, "rewards/margins": 18.244930267333984, "rewards/rejected": -17.997638702392578, "step": 2160 }, { "epoch": 0.74, "learning_rate": 1.4561560076787524e-06, "logits/chosen": -0.5629329681396484, "logits/rejected": -0.5475695133209229, "logps/chosen": -249.50662231445312, "logps/rejected": -377.611083984375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.49331000447273254, "rewards/margins": 13.074877738952637, "rewards/rejected": -13.568188667297363, "step": 2161 }, { "epoch": 0.74, "learning_rate": 1.4556640832174763e-06, "logits/chosen": -0.5657458901405334, "logits/rejected": -0.5475929975509644, "logps/chosen": -194.8322296142578, "logps/rejected": -390.8619079589844, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.9629052877426147, "rewards/margins": 17.071529388427734, "rewards/rejected": -19.034433364868164, "step": 2162 }, { "epoch": 0.74, "learning_rate": 1.455172019557111e-06, "logits/chosen": -0.45320233702659607, "logits/rejected": -0.44905850291252136, "logps/chosen": -149.56187438964844, "logps/rejected": -276.05548095703125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.6789640188217163, "rewards/margins": 12.321959495544434, "rewards/rejected": -13.000923156738281, "step": 2163 }, { "epoch": 0.74, "learning_rate": 1.4546798168479756e-06, "logits/chosen": -0.6875141859054565, "logits/rejected": -0.6547267436981201, "logps/chosen": -245.35723876953125, "logps/rejected": -359.3857727050781, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.372877597808838, "rewards/margins": 13.506802558898926, "rewards/rejected": -15.879681587219238, "step": 2164 }, { "epoch": 0.74, "learning_rate": 1.4541874752404305e-06, "logits/chosen": -0.4978102445602417, "logits/rejected": -0.47397497296333313, "logps/chosen": -139.1393585205078, "logps/rejected": -295.4342956542969, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.6163323521614075, "rewards/margins": 14.747549057006836, "rewards/rejected": -15.36388111114502, "step": 2165 }, { "epoch": 0.74, "learning_rate": 1.4536949948848799e-06, "logits/chosen": -0.4657578468322754, "logits/rejected": -0.43079468607902527, "logps/chosen": -148.46490478515625, "logps/rejected": -217.61245727539062, "loss": 0.0265, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1343574523925781, "rewards/margins": 9.889766693115234, "rewards/rejected": -11.024124145507812, "step": 2166 }, { "epoch": 0.74, "learning_rate": 1.453202375931769e-06, "logits/chosen": -0.5847946405410767, "logits/rejected": -0.536016583442688, "logps/chosen": -300.7135009765625, "logps/rejected": -405.407470703125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.194337859749794, "rewards/margins": 15.554966926574707, "rewards/rejected": -15.749305725097656, "step": 2167 }, { "epoch": 0.74, "learning_rate": 1.452709618531587e-06, "logits/chosen": -0.5221882462501526, "logits/rejected": -0.49367231130599976, "logps/chosen": -178.0052032470703, "logps/rejected": -263.3550109863281, "loss": 0.0264, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1426254510879517, "rewards/margins": 9.87506103515625, "rewards/rejected": -11.01768684387207, "step": 2168 }, { "epoch": 0.74, "learning_rate": 1.4522167228348638e-06, "logits/chosen": -0.49893566966056824, "logits/rejected": -0.46406298875808716, "logps/chosen": -162.98497009277344, "logps/rejected": -244.04080200195312, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.4642083644866943, "rewards/margins": 10.516314506530762, "rewards/rejected": -11.980522155761719, "step": 2169 }, { "epoch": 0.74, "learning_rate": 1.4517236889921729e-06, "logits/chosen": -0.5579866170883179, "logits/rejected": -0.5402552485466003, "logps/chosen": -231.6289825439453, "logps/rejected": -374.504150390625, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.9129780530929565, "rewards/margins": 13.391485214233398, "rewards/rejected": -14.304461479187012, "step": 2170 }, { "epoch": 0.74, "learning_rate": 1.4512305171541287e-06, "logits/chosen": -0.5958366394042969, "logits/rejected": -0.5691744685173035, "logps/chosen": -197.9598846435547, "logps/rejected": -270.2397155761719, "loss": 0.0814, "rewards/accuracies": 0.9375, "rewards/chosen": -1.723388433456421, "rewards/margins": 11.165099143981934, "rewards/rejected": -12.888487815856934, "step": 2171 }, { "epoch": 0.74, "learning_rate": 1.4507372074713886e-06, "logits/chosen": -0.5665766596794128, "logits/rejected": -0.5396080017089844, "logps/chosen": -251.33837890625, "logps/rejected": -328.1939697265625, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -0.19099581241607666, "rewards/margins": 10.1830472946167, "rewards/rejected": -10.374041557312012, "step": 2172 }, { "epoch": 0.74, "learning_rate": 1.450243760094652e-06, "logits/chosen": -0.5956019759178162, "logits/rejected": -0.5719362497329712, "logps/chosen": -164.57794189453125, "logps/rejected": -300.65606689453125, "loss": 0.0759, "rewards/accuracies": 1.0, "rewards/chosen": -2.2046051025390625, "rewards/margins": 12.843672752380371, "rewards/rejected": -15.048276901245117, "step": 2173 }, { "epoch": 0.74, "learning_rate": 1.44975017517466e-06, "logits/chosen": -0.42936795949935913, "logits/rejected": -0.40368175506591797, "logps/chosen": -207.00213623046875, "logps/rejected": -313.33282470703125, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 0.3220668137073517, "rewards/margins": 13.452239036560059, "rewards/rejected": -13.130171775817871, "step": 2174 }, { "epoch": 0.74, "learning_rate": 1.4492564528621966e-06, "logits/chosen": -0.5432290434837341, "logits/rejected": -0.5372269749641418, "logps/chosen": -190.47152709960938, "logps/rejected": -288.5091552734375, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": -0.8716861605644226, "rewards/margins": 11.798870086669922, "rewards/rejected": -12.67055606842041, "step": 2175 }, { "epoch": 0.74, "learning_rate": 1.4487625933080866e-06, "logits/chosen": -0.6241050958633423, "logits/rejected": -0.598702609539032, "logps/chosen": -193.8738250732422, "logps/rejected": -350.43896484375, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.18211974203586578, "rewards/margins": 15.135220527648926, "rewards/rejected": -15.317339897155762, "step": 2176 }, { "epoch": 0.74, "learning_rate": 1.448268596663197e-06, "logits/chosen": -0.5283844470977783, "logits/rejected": -0.5053085684776306, "logps/chosen": -222.9901885986328, "logps/rejected": -345.301025390625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 1.064475417137146, "rewards/margins": 14.353026390075684, "rewards/rejected": -13.28855037689209, "step": 2177 }, { "epoch": 0.74, "learning_rate": 1.4477744630784377e-06, "logits/chosen": -0.5095547437667847, "logits/rejected": -0.47021931409835815, "logps/chosen": -251.40786743164062, "logps/rejected": -321.6919250488281, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.1172597408294678, "rewards/margins": 11.811931610107422, "rewards/rejected": -13.929190635681152, "step": 2178 }, { "epoch": 0.74, "learning_rate": 1.4472801927047592e-06, "logits/chosen": -0.4033959209918976, "logits/rejected": -0.3616032600402832, "logps/chosen": -193.9929962158203, "logps/rejected": -283.2353515625, "loss": 0.0465, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5826858282089233, "rewards/margins": 12.05663776397705, "rewards/rejected": -13.639322280883789, "step": 2179 }, { "epoch": 0.74, "learning_rate": 1.4467857856931544e-06, "logits/chosen": -0.559742271900177, "logits/rejected": -0.5417432188987732, "logps/chosen": -210.22572326660156, "logps/rejected": -388.8891906738281, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.1706821918487549, "rewards/margins": 16.637184143066406, "rewards/rejected": -17.8078670501709, "step": 2180 }, { "epoch": 0.74, "learning_rate": 1.4462912421946584e-06, "logits/chosen": -0.5476263761520386, "logits/rejected": -0.5285726189613342, "logps/chosen": -213.75790405273438, "logps/rejected": -340.2847900390625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.098923683166504, "rewards/margins": 11.61711311340332, "rewards/rejected": -13.71603775024414, "step": 2181 }, { "epoch": 0.74, "learning_rate": 1.445796562360346e-06, "logits/chosen": -0.6113523244857788, "logits/rejected": -0.5851444602012634, "logps/chosen": -186.2764892578125, "logps/rejected": -323.0975341796875, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.3678831458091736, "rewards/margins": 14.224235534667969, "rewards/rejected": -14.592117309570312, "step": 2182 }, { "epoch": 0.75, "learning_rate": 1.4453017463413364e-06, "logits/chosen": -0.5349954962730408, "logits/rejected": -0.5267874598503113, "logps/chosen": -145.15707397460938, "logps/rejected": -343.45611572265625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.134300947189331, "rewards/margins": 15.4205904006958, "rewards/rejected": -16.55489158630371, "step": 2183 }, { "epoch": 0.75, "learning_rate": 1.4448067942887885e-06, "logits/chosen": -0.5421499609947205, "logits/rejected": -0.5055617690086365, "logps/chosen": -222.17242431640625, "logps/rejected": -369.27996826171875, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.5546185970306396, "rewards/margins": 13.661989212036133, "rewards/rejected": -14.216608047485352, "step": 2184 }, { "epoch": 0.75, "learning_rate": 1.4443117063539037e-06, "logits/chosen": -0.6673126816749573, "logits/rejected": -0.6622524261474609, "logps/chosen": -224.56936645507812, "logps/rejected": -375.83837890625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.6110694408416748, "rewards/margins": 13.666082382202148, "rewards/rejected": -15.277151107788086, "step": 2185 }, { "epoch": 0.75, "learning_rate": 1.4438164826879239e-06, "logits/chosen": -0.46919217705726624, "logits/rejected": -0.44778934121131897, "logps/chosen": -157.38650512695312, "logps/rejected": -317.4690246582031, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.015465155243873596, "rewards/margins": 15.826972961425781, "rewards/rejected": -15.811507225036621, "step": 2186 }, { "epoch": 0.75, "learning_rate": 1.4433211234421335e-06, "logits/chosen": -0.5601598620414734, "logits/rejected": -0.5397898554801941, "logps/chosen": -167.17037963867188, "logps/rejected": -275.2540283203125, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -0.9132910966873169, "rewards/margins": 11.012031555175781, "rewards/rejected": -11.925323486328125, "step": 2187 }, { "epoch": 0.75, "learning_rate": 1.4428256287678578e-06, "logits/chosen": -0.526291012763977, "logits/rejected": -0.48222556710243225, "logps/chosen": -200.36395263671875, "logps/rejected": -288.5234069824219, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3312455415725708, "rewards/margins": 12.262969017028809, "rewards/rejected": -13.59421443939209, "step": 2188 }, { "epoch": 0.75, "learning_rate": 1.4423299988164639e-06, "logits/chosen": -0.5159062743186951, "logits/rejected": -0.453349769115448, "logps/chosen": -220.76893615722656, "logps/rejected": -263.6129150390625, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -2.0200905799865723, "rewards/margins": 10.945881843566895, "rewards/rejected": -12.965971946716309, "step": 2189 }, { "epoch": 0.75, "learning_rate": 1.4418342337393595e-06, "logits/chosen": -0.5245163440704346, "logits/rejected": -0.5168442130088806, "logps/chosen": -229.76185607910156, "logps/rejected": -378.46044921875, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -1.3224165439605713, "rewards/margins": 14.061540603637695, "rewards/rejected": -15.38395881652832, "step": 2190 }, { "epoch": 0.75, "learning_rate": 1.4413383336879941e-06, "logits/chosen": -0.5584107041358948, "logits/rejected": -0.5180322527885437, "logps/chosen": -224.93533325195312, "logps/rejected": -324.49810791015625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.9423776268959045, "rewards/margins": 10.948651313781738, "rewards/rejected": -11.89102840423584, "step": 2191 }, { "epoch": 0.75, "learning_rate": 1.4408422988138582e-06, "logits/chosen": -0.46027880907058716, "logits/rejected": -0.39430299401283264, "logps/chosen": -235.3748016357422, "logps/rejected": -233.9471435546875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.3264571726322174, "rewards/margins": 9.981462478637695, "rewards/rejected": -10.3079195022583, "step": 2192 }, { "epoch": 0.75, "learning_rate": 1.440346129268484e-06, "logits/chosen": -0.5668777227401733, "logits/rejected": -0.5456669926643372, "logps/chosen": -201.01849365234375, "logps/rejected": -318.97467041015625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.7632137537002563, "rewards/margins": 13.072242736816406, "rewards/rejected": -14.835456848144531, "step": 2193 }, { "epoch": 0.75, "learning_rate": 1.4398498252034444e-06, "logits/chosen": -0.5725595355033875, "logits/rejected": -0.5525723099708557, "logps/chosen": -123.61016845703125, "logps/rejected": -257.86865234375, "loss": 0.033, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6064567565917969, "rewards/margins": 14.004154205322266, "rewards/rejected": -14.610612869262695, "step": 2194 }, { "epoch": 0.75, "learning_rate": 1.4393533867703526e-06, "logits/chosen": -0.4890071749687195, "logits/rejected": -0.45795390009880066, "logps/chosen": -203.26771545410156, "logps/rejected": -339.8895263671875, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": 0.005108892917633057, "rewards/margins": 14.408740997314453, "rewards/rejected": -14.403632164001465, "step": 2195 }, { "epoch": 0.75, "learning_rate": 1.438856814120865e-06, "logits/chosen": -0.5135843753814697, "logits/rejected": -0.4612600803375244, "logps/chosen": -222.6658172607422, "logps/rejected": -348.43927001953125, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.298991858959198, "rewards/margins": 14.593355178833008, "rewards/rejected": -14.89234733581543, "step": 2196 }, { "epoch": 0.75, "learning_rate": 1.4383601074066767e-06, "logits/chosen": -0.4811420738697052, "logits/rejected": -0.454553484916687, "logps/chosen": -205.21636962890625, "logps/rejected": -285.90966796875, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.829619288444519, "rewards/margins": 12.430797576904297, "rewards/rejected": -13.260416030883789, "step": 2197 }, { "epoch": 0.75, "learning_rate": 1.4378632667795252e-06, "logits/chosen": -0.5357449650764465, "logits/rejected": -0.4955723285675049, "logps/chosen": -197.80564880371094, "logps/rejected": -258.69366455078125, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -0.9998728036880493, "rewards/margins": 9.744302749633789, "rewards/rejected": -10.744174003601074, "step": 2198 }, { "epoch": 0.75, "learning_rate": 1.4373662923911885e-06, "logits/chosen": -0.45895126461982727, "logits/rejected": -0.4275798797607422, "logps/chosen": -215.32601928710938, "logps/rejected": -328.0521240234375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5193891525268555, "rewards/margins": 11.93871784210205, "rewards/rejected": -12.45810604095459, "step": 2199 }, { "epoch": 0.75, "learning_rate": 1.4368691843934851e-06, "logits/chosen": -0.5434914231300354, "logits/rejected": -0.5211105346679688, "logps/chosen": -190.20143127441406, "logps/rejected": -273.0708923339844, "loss": 0.0168, "rewards/accuracies": 0.9375, "rewards/chosen": -2.309537410736084, "rewards/margins": 9.521747589111328, "rewards/rejected": -11.83128547668457, "step": 2200 }, { "epoch": 0.75, "learning_rate": 1.4363719429382747e-06, "logits/chosen": -0.4634946286678314, "logits/rejected": -0.41672781109809875, "logps/chosen": -207.10745239257812, "logps/rejected": -301.5885314941406, "loss": 0.0125, "rewards/accuracies": 0.9375, "rewards/chosen": -0.20424500107765198, "rewards/margins": 13.91176700592041, "rewards/rejected": -14.116011619567871, "step": 2201 }, { "epoch": 0.75, "learning_rate": 1.435874568177458e-06, "logits/chosen": -0.5109480619430542, "logits/rejected": -0.49234604835510254, "logps/chosen": -242.5211181640625, "logps/rejected": -422.5233154296875, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.21796327829360962, "rewards/margins": 14.955734252929688, "rewards/rejected": -15.173697471618652, "step": 2202 }, { "epoch": 0.75, "learning_rate": 1.4353770602629758e-06, "logits/chosen": -0.567963719367981, "logits/rejected": -0.5289981365203857, "logps/chosen": -209.88232421875, "logps/rejected": -289.7960205078125, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.3789302110671997, "rewards/margins": 12.163118362426758, "rewards/rejected": -12.542047500610352, "step": 2203 }, { "epoch": 0.75, "learning_rate": 1.43487941934681e-06, "logits/chosen": -0.532132089138031, "logits/rejected": -0.49107688665390015, "logps/chosen": -248.18914794921875, "logps/rejected": -308.6422119140625, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.4485235810279846, "rewards/margins": 10.045713424682617, "rewards/rejected": -10.494236946105957, "step": 2204 }, { "epoch": 0.75, "learning_rate": 1.4343816455809833e-06, "logits/chosen": -0.4659033715724945, "logits/rejected": -0.42106977105140686, "logps/chosen": -212.3151397705078, "logps/rejected": -336.4394226074219, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.05218003690242767, "rewards/margins": 15.841901779174805, "rewards/rejected": -15.89408016204834, "step": 2205 }, { "epoch": 0.75, "learning_rate": 1.433883739117558e-06, "logits/chosen": -0.5484252572059631, "logits/rejected": -0.548627495765686, "logps/chosen": -226.2056884765625, "logps/rejected": -458.98516845703125, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.006839632987976, "rewards/margins": 19.986787796020508, "rewards/rejected": -20.993627548217773, "step": 2206 }, { "epoch": 0.75, "learning_rate": 1.4333857001086385e-06, "logits/chosen": -0.5496196150779724, "logits/rejected": -0.5145502686500549, "logps/chosen": -196.57838439941406, "logps/rejected": -377.820556640625, "loss": 0.0609, "rewards/accuracies": 1.0, "rewards/chosen": -0.6575266122817993, "rewards/margins": 17.822229385375977, "rewards/rejected": -18.479755401611328, "step": 2207 }, { "epoch": 0.75, "learning_rate": 1.4328875287063685e-06, "logits/chosen": -0.5201423764228821, "logits/rejected": -0.4753211736679077, "logps/chosen": -219.4962158203125, "logps/rejected": -275.35638427734375, "loss": 0.0346, "rewards/accuracies": 0.9375, "rewards/chosen": -0.39785924553871155, "rewards/margins": 10.993276596069336, "rewards/rejected": -11.391136169433594, "step": 2208 }, { "epoch": 0.75, "learning_rate": 1.4323892250629325e-06, "logits/chosen": -0.49902263283729553, "logits/rejected": -0.46384957432746887, "logps/chosen": -208.38784790039062, "logps/rejected": -276.88665771484375, "loss": 0.0254, "rewards/accuracies": 0.9375, "rewards/chosen": 0.00652042031288147, "rewards/margins": 12.628145217895508, "rewards/rejected": -12.621625900268555, "step": 2209 }, { "epoch": 0.75, "learning_rate": 1.4318907893305548e-06, "logits/chosen": -0.5239381194114685, "logits/rejected": -0.4723849296569824, "logps/chosen": -188.59066772460938, "logps/rejected": -321.64306640625, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -1.3312458992004395, "rewards/margins": 15.353362083435059, "rewards/rejected": -16.684606552124023, "step": 2210 }, { "epoch": 0.75, "learning_rate": 1.4313922216615018e-06, "logits/chosen": -0.4535781443119049, "logits/rejected": -0.44784650206565857, "logps/chosen": -225.79611206054688, "logps/rejected": -331.6329345703125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.4407654404640198, "rewards/margins": 10.661873817443848, "rewards/rejected": -11.102638244628906, "step": 2211 }, { "epoch": 0.75, "learning_rate": 1.430893522208078e-06, "logits/chosen": -0.5213991403579712, "logits/rejected": -0.5008474588394165, "logps/chosen": -156.33309936523438, "logps/rejected": -280.011962890625, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -1.1459627151489258, "rewards/margins": 11.761983871459961, "rewards/rejected": -12.907946586608887, "step": 2212 }, { "epoch": 0.76, "learning_rate": 1.4303946911226298e-06, "logits/chosen": -0.5357057452201843, "logits/rejected": -0.5308802723884583, "logps/chosen": -216.82473754882812, "logps/rejected": -333.0201721191406, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.8145917654037476, "rewards/margins": 11.289384841918945, "rewards/rejected": -13.10397720336914, "step": 2213 }, { "epoch": 0.76, "learning_rate": 1.429895728557543e-06, "logits/chosen": -0.4763183891773224, "logits/rejected": -0.43391597270965576, "logps/chosen": -199.5858154296875, "logps/rejected": -313.7148742675781, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.5240581035614014, "rewards/margins": 14.436123847961426, "rewards/rejected": -14.96018123626709, "step": 2214 }, { "epoch": 0.76, "learning_rate": 1.429396634665244e-06, "logits/chosen": -0.4426734745502472, "logits/rejected": -0.4303022623062134, "logps/chosen": -182.04916381835938, "logps/rejected": -283.2855224609375, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -1.4948298931121826, "rewards/margins": 12.395417213439941, "rewards/rejected": -13.890247344970703, "step": 2215 }, { "epoch": 0.76, "learning_rate": 1.4288974095981986e-06, "logits/chosen": -0.38598060607910156, "logits/rejected": -0.3618248701095581, "logps/chosen": -190.16473388671875, "logps/rejected": -233.27320861816406, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -2.0167179107666016, "rewards/margins": 8.054780006408691, "rewards/rejected": -10.071497917175293, "step": 2216 }, { "epoch": 0.76, "learning_rate": 1.4283980535089137e-06, "logits/chosen": -0.48080718517303467, "logits/rejected": -0.46937915682792664, "logps/chosen": -158.47950744628906, "logps/rejected": -265.3516540527344, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.7917848825454712, "rewards/margins": 11.51425552368164, "rewards/rejected": -12.306039810180664, "step": 2217 }, { "epoch": 0.76, "learning_rate": 1.4278985665499357e-06, "logits/chosen": -0.4959927797317505, "logits/rejected": -0.454569935798645, "logps/chosen": -227.63133239746094, "logps/rejected": -340.433837890625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.8624799251556396, "rewards/margins": 11.965807914733887, "rewards/rejected": -13.828289031982422, "step": 2218 }, { "epoch": 0.76, "learning_rate": 1.4273989488738503e-06, "logits/chosen": -0.5614607334136963, "logits/rejected": -0.553277850151062, "logps/chosen": -115.51860809326172, "logps/rejected": -255.17601013183594, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.7250618934631348, "rewards/margins": 11.571125030517578, "rewards/rejected": -13.296185493469238, "step": 2219 }, { "epoch": 0.76, "learning_rate": 1.4268992006332845e-06, "logits/chosen": -0.574255645275116, "logits/rejected": -0.5571273565292358, "logps/chosen": -259.5254211425781, "logps/rejected": -413.7177734375, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.8594743013381958, "rewards/margins": 14.547721862792969, "rewards/rejected": -16.407196044921875, "step": 2220 }, { "epoch": 0.76, "learning_rate": 1.4263993219809042e-06, "logits/chosen": -0.48692673444747925, "logits/rejected": -0.4638481140136719, "logps/chosen": -159.20826721191406, "logps/rejected": -316.5672912597656, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.063766598701477, "rewards/margins": 13.771876335144043, "rewards/rejected": -14.83564281463623, "step": 2221 }, { "epoch": 0.76, "learning_rate": 1.4258993130694155e-06, "logits/chosen": -0.5276625156402588, "logits/rejected": -0.4998495578765869, "logps/chosen": -187.91107177734375, "logps/rejected": -306.46649169921875, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -1.1928550004959106, "rewards/margins": 13.365387916564941, "rewards/rejected": -14.558243751525879, "step": 2222 }, { "epoch": 0.76, "learning_rate": 1.425399174051564e-06, "logits/chosen": -0.5812054872512817, "logits/rejected": -0.5542027950286865, "logps/chosen": -218.6334991455078, "logps/rejected": -362.0291748046875, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -1.6956521272659302, "rewards/margins": 14.174025535583496, "rewards/rejected": -15.86967658996582, "step": 2223 }, { "epoch": 0.76, "learning_rate": 1.424898905080136e-06, "logits/chosen": -0.5930135846138, "logits/rejected": -0.567785382270813, "logps/chosen": -190.96925354003906, "logps/rejected": -331.2921447753906, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.0203815698623657, "rewards/margins": 13.110236167907715, "rewards/rejected": -14.130619049072266, "step": 2224 }, { "epoch": 0.76, "learning_rate": 1.4243985063079555e-06, "logits/chosen": -0.4874306917190552, "logits/rejected": -0.46896642446517944, "logps/chosen": -231.0947723388672, "logps/rejected": -382.45611572265625, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.6514506936073303, "rewards/margins": 17.11346435546875, "rewards/rejected": -17.764915466308594, "step": 2225 }, { "epoch": 0.76, "learning_rate": 1.4238979778878885e-06, "logits/chosen": -0.5013043284416199, "logits/rejected": -0.4724269509315491, "logps/chosen": -266.11602783203125, "logps/rejected": -397.7291564941406, "loss": 0.0768, "rewards/accuracies": 1.0, "rewards/chosen": -2.3434360027313232, "rewards/margins": 12.814164161682129, "rewards/rejected": -15.157600402832031, "step": 2226 }, { "epoch": 0.76, "learning_rate": 1.4233973199728393e-06, "logits/chosen": -0.47375789284706116, "logits/rejected": -0.47430694103240967, "logps/chosen": -190.65618896484375, "logps/rejected": -362.993408203125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.343914270401001, "rewards/margins": 12.163041114807129, "rewards/rejected": -13.506954193115234, "step": 2227 }, { "epoch": 0.76, "learning_rate": 1.4228965327157516e-06, "logits/chosen": -0.4255712330341339, "logits/rejected": -0.39853358268737793, "logps/chosen": -133.49649047851562, "logps/rejected": -205.2260284423828, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.2781339883804321, "rewards/margins": 10.55362606048584, "rewards/rejected": -11.83176040649414, "step": 2228 }, { "epoch": 0.76, "learning_rate": 1.4223956162696094e-06, "logits/chosen": -0.6147939562797546, "logits/rejected": -0.5772974491119385, "logps/chosen": -198.70701599121094, "logps/rejected": -316.3137512207031, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.5061585903167725, "rewards/margins": 13.871916770935059, "rewards/rejected": -15.378074645996094, "step": 2229 }, { "epoch": 0.76, "learning_rate": 1.4218945707874356e-06, "logits/chosen": -0.5732109546661377, "logits/rejected": -0.5048368573188782, "logps/chosen": -230.8161163330078, "logps/rejected": -330.1003112792969, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.042102083563804626, "rewards/margins": 15.564322471618652, "rewards/rejected": -15.522220611572266, "step": 2230 }, { "epoch": 0.76, "learning_rate": 1.4213933964222925e-06, "logits/chosen": -0.5122208595275879, "logits/rejected": -0.4543246030807495, "logps/chosen": -256.0243835449219, "logps/rejected": -389.4415588378906, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.8646517395973206, "rewards/margins": 15.39378833770752, "rewards/rejected": -16.258440017700195, "step": 2231 }, { "epoch": 0.76, "learning_rate": 1.4208920933272826e-06, "logits/chosen": -0.5450139045715332, "logits/rejected": -0.508906364440918, "logps/chosen": -165.10052490234375, "logps/rejected": -288.540283203125, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -2.3077259063720703, "rewards/margins": 13.180837631225586, "rewards/rejected": -15.48856258392334, "step": 2232 }, { "epoch": 0.76, "learning_rate": 1.4203906616555466e-06, "logits/chosen": -0.4501361846923828, "logits/rejected": -0.4253217279911041, "logps/chosen": -175.05404663085938, "logps/rejected": -310.9698791503906, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -1.7221713066101074, "rewards/margins": 12.112175941467285, "rewards/rejected": -13.834348678588867, "step": 2233 }, { "epoch": 0.76, "learning_rate": 1.4198891015602646e-06, "logits/chosen": -0.41794705390930176, "logits/rejected": -0.4036904573440552, "logps/chosen": -148.571044921875, "logps/rejected": -288.8743591308594, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -1.2188200950622559, "rewards/margins": 11.860180854797363, "rewards/rejected": -13.079000473022461, "step": 2234 }, { "epoch": 0.76, "learning_rate": 1.4193874131946567e-06, "logits/chosen": -0.5121262669563293, "logits/rejected": -0.4490603804588318, "logps/chosen": -225.06271362304688, "logps/rejected": -325.2867736816406, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.5959298610687256, "rewards/margins": 13.213577270507812, "rewards/rejected": -13.809507369995117, "step": 2235 }, { "epoch": 0.76, "learning_rate": 1.418885596711982e-06, "logits/chosen": -0.4415646195411682, "logits/rejected": -0.43549612164497375, "logps/chosen": -276.20550537109375, "logps/rejected": -430.986083984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.483611583709717, "rewards/margins": 14.560254096984863, "rewards/rejected": -17.043865203857422, "step": 2236 }, { "epoch": 0.76, "learning_rate": 1.418383652265538e-06, "logits/chosen": -0.48310375213623047, "logits/rejected": -0.4700334072113037, "logps/chosen": -192.55889892578125, "logps/rejected": -317.4955139160156, "loss": 0.0479, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6509556770324707, "rewards/margins": 11.775964736938477, "rewards/rejected": -13.426920890808105, "step": 2237 }, { "epoch": 0.76, "learning_rate": 1.4178815800086623e-06, "logits/chosen": -0.4420875608921051, "logits/rejected": -0.4214114844799042, "logps/chosen": -197.21743774414062, "logps/rejected": -285.92529296875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.449504017829895, "rewards/margins": 12.41940975189209, "rewards/rejected": -13.868913650512695, "step": 2238 }, { "epoch": 0.76, "learning_rate": 1.4173793800947306e-06, "logits/chosen": -0.4683525264263153, "logits/rejected": -0.38986748456954956, "logps/chosen": -186.83529663085938, "logps/rejected": -283.25439453125, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -1.6356160640716553, "rewards/margins": 13.997760772705078, "rewards/rejected": -15.633377075195312, "step": 2239 }, { "epoch": 0.76, "learning_rate": 1.4168770526771583e-06, "logits/chosen": -0.5099378824234009, "logits/rejected": -0.48355427384376526, "logps/chosen": -274.2300720214844, "logps/rejected": -375.3101501464844, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.4147446155548096, "rewards/margins": 13.380282402038574, "rewards/rejected": -15.795026779174805, "step": 2240 }, { "epoch": 0.76, "learning_rate": 1.4163745979093995e-06, "logits/chosen": -0.4613083600997925, "logits/rejected": -0.4367608428001404, "logps/chosen": -174.8914031982422, "logps/rejected": -313.7483825683594, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.5895745754241943, "rewards/margins": 12.272210121154785, "rewards/rejected": -13.861784934997559, "step": 2241 }, { "epoch": 0.77, "learning_rate": 1.4158720159449473e-06, "logits/chosen": -0.48988720774650574, "logits/rejected": -0.4380054473876953, "logps/chosen": -269.5075378417969, "logps/rejected": -338.1617736816406, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.040354907512664795, "rewards/margins": 12.345037460327148, "rewards/rejected": -12.385391235351562, "step": 2242 }, { "epoch": 0.77, "learning_rate": 1.4153693069373329e-06, "logits/chosen": -0.4649687707424164, "logits/rejected": -0.4366561472415924, "logps/chosen": -208.81031799316406, "logps/rejected": -296.1197814941406, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": -0.6295596361160278, "rewards/margins": 13.591198921203613, "rewards/rejected": -14.220758438110352, "step": 2243 }, { "epoch": 0.77, "learning_rate": 1.4148664710401278e-06, "logits/chosen": -0.5189743638038635, "logits/rejected": -0.5030973553657532, "logps/chosen": -160.34298706054688, "logps/rejected": -274.83697509765625, "loss": 0.0265, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8396327495574951, "rewards/margins": 11.364668846130371, "rewards/rejected": -13.204300880432129, "step": 2244 }, { "epoch": 0.77, "learning_rate": 1.414363508406941e-06, "logits/chosen": -0.5136992931365967, "logits/rejected": -0.5015036463737488, "logps/chosen": -198.75975036621094, "logps/rejected": -372.01953125, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": 0.14145636558532715, "rewards/margins": 17.78081512451172, "rewards/rejected": -17.639358520507812, "step": 2245 }, { "epoch": 0.77, "learning_rate": 1.4138604191914208e-06, "logits/chosen": -0.4373435080051422, "logits/rejected": -0.4191981852054596, "logps/chosen": -194.05331420898438, "logps/rejected": -334.90478515625, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9392805099487305, "rewards/margins": 14.54472541809082, "rewards/rejected": -15.484004974365234, "step": 2246 }, { "epoch": 0.77, "learning_rate": 1.4133572035472538e-06, "logits/chosen": -0.5557065010070801, "logits/rejected": -0.5477868914604187, "logps/chosen": -265.63848876953125, "logps/rejected": -413.42657470703125, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -1.2909770011901855, "rewards/margins": 15.24288272857666, "rewards/rejected": -16.533859252929688, "step": 2247 }, { "epoch": 0.77, "learning_rate": 1.4128538616281659e-06, "logits/chosen": -0.4814463257789612, "logits/rejected": -0.4355066418647766, "logps/chosen": -190.36495971679688, "logps/rejected": -311.96051025390625, "loss": 0.0514, "rewards/accuracies": 1.0, "rewards/chosen": -1.767092227935791, "rewards/margins": 14.208405494689941, "rewards/rejected": -15.975499153137207, "step": 2248 }, { "epoch": 0.77, "learning_rate": 1.4123503935879202e-06, "logits/chosen": -0.3849847614765167, "logits/rejected": -0.3859401047229767, "logps/chosen": -145.58360290527344, "logps/rejected": -243.90599060058594, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -1.013521432876587, "rewards/margins": 9.731744766235352, "rewards/rejected": -10.745266914367676, "step": 2249 }, { "epoch": 0.77, "learning_rate": 1.4118467995803203e-06, "logits/chosen": -0.5276597142219543, "logits/rejected": -0.49718570709228516, "logps/chosen": -259.1808166503906, "logps/rejected": -378.9829406738281, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -1.1215583086013794, "rewards/margins": 16.17641258239746, "rewards/rejected": -17.2979736328125, "step": 2250 }, { "epoch": 0.77, "learning_rate": 1.4113430797592067e-06, "logits/chosen": -0.5688252449035645, "logits/rejected": -0.556867241859436, "logps/chosen": -186.940185546875, "logps/rejected": -330.4301452636719, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -1.152793049812317, "rewards/margins": 14.63022518157959, "rewards/rejected": -15.783018112182617, "step": 2251 }, { "epoch": 0.77, "learning_rate": 1.4108392342784586e-06, "logits/chosen": -0.4786941409111023, "logits/rejected": -0.45354264974594116, "logps/chosen": -232.880615234375, "logps/rejected": -364.7914123535156, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.7607146501541138, "rewards/margins": 13.866096496582031, "rewards/rejected": -15.626810073852539, "step": 2252 }, { "epoch": 0.77, "learning_rate": 1.4103352632919943e-06, "logits/chosen": -0.4879797697067261, "logits/rejected": -0.4489801824092865, "logps/chosen": -235.57785034179688, "logps/rejected": -333.19970703125, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.3733140230178833, "rewards/margins": 16.05323600769043, "rewards/rejected": -16.426549911499023, "step": 2253 }, { "epoch": 0.77, "learning_rate": 1.40983116695377e-06, "logits/chosen": -0.3460284471511841, "logits/rejected": -0.3230748772621155, "logps/chosen": -203.6800994873047, "logps/rejected": -314.8506164550781, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -0.647675096988678, "rewards/margins": 12.188043594360352, "rewards/rejected": -12.835719108581543, "step": 2254 }, { "epoch": 0.77, "learning_rate": 1.4093269454177793e-06, "logits/chosen": -0.438433438539505, "logits/rejected": -0.4116132855415344, "logps/chosen": -154.15989685058594, "logps/rejected": -293.6675109863281, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.9558237791061401, "rewards/margins": 14.515266418457031, "rewards/rejected": -16.47109031677246, "step": 2255 }, { "epoch": 0.77, "learning_rate": 1.4088225988380565e-06, "logits/chosen": -0.45888233184814453, "logits/rejected": -0.4646313488483429, "logps/chosen": -165.81314086914062, "logps/rejected": -315.14764404296875, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -2.4275436401367188, "rewards/margins": 12.290839195251465, "rewards/rejected": -14.7183837890625, "step": 2256 }, { "epoch": 0.77, "learning_rate": 1.4083181273686712e-06, "logits/chosen": -0.524199903011322, "logits/rejected": -0.5077854990959167, "logps/chosen": -125.71047973632812, "logps/rejected": -248.756591796875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.3482423722743988, "rewards/margins": 12.676046371459961, "rewards/rejected": -13.024287223815918, "step": 2257 }, { "epoch": 0.77, "learning_rate": 1.407813531163733e-06, "logits/chosen": -0.5053268074989319, "logits/rejected": -0.4640207886695862, "logps/chosen": -204.58599853515625, "logps/rejected": -262.5658874511719, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": 0.1431625783443451, "rewards/margins": 12.181520462036133, "rewards/rejected": -12.038357734680176, "step": 2258 }, { "epoch": 0.77, "learning_rate": 1.4073088103773886e-06, "logits/chosen": -0.45840489864349365, "logits/rejected": -0.42070016264915466, "logps/chosen": -220.12559509277344, "logps/rejected": -297.6370849609375, "loss": 0.034, "rewards/accuracies": 1.0, "rewards/chosen": -1.7109543085098267, "rewards/margins": 11.591803550720215, "rewards/rejected": -13.302757263183594, "step": 2259 }, { "epoch": 0.77, "learning_rate": 1.4068039651638243e-06, "logits/chosen": -0.45731863379478455, "logits/rejected": -0.4285762310028076, "logps/chosen": -182.27960205078125, "logps/rejected": -320.8672180175781, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.538311243057251, "rewards/margins": 14.3294095993042, "rewards/rejected": -15.867719650268555, "step": 2260 }, { "epoch": 0.77, "learning_rate": 1.4062989956772622e-06, "logits/chosen": -0.4868321716785431, "logits/rejected": -0.4874725937843323, "logps/chosen": -185.27911376953125, "logps/rejected": -390.3674621582031, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -1.506685733795166, "rewards/margins": 17.082855224609375, "rewards/rejected": -18.589542388916016, "step": 2261 }, { "epoch": 0.77, "learning_rate": 1.405793902071964e-06, "logits/chosen": -0.5489192008972168, "logits/rejected": -0.5309675335884094, "logps/chosen": -212.03073120117188, "logps/rejected": -332.3296203613281, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -1.4139059782028198, "rewards/margins": 14.089117050170898, "rewards/rejected": -15.503024101257324, "step": 2262 }, { "epoch": 0.77, "learning_rate": 1.4052886845022289e-06, "logits/chosen": -0.5831312537193298, "logits/rejected": -0.5608781576156616, "logps/chosen": -237.532958984375, "logps/rejected": -346.4190368652344, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.8384857177734375, "rewards/margins": 10.686336517333984, "rewards/rejected": -12.524823188781738, "step": 2263 }, { "epoch": 0.77, "learning_rate": 1.4047833431223936e-06, "logits/chosen": -0.36465340852737427, "logits/rejected": -0.3559916615486145, "logps/chosen": -132.75001525878906, "logps/rejected": -239.68756103515625, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.7936224937438965, "rewards/margins": 10.59595012664795, "rewards/rejected": -11.389572143554688, "step": 2264 }, { "epoch": 0.77, "learning_rate": 1.4042778780868334e-06, "logits/chosen": -0.39415374398231506, "logits/rejected": -0.3605796694755554, "logps/chosen": -130.62474060058594, "logps/rejected": -271.78863525390625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.7423655986785889, "rewards/margins": 13.201995849609375, "rewards/rejected": -13.94436264038086, "step": 2265 }, { "epoch": 0.77, "learning_rate": 1.4037722895499607e-06, "logits/chosen": -0.5433782339096069, "logits/rejected": -0.5161696076393127, "logps/chosen": -202.72877502441406, "logps/rejected": -415.2694091796875, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -1.7594009637832642, "rewards/margins": 18.794200897216797, "rewards/rejected": -20.553604125976562, "step": 2266 }, { "epoch": 0.77, "learning_rate": 1.4032665776662253e-06, "logits/chosen": -0.4754458963871002, "logits/rejected": -0.46985581517219543, "logps/chosen": -139.72592163085938, "logps/rejected": -290.2951965332031, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.9465586543083191, "rewards/margins": 13.621099472045898, "rewards/rejected": -14.567659378051758, "step": 2267 }, { "epoch": 0.77, "learning_rate": 1.4027607425901158e-06, "logits/chosen": -0.4535812735557556, "logits/rejected": -0.41718170046806335, "logps/chosen": -197.709228515625, "logps/rejected": -325.3611145019531, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.43734654784202576, "rewards/margins": 15.0704984664917, "rewards/rejected": -15.50784683227539, "step": 2268 }, { "epoch": 0.77, "learning_rate": 1.4022547844761577e-06, "logits/chosen": -0.6031200885772705, "logits/rejected": -0.5731643438339233, "logps/chosen": -181.02418518066406, "logps/rejected": -325.2194519042969, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8758053779602051, "rewards/margins": 13.932353973388672, "rewards/rejected": -14.808158874511719, "step": 2269 }, { "epoch": 0.77, "learning_rate": 1.4017487034789141e-06, "logits/chosen": -0.5513948202133179, "logits/rejected": -0.5641559362411499, "logps/chosen": -208.34950256347656, "logps/rejected": -400.88348388671875, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.736910343170166, "rewards/margins": 16.866039276123047, "rewards/rejected": -18.602949142456055, "step": 2270 }, { "epoch": 0.78, "learning_rate": 1.401242499752986e-06, "logits/chosen": -0.3800715208053589, "logits/rejected": -0.3543553054332733, "logps/chosen": -201.28053283691406, "logps/rejected": -266.3292541503906, "loss": 0.0427, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14429236948490143, "rewards/margins": 12.032363891601562, "rewards/rejected": -12.176656723022461, "step": 2271 }, { "epoch": 0.78, "learning_rate": 1.4007361734530115e-06, "logits/chosen": -0.4891655445098877, "logits/rejected": -0.4407694935798645, "logps/chosen": -233.19570922851562, "logps/rejected": -348.976318359375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.831895112991333, "rewards/margins": 14.000389099121094, "rewards/rejected": -14.832284927368164, "step": 2272 }, { "epoch": 0.78, "learning_rate": 1.4002297247336658e-06, "logits/chosen": -0.4973326623439789, "logits/rejected": -0.4451309144496918, "logps/chosen": -212.15084838867188, "logps/rejected": -291.1258544921875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.23231926560401917, "rewards/margins": 13.0474853515625, "rewards/rejected": -13.279804229736328, "step": 2273 }, { "epoch": 0.78, "learning_rate": 1.399723153749663e-06, "logits/chosen": -0.45400047302246094, "logits/rejected": -0.44239088892936707, "logps/chosen": -172.48501586914062, "logps/rejected": -355.61663818359375, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": -0.8512989282608032, "rewards/margins": 16.793601989746094, "rewards/rejected": -17.644901275634766, "step": 2274 }, { "epoch": 0.78, "learning_rate": 1.399216460655753e-06, "logits/chosen": -0.5074428915977478, "logits/rejected": -0.4830722510814667, "logps/chosen": -161.09490966796875, "logps/rejected": -268.43798828125, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.48686355352401733, "rewards/margins": 13.39322280883789, "rewards/rejected": -13.880085945129395, "step": 2275 }, { "epoch": 0.78, "learning_rate": 1.3987096456067233e-06, "logits/chosen": -0.5886297225952148, "logits/rejected": -0.551768958568573, "logps/chosen": -209.0956268310547, "logps/rejected": -353.34967041015625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.6759635210037231, "rewards/margins": 16.514371871948242, "rewards/rejected": -17.190336227416992, "step": 2276 }, { "epoch": 0.78, "learning_rate": 1.3982027087573994e-06, "logits/chosen": -0.45441052317619324, "logits/rejected": -0.4296638071537018, "logps/chosen": -182.20913696289062, "logps/rejected": -316.765625, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010551214218139648, "rewards/margins": 14.97632884979248, "rewards/rejected": -14.977383613586426, "step": 2277 }, { "epoch": 0.78, "learning_rate": 1.3976956502626434e-06, "logits/chosen": -0.46742498874664307, "logits/rejected": -0.40519219636917114, "logps/chosen": -184.14927673339844, "logps/rejected": -251.78712463378906, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.48301729559898376, "rewards/margins": 11.970735549926758, "rewards/rejected": -12.453753471374512, "step": 2278 }, { "epoch": 0.78, "learning_rate": 1.3971884702773544e-06, "logits/chosen": -0.5367267727851868, "logits/rejected": -0.5057008862495422, "logps/chosen": -274.8335876464844, "logps/rejected": -371.87811279296875, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.3428361117839813, "rewards/margins": 13.070234298706055, "rewards/rejected": -13.41307258605957, "step": 2279 }, { "epoch": 0.78, "learning_rate": 1.396681168956469e-06, "logits/chosen": -0.3947503864765167, "logits/rejected": -0.37831297516822815, "logps/chosen": -256.16143798828125, "logps/rejected": -391.0275573730469, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -1.67112135887146, "rewards/margins": 14.899049758911133, "rewards/rejected": -16.570171356201172, "step": 2280 }, { "epoch": 0.78, "learning_rate": 1.396173746454961e-06, "logits/chosen": -0.5430747866630554, "logits/rejected": -0.5189774036407471, "logps/chosen": -219.62637329101562, "logps/rejected": -325.3848571777344, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.98903489112854, "rewards/margins": 13.928003311157227, "rewards/rejected": -14.917037963867188, "step": 2281 }, { "epoch": 0.78, "learning_rate": 1.3956662029278406e-06, "logits/chosen": -0.426555871963501, "logits/rejected": -0.4011137783527374, "logps/chosen": -253.48683166503906, "logps/rejected": -388.8984069824219, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -1.8713123798370361, "rewards/margins": 14.755306243896484, "rewards/rejected": -16.626619338989258, "step": 2282 }, { "epoch": 0.78, "learning_rate": 1.3951585385301553e-06, "logits/chosen": -0.5708690881729126, "logits/rejected": -0.5156324505805969, "logps/chosen": -279.9476623535156, "logps/rejected": -375.9476623535156, "loss": 0.0627, "rewards/accuracies": 1.0, "rewards/chosen": -0.1841990053653717, "rewards/margins": 14.774444580078125, "rewards/rejected": -14.958643913269043, "step": 2283 }, { "epoch": 0.78, "learning_rate": 1.3946507534169904e-06, "logits/chosen": -0.47265416383743286, "logits/rejected": -0.4412694275379181, "logps/chosen": -172.21006774902344, "logps/rejected": -303.160400390625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.9239422082901001, "rewards/margins": 13.509796142578125, "rewards/rejected": -14.433738708496094, "step": 2284 }, { "epoch": 0.78, "learning_rate": 1.394142847743466e-06, "logits/chosen": -0.32528048753738403, "logits/rejected": -0.31960129737854004, "logps/chosen": -142.2592315673828, "logps/rejected": -284.150146484375, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -2.014468193054199, "rewards/margins": 11.047598838806152, "rewards/rejected": -13.062066078186035, "step": 2285 }, { "epoch": 0.78, "learning_rate": 1.3936348216647412e-06, "logits/chosen": -0.4839843213558197, "logits/rejected": -0.42481762170791626, "logps/chosen": -242.91529846191406, "logps/rejected": -394.0207214355469, "loss": 0.0278, "rewards/accuracies": 0.9375, "rewards/chosen": -1.637488603591919, "rewards/margins": 16.461027145385742, "rewards/rejected": -18.0985164642334, "step": 2286 }, { "epoch": 0.78, "learning_rate": 1.3931266753360107e-06, "logits/chosen": -0.3964136242866516, "logits/rejected": -0.36520063877105713, "logps/chosen": -196.11566162109375, "logps/rejected": -275.71343994140625, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.7557661533355713, "rewards/margins": 11.25980281829834, "rewards/rejected": -12.015569686889648, "step": 2287 }, { "epoch": 0.78, "learning_rate": 1.392618408912506e-06, "logits/chosen": -0.4896022081375122, "logits/rejected": -0.4720149636268616, "logps/chosen": -151.96217346191406, "logps/rejected": -307.9397277832031, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.1921635866165161, "rewards/margins": 14.378744125366211, "rewards/rejected": -15.570907592773438, "step": 2288 }, { "epoch": 0.78, "learning_rate": 1.3921100225494958e-06, "logits/chosen": -0.49564242362976074, "logits/rejected": -0.4581349492073059, "logps/chosen": -193.14004516601562, "logps/rejected": -319.709228515625, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 0.46459245681762695, "rewards/margins": 15.4635591506958, "rewards/rejected": -14.998966217041016, "step": 2289 }, { "epoch": 0.78, "learning_rate": 1.3916015164022849e-06, "logits/chosen": -0.5045643448829651, "logits/rejected": -0.5071654915809631, "logps/chosen": -160.75021362304688, "logps/rejected": -321.0534362792969, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.8871136903762817, "rewards/margins": 11.707805633544922, "rewards/rejected": -13.59492015838623, "step": 2290 }, { "epoch": 0.78, "learning_rate": 1.391092890626215e-06, "logits/chosen": -0.42162132263183594, "logits/rejected": -0.39974939823150635, "logps/chosen": -222.16650390625, "logps/rejected": -382.5563049316406, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -0.2598075866699219, "rewards/margins": 14.64686393737793, "rewards/rejected": -14.906673431396484, "step": 2291 }, { "epoch": 0.78, "learning_rate": 1.3905841453766637e-06, "logits/chosen": -0.5283172130584717, "logits/rejected": -0.5206425786018372, "logps/chosen": -244.09841918945312, "logps/rejected": -333.96856689453125, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -1.381083369255066, "rewards/margins": 12.910852432250977, "rewards/rejected": -14.291934967041016, "step": 2292 }, { "epoch": 0.78, "learning_rate": 1.3900752808090468e-06, "logits/chosen": -0.5199026465415955, "logits/rejected": -0.4787660837173462, "logps/chosen": -239.8296356201172, "logps/rejected": -363.0701904296875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.9822815656661987, "rewards/margins": 16.9199275970459, "rewards/rejected": -17.902210235595703, "step": 2293 }, { "epoch": 0.78, "learning_rate": 1.3895662970788148e-06, "logits/chosen": -0.5630369186401367, "logits/rejected": -0.48817363381385803, "logps/chosen": -258.2994079589844, "logps/rejected": -371.926025390625, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": -0.8562044501304626, "rewards/margins": 16.043258666992188, "rewards/rejected": -16.899463653564453, "step": 2294 }, { "epoch": 0.78, "learning_rate": 1.3890571943414546e-06, "logits/chosen": -0.4653392434120178, "logits/rejected": -0.44008222222328186, "logps/chosen": -245.38418579101562, "logps/rejected": -395.43792724609375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.148712396621704, "rewards/margins": 15.223220825195312, "rewards/rejected": -17.37193489074707, "step": 2295 }, { "epoch": 0.78, "learning_rate": 1.3885479727524914e-06, "logits/chosen": -0.44480153918266296, "logits/rejected": -0.43484005331993103, "logps/chosen": -165.94808959960938, "logps/rejected": -312.04376220703125, "loss": 0.0344, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1100683212280273, "rewards/margins": 11.938616752624512, "rewards/rejected": -14.048685073852539, "step": 2296 }, { "epoch": 0.78, "learning_rate": 1.3880386324674839e-06, "logits/chosen": -0.36652666330337524, "logits/rejected": -0.3208070993423462, "logps/chosen": -176.330810546875, "logps/rejected": -270.9109191894531, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.6840114593505859, "rewards/margins": 13.499828338623047, "rewards/rejected": -14.183841705322266, "step": 2297 }, { "epoch": 0.78, "learning_rate": 1.3875291736420297e-06, "logits/chosen": -0.48272621631622314, "logits/rejected": -0.4531199634075165, "logps/chosen": -175.99855041503906, "logps/rejected": -309.03668212890625, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.1698756217956543, "rewards/margins": 14.152295112609863, "rewards/rejected": -15.32217025756836, "step": 2298 }, { "epoch": 0.78, "learning_rate": 1.3870195964317608e-06, "logits/chosen": -0.49521374702453613, "logits/rejected": -0.4453235864639282, "logps/chosen": -213.51332092285156, "logps/rejected": -315.0392150878906, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.6080929040908813, "rewards/margins": 14.027976989746094, "rewards/rejected": -14.636068344116211, "step": 2299 }, { "epoch": 0.78, "learning_rate": 1.386509900992346e-06, "logits/chosen": -0.4820748567581177, "logits/rejected": -0.4711836278438568, "logps/chosen": -194.6881103515625, "logps/rejected": -364.0968322753906, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -3.087127685546875, "rewards/margins": 14.030389785766602, "rewards/rejected": -17.117517471313477, "step": 2300 }, { "epoch": 0.79, "learning_rate": 1.3860000874794907e-06, "logits/chosen": -0.4109657406806946, "logits/rejected": -0.3653627336025238, "logps/chosen": -196.60589599609375, "logps/rejected": -358.3996276855469, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.0932544469833374, "rewards/margins": 15.48130989074707, "rewards/rejected": -16.57456398010254, "step": 2301 }, { "epoch": 0.79, "learning_rate": 1.3854901560489354e-06, "logits/chosen": -0.3586757481098175, "logits/rejected": -0.34172093868255615, "logps/chosen": -208.52883911132812, "logps/rejected": -369.1207275390625, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7953351140022278, "rewards/margins": 16.57461929321289, "rewards/rejected": -17.369953155517578, "step": 2302 }, { "epoch": 0.79, "learning_rate": 1.3849801068564576e-06, "logits/chosen": -0.4294581413269043, "logits/rejected": -0.4148881733417511, "logps/chosen": -200.2132568359375, "logps/rejected": -338.36419677734375, "loss": 0.0966, "rewards/accuracies": 0.9375, "rewards/chosen": -0.44169363379478455, "rewards/margins": 14.257646560668945, "rewards/rejected": -14.6993408203125, "step": 2303 }, { "epoch": 0.79, "learning_rate": 1.3844699400578693e-06, "logits/chosen": -0.44957345724105835, "logits/rejected": -0.4145696461200714, "logps/chosen": -206.5742645263672, "logps/rejected": -285.6876525878906, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": 0.1144074946641922, "rewards/margins": 12.58120346069336, "rewards/rejected": -12.466794967651367, "step": 2304 }, { "epoch": 0.79, "learning_rate": 1.3839596558090207e-06, "logits/chosen": -0.48760664463043213, "logits/rejected": -0.4642045199871063, "logps/chosen": -165.2512664794922, "logps/rejected": -293.7466735839844, "loss": 0.0278, "rewards/accuracies": 0.9375, "rewards/chosen": -2.678378105163574, "rewards/margins": 13.128576278686523, "rewards/rejected": -15.806954383850098, "step": 2305 }, { "epoch": 0.79, "learning_rate": 1.383449254265796e-06, "logits/chosen": -0.3995923399925232, "logits/rejected": -0.36864960193634033, "logps/chosen": -158.11122131347656, "logps/rejected": -261.51904296875, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -1.375652551651001, "rewards/margins": 11.424436569213867, "rewards/rejected": -12.800090789794922, "step": 2306 }, { "epoch": 0.79, "learning_rate": 1.3829387355841155e-06, "logits/chosen": -0.4302188456058502, "logits/rejected": -0.3980359435081482, "logps/chosen": -269.4266662597656, "logps/rejected": -373.17901611328125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.2758212089538574, "rewards/margins": 14.098209381103516, "rewards/rejected": -15.374032974243164, "step": 2307 }, { "epoch": 0.79, "learning_rate": 1.3824280999199365e-06, "logits/chosen": -0.4501820206642151, "logits/rejected": -0.39604708552360535, "logps/chosen": -228.19610595703125, "logps/rejected": -329.57940673828125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.02146732807159424, "rewards/margins": 14.82969856262207, "rewards/rejected": -14.808231353759766, "step": 2308 }, { "epoch": 0.79, "learning_rate": 1.3819173474292502e-06, "logits/chosen": -0.5176168084144592, "logits/rejected": -0.5009274482727051, "logps/chosen": -187.74636840820312, "logps/rejected": -319.0906982421875, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -0.08129069209098816, "rewards/margins": 13.974153518676758, "rewards/rejected": -14.055445671081543, "step": 2309 }, { "epoch": 0.79, "learning_rate": 1.3814064782680847e-06, "logits/chosen": -0.5092946887016296, "logits/rejected": -0.49033522605895996, "logps/chosen": -222.8961181640625, "logps/rejected": -362.77227783203125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2968767583370209, "rewards/margins": 15.45422077178955, "rewards/rejected": -15.15734577178955, "step": 2310 }, { "epoch": 0.79, "learning_rate": 1.3808954925925041e-06, "logits/chosen": -0.46248334646224976, "logits/rejected": -0.4452892243862152, "logps/chosen": -173.57969665527344, "logps/rejected": -338.6146545410156, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.7613208293914795, "rewards/margins": 13.90262222290039, "rewards/rejected": -16.663944244384766, "step": 2311 }, { "epoch": 0.79, "learning_rate": 1.3803843905586067e-06, "logits/chosen": -0.45080894231796265, "logits/rejected": -0.4347894489765167, "logps/chosen": -178.9904022216797, "logps/rejected": -325.42083740234375, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -1.0909353494644165, "rewards/margins": 13.680662155151367, "rewards/rejected": -14.771597862243652, "step": 2312 }, { "epoch": 0.79, "learning_rate": 1.3798731723225273e-06, "logits/chosen": -0.5102605223655701, "logits/rejected": -0.4649409055709839, "logps/chosen": -222.06573486328125, "logps/rejected": -425.784912109375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.095249891281128, "rewards/margins": 19.331043243408203, "rewards/rejected": -21.426292419433594, "step": 2313 }, { "epoch": 0.79, "learning_rate": 1.3793618380404364e-06, "logits/chosen": -0.465956449508667, "logits/rejected": -0.43627387285232544, "logps/chosen": -235.57144165039062, "logps/rejected": -324.3101806640625, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": -0.9503159523010254, "rewards/margins": 11.907115936279297, "rewards/rejected": -12.857433319091797, "step": 2314 }, { "epoch": 0.79, "learning_rate": 1.3788503878685389e-06, "logits/chosen": -0.49893349409103394, "logits/rejected": -0.48906129598617554, "logps/chosen": -218.33953857421875, "logps/rejected": -351.83404541015625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.1779818534851074, "rewards/margins": 12.610518455505371, "rewards/rejected": -13.788500785827637, "step": 2315 }, { "epoch": 0.79, "learning_rate": 1.3783388219630764e-06, "logits/chosen": -0.40081560611724854, "logits/rejected": -0.38984543085098267, "logps/chosen": -159.68096923828125, "logps/rejected": -292.073486328125, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.7081186771392822, "rewards/margins": 11.75286865234375, "rewards/rejected": -12.460987091064453, "step": 2316 }, { "epoch": 0.79, "learning_rate": 1.377827140480325e-06, "logits/chosen": -0.537779688835144, "logits/rejected": -0.5012116432189941, "logps/chosen": -279.00140380859375, "logps/rejected": -409.3265380859375, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": 0.35284024477005005, "rewards/margins": 15.974377632141113, "rewards/rejected": -15.621535301208496, "step": 2317 }, { "epoch": 0.79, "learning_rate": 1.3773153435765963e-06, "logits/chosen": -0.43265774846076965, "logits/rejected": -0.40584784746170044, "logps/chosen": -185.72927856445312, "logps/rejected": -312.39520263671875, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.8296332359313965, "rewards/margins": 13.125072479248047, "rewards/rejected": -14.954706192016602, "step": 2318 }, { "epoch": 0.79, "learning_rate": 1.376803431408237e-06, "logits/chosen": -0.5442944169044495, "logits/rejected": -0.47936511039733887, "logps/chosen": -223.06253051757812, "logps/rejected": -343.7677307128906, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.40908536314964294, "rewards/margins": 14.113930702209473, "rewards/rejected": -14.523015975952148, "step": 2319 }, { "epoch": 0.79, "learning_rate": 1.3762914041316296e-06, "logits/chosen": -0.36728575825691223, "logits/rejected": -0.30163317918777466, "logps/chosen": -198.76815795898438, "logps/rejected": -307.15618896484375, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6724792718887329, "rewards/margins": 14.804515838623047, "rewards/rejected": -15.476995468139648, "step": 2320 }, { "epoch": 0.79, "learning_rate": 1.375779261903191e-06, "logits/chosen": -0.42612791061401367, "logits/rejected": -0.38995614647865295, "logps/chosen": -202.36549377441406, "logps/rejected": -321.6221618652344, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.7849719524383545, "rewards/margins": 14.679649353027344, "rewards/rejected": -16.464622497558594, "step": 2321 }, { "epoch": 0.79, "learning_rate": 1.3752670048793743e-06, "logits/chosen": -0.35775691270828247, "logits/rejected": -0.3003390431404114, "logps/chosen": -183.3656005859375, "logps/rejected": -259.10931396484375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -2.262769937515259, "rewards/margins": 12.023555755615234, "rewards/rejected": -14.286324501037598, "step": 2322 }, { "epoch": 0.79, "learning_rate": 1.374754633216666e-06, "logits/chosen": -0.4569890797138214, "logits/rejected": -0.44529709219932556, "logps/chosen": -199.0763702392578, "logps/rejected": -305.3923645019531, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -1.4747977256774902, "rewards/margins": 11.253756523132324, "rewards/rejected": -12.728554725646973, "step": 2323 }, { "epoch": 0.79, "learning_rate": 1.374242147071589e-06, "logits/chosen": -0.4356200098991394, "logits/rejected": -0.43543851375579834, "logps/chosen": -152.36349487304688, "logps/rejected": -245.33384704589844, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.0181665420532227, "rewards/margins": 10.327664375305176, "rewards/rejected": -11.345831871032715, "step": 2324 }, { "epoch": 0.79, "learning_rate": 1.3737295466007004e-06, "logits/chosen": -0.3906887173652649, "logits/rejected": -0.36903175711631775, "logps/chosen": -171.74400329589844, "logps/rejected": -343.1314697265625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.8552966713905334, "rewards/margins": 14.313515663146973, "rewards/rejected": -15.16881275177002, "step": 2325 }, { "epoch": 0.79, "learning_rate": 1.3732168319605934e-06, "logits/chosen": -0.41011783480644226, "logits/rejected": -0.3823552429676056, "logps/chosen": -221.61041259765625, "logps/rejected": -296.0270690917969, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -1.3696074485778809, "rewards/margins": 11.096101760864258, "rewards/rejected": -12.465709686279297, "step": 2326 }, { "epoch": 0.79, "learning_rate": 1.3727040033078946e-06, "logits/chosen": -0.4226773977279663, "logits/rejected": -0.38406187295913696, "logps/chosen": -204.56649780273438, "logps/rejected": -308.9544677734375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.3959293365478516, "rewards/margins": 12.051252365112305, "rewards/rejected": -13.44718074798584, "step": 2327 }, { "epoch": 0.79, "learning_rate": 1.372191060799266e-06, "logits/chosen": -0.4475088119506836, "logits/rejected": -0.4324062168598175, "logps/chosen": -254.056640625, "logps/rejected": -369.21209716796875, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.6128398180007935, "rewards/margins": 11.727209091186523, "rewards/rejected": -12.340049743652344, "step": 2328 }, { "epoch": 0.79, "learning_rate": 1.371678004591405e-06, "logits/chosen": -0.39503252506256104, "logits/rejected": -0.369846373796463, "logps/chosen": -152.79222106933594, "logps/rejected": -308.14556884765625, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -0.398364782333374, "rewards/margins": 14.19390869140625, "rewards/rejected": -14.59227466583252, "step": 2329 }, { "epoch": 0.8, "learning_rate": 1.3711648348410425e-06, "logits/chosen": -0.39937543869018555, "logits/rejected": -0.3774169683456421, "logps/chosen": -215.7429656982422, "logps/rejected": -323.9293212890625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.11819998919963837, "rewards/margins": 13.83340072631836, "rewards/rejected": -13.715200424194336, "step": 2330 }, { "epoch": 0.8, "learning_rate": 1.3706515517049453e-06, "logits/chosen": -0.41810935735702515, "logits/rejected": -0.41073498129844666, "logps/chosen": -217.35110473632812, "logps/rejected": -407.8724365234375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.1802430152893066, "rewards/margins": 16.06153678894043, "rewards/rejected": -18.24178123474121, "step": 2331 }, { "epoch": 0.8, "learning_rate": 1.3701381553399146e-06, "logits/chosen": -0.47843897342681885, "logits/rejected": -0.4338051676750183, "logps/chosen": -228.14044189453125, "logps/rejected": -282.01885986328125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.26281681656837463, "rewards/margins": 13.778456687927246, "rewards/rejected": -14.04127311706543, "step": 2332 }, { "epoch": 0.8, "learning_rate": 1.369624645902785e-06, "logits/chosen": -0.40295422077178955, "logits/rejected": -0.3765568435192108, "logps/chosen": -136.75283813476562, "logps/rejected": -202.39964294433594, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.9635424613952637, "rewards/margins": 8.711088180541992, "rewards/rejected": -10.674631118774414, "step": 2333 }, { "epoch": 0.8, "learning_rate": 1.3691110235504268e-06, "logits/chosen": -0.3901672065258026, "logits/rejected": -0.37885749340057373, "logps/chosen": -225.14120483398438, "logps/rejected": -369.68548583984375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.6100343465805054, "rewards/margins": 13.734516143798828, "rewards/rejected": -14.344551086425781, "step": 2334 }, { "epoch": 0.8, "learning_rate": 1.3685972884397452e-06, "logits/chosen": -0.40892067551612854, "logits/rejected": -0.3823591470718384, "logps/chosen": -251.131591796875, "logps/rejected": -333.1059875488281, "loss": 0.0445, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8444626927375793, "rewards/margins": 12.356391906738281, "rewards/rejected": -13.200854301452637, "step": 2335 }, { "epoch": 0.8, "learning_rate": 1.3680834407276785e-06, "logits/chosen": -0.469631552696228, "logits/rejected": -0.45145469903945923, "logps/chosen": -194.25088500976562, "logps/rejected": -320.7614440917969, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.7868189811706543, "rewards/margins": 12.751184463500977, "rewards/rejected": -14.538002967834473, "step": 2336 }, { "epoch": 0.8, "learning_rate": 1.3675694805712003e-06, "logits/chosen": -0.45754292607307434, "logits/rejected": -0.40873950719833374, "logps/chosen": -228.68516540527344, "logps/rejected": -299.0184326171875, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": 0.2039119452238083, "rewards/margins": 11.54401969909668, "rewards/rejected": -11.340106964111328, "step": 2337 }, { "epoch": 0.8, "learning_rate": 1.3670554081273186e-06, "logits/chosen": -0.5264040231704712, "logits/rejected": -0.5292217135429382, "logps/chosen": -203.09422302246094, "logps/rejected": -325.7848815917969, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -0.9874251484870911, "rewards/margins": 11.36914348602295, "rewards/rejected": -12.3565673828125, "step": 2338 }, { "epoch": 0.8, "learning_rate": 1.366541223553075e-06, "logits/chosen": -0.3925975263118744, "logits/rejected": -0.37552887201309204, "logps/chosen": -180.87445068359375, "logps/rejected": -286.5572814941406, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.533221960067749, "rewards/margins": 12.704668045043945, "rewards/rejected": -15.237890243530273, "step": 2339 }, { "epoch": 0.8, "learning_rate": 1.3660269270055458e-06, "logits/chosen": -0.5396683216094971, "logits/rejected": -0.5029234290122986, "logps/chosen": -209.41981506347656, "logps/rejected": -293.9040222167969, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.7035807371139526, "rewards/margins": 12.741119384765625, "rewards/rejected": -13.44469928741455, "step": 2340 }, { "epoch": 0.8, "learning_rate": 1.3655125186418422e-06, "logits/chosen": -0.34645015001296997, "logits/rejected": -0.3442443907260895, "logps/chosen": -128.4299774169922, "logps/rejected": -243.75747680664062, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.2763158082962036, "rewards/margins": 9.717911720275879, "rewards/rejected": -10.994227409362793, "step": 2341 }, { "epoch": 0.8, "learning_rate": 1.364997998619108e-06, "logits/chosen": -0.40343183279037476, "logits/rejected": -0.35044828057289124, "logps/chosen": -263.60198974609375, "logps/rejected": -342.24530029296875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.43802815675735474, "rewards/margins": 15.086642265319824, "rewards/rejected": -15.524669647216797, "step": 2342 }, { "epoch": 0.8, "learning_rate": 1.3644833670945222e-06, "logits/chosen": -0.42427879571914673, "logits/rejected": -0.42074403166770935, "logps/chosen": -156.49896240234375, "logps/rejected": -305.0325927734375, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -1.7807810306549072, "rewards/margins": 11.575072288513184, "rewards/rejected": -13.355853080749512, "step": 2343 }, { "epoch": 0.8, "learning_rate": 1.363968624225298e-06, "logits/chosen": -0.37088266015052795, "logits/rejected": -0.3403075337409973, "logps/chosen": -186.93804931640625, "logps/rejected": -379.3214111328125, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -0.3122630715370178, "rewards/margins": 17.502614974975586, "rewards/rejected": -17.814878463745117, "step": 2344 }, { "epoch": 0.8, "learning_rate": 1.3634537701686817e-06, "logits/chosen": -0.5145509839057922, "logits/rejected": -0.5072956085205078, "logps/chosen": -220.6946258544922, "logps/rejected": -323.7173156738281, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -2.2167537212371826, "rewards/margins": 10.792469024658203, "rewards/rejected": -13.009222030639648, "step": 2345 }, { "epoch": 0.8, "learning_rate": 1.3629388050819546e-06, "logits/chosen": -0.45150622725486755, "logits/rejected": -0.4191000461578369, "logps/chosen": -200.8042755126953, "logps/rejected": -315.29345703125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.798275887966156, "rewards/margins": 13.058956146240234, "rewards/rejected": -13.857234001159668, "step": 2346 }, { "epoch": 0.8, "learning_rate": 1.3624237291224313e-06, "logits/chosen": -0.3082824945449829, "logits/rejected": -0.28302523493766785, "logps/chosen": -157.0912322998047, "logps/rejected": -228.93838500976562, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.6606017351150513, "rewards/margins": 9.237223625183105, "rewards/rejected": -10.8978271484375, "step": 2347 }, { "epoch": 0.8, "learning_rate": 1.3619085424474599e-06, "logits/chosen": -0.4423970878124237, "logits/rejected": -0.4088040590286255, "logps/chosen": -247.92633056640625, "logps/rejected": -371.7664489746094, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.4218769967556, "rewards/margins": 14.377786636352539, "rewards/rejected": -14.799663543701172, "step": 2348 }, { "epoch": 0.8, "learning_rate": 1.3613932452144234e-06, "logits/chosen": -0.3135484755039215, "logits/rejected": -0.29540178179740906, "logps/chosen": -130.68316650390625, "logps/rejected": -224.04185485839844, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -1.7120360136032104, "rewards/margins": 9.935018539428711, "rewards/rejected": -11.647053718566895, "step": 2349 }, { "epoch": 0.8, "learning_rate": 1.360877837580738e-06, "logits/chosen": -0.34628236293792725, "logits/rejected": -0.3217584490776062, "logps/chosen": -156.1222686767578, "logps/rejected": -320.3074951171875, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -2.097440242767334, "rewards/margins": 14.709837913513184, "rewards/rejected": -16.80727767944336, "step": 2350 }, { "epoch": 0.8, "learning_rate": 1.3603623197038534e-06, "logits/chosen": -0.3332567512989044, "logits/rejected": -0.30495283007621765, "logps/chosen": -231.30503845214844, "logps/rejected": -391.3965759277344, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.2628803253173828, "rewards/margins": 15.366785049438477, "rewards/rejected": -16.62966537475586, "step": 2351 }, { "epoch": 0.8, "learning_rate": 1.3598466917412533e-06, "logits/chosen": -0.3490583896636963, "logits/rejected": -0.31274545192718506, "logps/chosen": -154.7063751220703, "logps/rejected": -286.28717041015625, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -0.7135756015777588, "rewards/margins": 14.279827117919922, "rewards/rejected": -14.993401527404785, "step": 2352 }, { "epoch": 0.8, "learning_rate": 1.3593309538504547e-06, "logits/chosen": -0.3632020950317383, "logits/rejected": -0.31981131434440613, "logps/chosen": -231.26034545898438, "logps/rejected": -348.1495361328125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.7501382827758789, "rewards/margins": 15.352084159851074, "rewards/rejected": -16.102222442626953, "step": 2353 }, { "epoch": 0.8, "learning_rate": 1.3588151061890086e-06, "logits/chosen": -0.4132368862628937, "logits/rejected": -0.3802357017993927, "logps/chosen": -191.56130981445312, "logps/rejected": -342.7354431152344, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": 0.04749360680580139, "rewards/margins": 15.458507537841797, "rewards/rejected": -15.41101360321045, "step": 2354 }, { "epoch": 0.8, "learning_rate": 1.358299148914499e-06, "logits/chosen": -0.579531192779541, "logits/rejected": -0.5595876574516296, "logps/chosen": -262.29345703125, "logps/rejected": -394.0904846191406, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9448232054710388, "rewards/margins": 14.007766723632812, "rewards/rejected": -14.952589988708496, "step": 2355 }, { "epoch": 0.8, "learning_rate": 1.3577830821845447e-06, "logits/chosen": -0.37188664078712463, "logits/rejected": -0.342765212059021, "logps/chosen": -220.53366088867188, "logps/rejected": -313.0572204589844, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.5360581874847412, "rewards/margins": 13.39602279663086, "rewards/rejected": -14.932083129882812, "step": 2356 }, { "epoch": 0.8, "learning_rate": 1.3572669061567959e-06, "logits/chosen": -0.4219329357147217, "logits/rejected": -0.40924787521362305, "logps/chosen": -237.07212829589844, "logps/rejected": -398.5165100097656, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.24499569833278656, "rewards/margins": 16.534666061401367, "rewards/rejected": -16.7796630859375, "step": 2357 }, { "epoch": 0.8, "learning_rate": 1.3567506209889377e-06, "logits/chosen": -0.4685179889202118, "logits/rejected": -0.4570695459842682, "logps/chosen": -240.355224609375, "logps/rejected": -353.52532958984375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 0.05156949162483215, "rewards/margins": 12.129935264587402, "rewards/rejected": -12.078365325927734, "step": 2358 }, { "epoch": 0.81, "learning_rate": 1.356234226838688e-06, "logits/chosen": -0.4313822388648987, "logits/rejected": -0.4124622344970703, "logps/chosen": -198.0624237060547, "logps/rejected": -289.96337890625, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": -1.9924931526184082, "rewards/margins": 10.54043197631836, "rewards/rejected": -12.53292465209961, "step": 2359 }, { "epoch": 0.81, "learning_rate": 1.3557177238637985e-06, "logits/chosen": -0.3279758393764496, "logits/rejected": -0.3154909908771515, "logps/chosen": -215.4788818359375, "logps/rejected": -353.28759765625, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -2.770005464553833, "rewards/margins": 13.083215713500977, "rewards/rejected": -15.853219985961914, "step": 2360 }, { "epoch": 0.81, "learning_rate": 1.355201112222053e-06, "logits/chosen": -0.3699704110622406, "logits/rejected": -0.3415287435054779, "logps/chosen": -151.63961791992188, "logps/rejected": -254.7771453857422, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.487928867340088, "rewards/margins": 10.65389347076416, "rewards/rejected": -12.141820907592773, "step": 2361 }, { "epoch": 0.81, "learning_rate": 1.3546843920712698e-06, "logits/chosen": -0.2792297303676605, "logits/rejected": -0.2641102969646454, "logps/chosen": -228.585205078125, "logps/rejected": -398.53436279296875, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -0.7226940393447876, "rewards/margins": 17.278209686279297, "rewards/rejected": -18.000904083251953, "step": 2362 }, { "epoch": 0.81, "learning_rate": 1.3541675635692996e-06, "logits/chosen": -0.3913232386112213, "logits/rejected": -0.3808310329914093, "logps/chosen": -183.4002685546875, "logps/rejected": -282.69818115234375, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 0.30641523003578186, "rewards/margins": 11.262031555175781, "rewards/rejected": -10.955615043640137, "step": 2363 }, { "epoch": 0.81, "learning_rate": 1.353650626874026e-06, "logits/chosen": -0.3855898082256317, "logits/rejected": -0.3655124604701996, "logps/chosen": -251.14862060546875, "logps/rejected": -425.7526550292969, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.6071387529373169, "rewards/margins": 17.951683044433594, "rewards/rejected": -18.558822631835938, "step": 2364 }, { "epoch": 0.81, "learning_rate": 1.3531335821433667e-06, "logits/chosen": -0.35077086091041565, "logits/rejected": -0.34436681866645813, "logps/chosen": -168.2758331298828, "logps/rejected": -295.0155334472656, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.8066699504852295, "rewards/margins": 12.66925048828125, "rewards/rejected": -14.475919723510742, "step": 2365 }, { "epoch": 0.81, "learning_rate": 1.3526164295352716e-06, "logits/chosen": -0.3945907652378082, "logits/rejected": -0.36985403299331665, "logps/chosen": -243.7061004638672, "logps/rejected": -402.501220703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.7146546840667725, "rewards/margins": 16.48290252685547, "rewards/rejected": -17.19755744934082, "step": 2366 }, { "epoch": 0.81, "learning_rate": 1.3520991692077228e-06, "logits/chosen": -0.3276823163032532, "logits/rejected": -0.31418007612228394, "logps/chosen": -178.54466247558594, "logps/rejected": -290.411376953125, "loss": 0.0751, "rewards/accuracies": 1.0, "rewards/chosen": -1.0810673236846924, "rewards/margins": 10.836478233337402, "rewards/rejected": -11.917546272277832, "step": 2367 }, { "epoch": 0.81, "learning_rate": 1.3515818013187375e-06, "logits/chosen": -0.41738927364349365, "logits/rejected": -0.38730522990226746, "logps/chosen": -225.90701293945312, "logps/rejected": -287.5889892578125, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -2.367556571960449, "rewards/margins": 10.687993049621582, "rewards/rejected": -13.055549621582031, "step": 2368 }, { "epoch": 0.81, "learning_rate": 1.3510643260263635e-06, "logits/chosen": -0.44268572330474854, "logits/rejected": -0.42884665727615356, "logps/chosen": -218.9153594970703, "logps/rejected": -314.9765930175781, "loss": 0.0743, "rewards/accuracies": 1.0, "rewards/chosen": -1.4872581958770752, "rewards/margins": 11.776131629943848, "rewards/rejected": -13.263389587402344, "step": 2369 }, { "epoch": 0.81, "learning_rate": 1.350546743488683e-06, "logits/chosen": -0.33051761984825134, "logits/rejected": -0.3188607096672058, "logps/chosen": -146.61257934570312, "logps/rejected": -279.6812744140625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.2777318954467773, "rewards/margins": 11.773157119750977, "rewards/rejected": -13.050888061523438, "step": 2370 }, { "epoch": 0.81, "learning_rate": 1.35002905386381e-06, "logits/chosen": -0.45182710886001587, "logits/rejected": -0.43515998125076294, "logps/chosen": -181.18441772460938, "logps/rejected": -307.651123046875, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": -1.5623383522033691, "rewards/margins": 13.563472747802734, "rewards/rejected": -15.125810623168945, "step": 2371 }, { "epoch": 0.81, "learning_rate": 1.3495112573098913e-06, "logits/chosen": -0.31997737288475037, "logits/rejected": -0.27268674969673157, "logps/chosen": -238.31134033203125, "logps/rejected": -304.0249938964844, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.4922003746032715, "rewards/margins": 11.900409698486328, "rewards/rejected": -14.392609596252441, "step": 2372 }, { "epoch": 0.81, "learning_rate": 1.3489933539851066e-06, "logits/chosen": -0.29521217942237854, "logits/rejected": -0.2779960036277771, "logps/chosen": -167.63217163085938, "logps/rejected": -343.52349853515625, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -1.3172107934951782, "rewards/margins": 15.259571075439453, "rewards/rejected": -16.5767822265625, "step": 2373 }, { "epoch": 0.81, "learning_rate": 1.3484753440476691e-06, "logits/chosen": -0.43123501539230347, "logits/rejected": -0.4150255620479584, "logps/chosen": -225.9777374267578, "logps/rejected": -379.94891357421875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.853602170944214, "rewards/margins": 16.260189056396484, "rewards/rejected": -19.11379051208496, "step": 2374 }, { "epoch": 0.81, "learning_rate": 1.3479572276558227e-06, "logits/chosen": -0.33608290553092957, "logits/rejected": -0.31133732199668884, "logps/chosen": -223.76560974121094, "logps/rejected": -325.80511474609375, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -1.5681941509246826, "rewards/margins": 11.211406707763672, "rewards/rejected": -12.779600143432617, "step": 2375 }, { "epoch": 0.81, "learning_rate": 1.3474390049678453e-06, "logits/chosen": -0.43593770265579224, "logits/rejected": -0.39427223801612854, "logps/chosen": -170.1387939453125, "logps/rejected": -243.25392150878906, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -0.4353206753730774, "rewards/margins": 10.474760055541992, "rewards/rejected": -10.910080909729004, "step": 2376 }, { "epoch": 0.81, "learning_rate": 1.3469206761420466e-06, "logits/chosen": -0.2815972864627838, "logits/rejected": -0.22646081447601318, "logps/chosen": -189.1640625, "logps/rejected": -282.9952087402344, "loss": 0.0438, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7242860794067383, "rewards/margins": 11.541929244995117, "rewards/rejected": -14.266214370727539, "step": 2377 }, { "epoch": 0.81, "learning_rate": 1.3464022413367686e-06, "logits/chosen": -0.4593889117240906, "logits/rejected": -0.4432954490184784, "logps/chosen": -259.16436767578125, "logps/rejected": -360.05206298828125, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.1773152351379395, "rewards/margins": 13.226845741271973, "rewards/rejected": -14.404160499572754, "step": 2378 }, { "epoch": 0.81, "learning_rate": 1.345883700710387e-06, "logits/chosen": -0.366500586271286, "logits/rejected": -0.34902966022491455, "logps/chosen": -193.8219757080078, "logps/rejected": -327.06396484375, "loss": 0.053, "rewards/accuracies": 1.0, "rewards/chosen": -1.4758861064910889, "rewards/margins": 11.598779678344727, "rewards/rejected": -13.074665069580078, "step": 2379 }, { "epoch": 0.81, "learning_rate": 1.3453650544213076e-06, "logits/chosen": -0.38566482067108154, "logits/rejected": -0.3460785746574402, "logps/chosen": -254.22445678710938, "logps/rejected": -339.5133056640625, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.8345070481300354, "rewards/margins": 12.517953872680664, "rewards/rejected": -13.352460861206055, "step": 2380 }, { "epoch": 0.81, "learning_rate": 1.3448463026279704e-06, "logits/chosen": -0.3353938162326813, "logits/rejected": -0.3368963897228241, "logps/chosen": -223.40476989746094, "logps/rejected": -430.0904846191406, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.608767569065094, "rewards/margins": 18.121896743774414, "rewards/rejected": -18.73066520690918, "step": 2381 }, { "epoch": 0.81, "learning_rate": 1.3443274454888468e-06, "logits/chosen": -0.40444421768188477, "logits/rejected": -0.3747287392616272, "logps/chosen": -184.87278747558594, "logps/rejected": -291.13409423828125, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -1.364397406578064, "rewards/margins": 12.633926391601562, "rewards/rejected": -13.998322486877441, "step": 2382 }, { "epoch": 0.81, "learning_rate": 1.3438084831624403e-06, "logits/chosen": -0.3162722587585449, "logits/rejected": -0.31635379791259766, "logps/chosen": -124.6333999633789, "logps/rejected": -284.75665283203125, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.356767177581787, "rewards/margins": 13.623241424560547, "rewards/rejected": -14.980009078979492, "step": 2383 }, { "epoch": 0.81, "learning_rate": 1.3432894158072871e-06, "logits/chosen": -0.45230981707572937, "logits/rejected": -0.42644616961479187, "logps/chosen": -201.6259765625, "logps/rejected": -332.5673828125, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.9664890766143799, "rewards/margins": 12.472542762756348, "rewards/rejected": -14.439030647277832, "step": 2384 }, { "epoch": 0.81, "learning_rate": 1.3427702435819547e-06, "logits/chosen": -0.42440265417099, "logits/rejected": -0.4013245105743408, "logps/chosen": -181.97201538085938, "logps/rejected": -277.67816162109375, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -2.389948844909668, "rewards/margins": 10.773568153381348, "rewards/rejected": -13.163516998291016, "step": 2385 }, { "epoch": 0.81, "learning_rate": 1.342250966645043e-06, "logits/chosen": -0.5486533045768738, "logits/rejected": -0.4902738630771637, "logps/chosen": -222.57958984375, "logps/rejected": -323.190185546875, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -1.4813642501831055, "rewards/margins": 12.293517112731934, "rewards/rejected": -13.774882316589355, "step": 2386 }, { "epoch": 0.81, "learning_rate": 1.3417315851551844e-06, "logits/chosen": -0.3752608895301819, "logits/rejected": -0.3341912031173706, "logps/chosen": -163.55007934570312, "logps/rejected": -277.4678649902344, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.9995765686035156, "rewards/margins": 12.123038291931152, "rewards/rejected": -15.122615814208984, "step": 2387 }, { "epoch": 0.82, "learning_rate": 1.3412120992710422e-06, "logits/chosen": -0.35791510343551636, "logits/rejected": -0.35344019532203674, "logps/chosen": -174.41563415527344, "logps/rejected": -304.7261962890625, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -1.0284966230392456, "rewards/margins": 14.157451629638672, "rewards/rejected": -15.18594741821289, "step": 2388 }, { "epoch": 0.82, "learning_rate": 1.340692509151313e-06, "logits/chosen": -0.4245589077472687, "logits/rejected": -0.37906092405319214, "logps/chosen": -248.3015899658203, "logps/rejected": -324.51275634765625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.16117988526821136, "rewards/margins": 13.742132186889648, "rewards/rejected": -13.903311729431152, "step": 2389 }, { "epoch": 0.82, "learning_rate": 1.3401728149547238e-06, "logits/chosen": -0.34712186455726624, "logits/rejected": -0.34775879979133606, "logps/chosen": -195.55743408203125, "logps/rejected": -372.0837707519531, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.225841760635376, "rewards/margins": 16.303741455078125, "rewards/rejected": -17.52958106994629, "step": 2390 }, { "epoch": 0.82, "learning_rate": 1.339653016840034e-06, "logits/chosen": -0.36325615644454956, "logits/rejected": -0.3433009684085846, "logps/chosen": -226.022216796875, "logps/rejected": -319.12060546875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.7000871896743774, "rewards/margins": 13.163915634155273, "rewards/rejected": -13.864002227783203, "step": 2391 }, { "epoch": 0.82, "learning_rate": 1.339133114966035e-06, "logits/chosen": -0.39550909399986267, "logits/rejected": -0.36618804931640625, "logps/chosen": -212.080322265625, "logps/rejected": -322.40374755859375, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -2.0971875190734863, "rewards/margins": 13.781641960144043, "rewards/rejected": -15.878829956054688, "step": 2392 }, { "epoch": 0.82, "learning_rate": 1.338613109491549e-06, "logits/chosen": -0.49893918633461, "logits/rejected": -0.5088251829147339, "logps/chosen": -211.06544494628906, "logps/rejected": -371.86871337890625, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": -1.7604854106903076, "rewards/margins": 14.517160415649414, "rewards/rejected": -16.277645111083984, "step": 2393 }, { "epoch": 0.82, "learning_rate": 1.3380930005754318e-06, "logits/chosen": -0.4578622579574585, "logits/rejected": -0.41685810685157776, "logps/chosen": -233.14927673339844, "logps/rejected": -320.7602844238281, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -0.9045584201812744, "rewards/margins": 11.89461898803711, "rewards/rejected": -12.799178123474121, "step": 2394 }, { "epoch": 0.82, "learning_rate": 1.3375727883765683e-06, "logits/chosen": -0.43847209215164185, "logits/rejected": -0.3690659999847412, "logps/chosen": -190.1537628173828, "logps/rejected": -215.73475646972656, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -1.4421539306640625, "rewards/margins": 10.114831924438477, "rewards/rejected": -11.556985855102539, "step": 2395 }, { "epoch": 0.82, "learning_rate": 1.3370524730538765e-06, "logits/chosen": -0.3818430006504059, "logits/rejected": -0.3483392298221588, "logps/chosen": -206.50970458984375, "logps/rejected": -348.8394470214844, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.3546061515808105, "rewards/margins": 13.730813026428223, "rewards/rejected": -15.085420608520508, "step": 2396 }, { "epoch": 0.82, "learning_rate": 1.3365320547663057e-06, "logits/chosen": -0.4571686387062073, "logits/rejected": -0.4339297413825989, "logps/chosen": -239.28805541992188, "logps/rejected": -342.2414855957031, "loss": 0.0802, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7563395500183105, "rewards/margins": 12.679817199707031, "rewards/rejected": -13.4361572265625, "step": 2397 }, { "epoch": 0.82, "learning_rate": 1.3360115336728364e-06, "logits/chosen": -0.38925132155418396, "logits/rejected": -0.3658756911754608, "logps/chosen": -146.0916748046875, "logps/rejected": -288.84234619140625, "loss": 0.1047, "rewards/accuracies": 1.0, "rewards/chosen": -1.5130809545516968, "rewards/margins": 14.435181617736816, "rewards/rejected": -15.948262214660645, "step": 2398 }, { "epoch": 0.82, "learning_rate": 1.3354909099324807e-06, "logits/chosen": -0.45194947719573975, "logits/rejected": -0.4244098961353302, "logps/chosen": -285.6606750488281, "logps/rejected": -368.5374755859375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.9922010898590088, "rewards/margins": 15.036555290222168, "rewards/rejected": -16.02875518798828, "step": 2399 }, { "epoch": 0.82, "learning_rate": 1.3349701837042817e-06, "logits/chosen": -0.40501585602760315, "logits/rejected": -0.3835321068763733, "logps/chosen": -147.52015686035156, "logps/rejected": -327.1551208496094, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.6203523874282837, "rewards/margins": 15.493547439575195, "rewards/rejected": -17.113901138305664, "step": 2400 }, { "epoch": 0.82, "learning_rate": 1.3344493551473146e-06, "logits/chosen": -0.4772643744945526, "logits/rejected": -0.46232107281684875, "logps/chosen": -189.63731384277344, "logps/rejected": -356.00555419921875, "loss": 0.0848, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3568893671035767, "rewards/margins": 13.636134147644043, "rewards/rejected": -14.993024826049805, "step": 2401 }, { "epoch": 0.82, "learning_rate": 1.3339284244206845e-06, "logits/chosen": -0.4072469472885132, "logits/rejected": -0.3825471103191376, "logps/chosen": -179.39895629882812, "logps/rejected": -296.5303955078125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.712775707244873, "rewards/margins": 13.427817344665527, "rewards/rejected": -14.140593528747559, "step": 2402 }, { "epoch": 0.82, "learning_rate": 1.3334073916835296e-06, "logits/chosen": -0.4349953830242157, "logits/rejected": -0.3912985622882843, "logps/chosen": -196.9373321533203, "logps/rejected": -283.8154602050781, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": -0.5700443983078003, "rewards/margins": 14.195399284362793, "rewards/rejected": -14.765442848205566, "step": 2403 }, { "epoch": 0.82, "learning_rate": 1.3328862570950174e-06, "logits/chosen": -0.5231958031654358, "logits/rejected": -0.50523442029953, "logps/chosen": -221.77468872070312, "logps/rejected": -385.3866882324219, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": 0.321264386177063, "rewards/margins": 15.338652610778809, "rewards/rejected": -15.017388343811035, "step": 2404 }, { "epoch": 0.82, "learning_rate": 1.3323650208143477e-06, "logits/chosen": -0.4821774363517761, "logits/rejected": -0.46115419268608093, "logps/chosen": -163.30880737304688, "logps/rejected": -276.13372802734375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.1216901540756226, "rewards/margins": 12.754547119140625, "rewards/rejected": -13.876236915588379, "step": 2405 }, { "epoch": 0.82, "learning_rate": 1.3318436830007507e-06, "logits/chosen": -0.3776719272136688, "logits/rejected": -0.3744809329509735, "logps/chosen": -192.68643188476562, "logps/rejected": -379.03076171875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.894977867603302, "rewards/margins": 13.467034339904785, "rewards/rejected": -14.362011909484863, "step": 2406 }, { "epoch": 0.82, "learning_rate": 1.331322243813488e-06, "logits/chosen": -0.5706880688667297, "logits/rejected": -0.5387275815010071, "logps/chosen": -232.82476806640625, "logps/rejected": -334.96441650390625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7437140941619873, "rewards/margins": 14.510988235473633, "rewards/rejected": -15.254701614379883, "step": 2407 }, { "epoch": 0.82, "learning_rate": 1.3308007034118528e-06, "logits/chosen": -0.47111576795578003, "logits/rejected": -0.4447893798351288, "logps/chosen": -231.8627166748047, "logps/rejected": -327.4032287597656, "loss": 0.0518, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9657089710235596, "rewards/margins": 13.20333480834961, "rewards/rejected": -14.169044494628906, "step": 2408 }, { "epoch": 0.82, "learning_rate": 1.3302790619551672e-06, "logits/chosen": -0.36770084500312805, "logits/rejected": -0.3668922483921051, "logps/chosen": -126.9510498046875, "logps/rejected": -249.5858154296875, "loss": 0.0895, "rewards/accuracies": 1.0, "rewards/chosen": -1.3759456872940063, "rewards/margins": 10.250381469726562, "rewards/rejected": -11.626327514648438, "step": 2409 }, { "epoch": 0.82, "learning_rate": 1.329757319602786e-06, "logits/chosen": -0.48733606934547424, "logits/rejected": -0.46173036098480225, "logps/chosen": -206.9461212158203, "logps/rejected": -356.197265625, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": 0.3785576820373535, "rewards/margins": 16.479944229125977, "rewards/rejected": -16.10138511657715, "step": 2410 }, { "epoch": 0.82, "learning_rate": 1.3292354765140949e-06, "logits/chosen": -0.45095136761665344, "logits/rejected": -0.41347137093544006, "logps/chosen": -194.39418029785156, "logps/rejected": -301.8453674316406, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 0.6011412739753723, "rewards/margins": 15.456653594970703, "rewards/rejected": -14.855512619018555, "step": 2411 }, { "epoch": 0.82, "learning_rate": 1.328713532848509e-06, "logits/chosen": -0.5040669441223145, "logits/rejected": -0.5062122941017151, "logps/chosen": -163.3111114501953, "logps/rejected": -295.36187744140625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -0.8224285840988159, "rewards/margins": 10.937164306640625, "rewards/rejected": -11.759593963623047, "step": 2412 }, { "epoch": 0.82, "learning_rate": 1.3281914887654752e-06, "logits/chosen": -0.48394137620925903, "logits/rejected": -0.458687961101532, "logps/chosen": -220.94944763183594, "logps/rejected": -296.89837646484375, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -0.5306240320205688, "rewards/margins": 11.517834663391113, "rewards/rejected": -12.048458099365234, "step": 2413 }, { "epoch": 0.82, "learning_rate": 1.3276693444244708e-06, "logits/chosen": -0.4356420636177063, "logits/rejected": -0.4278414845466614, "logps/chosen": -238.43472290039062, "logps/rejected": -423.299560546875, "loss": 0.0615, "rewards/accuracies": 0.9375, "rewards/chosen": -0.10029025375843048, "rewards/margins": 18.726051330566406, "rewards/rejected": -18.826343536376953, "step": 2414 }, { "epoch": 0.82, "learning_rate": 1.3271470999850036e-06, "logits/chosen": -0.5898360013961792, "logits/rejected": -0.5590648651123047, "logps/chosen": -209.49879455566406, "logps/rejected": -323.0888671875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.5359299182891846, "rewards/margins": 13.36346435546875, "rewards/rejected": -13.899394989013672, "step": 2415 }, { "epoch": 0.82, "learning_rate": 1.3266247556066122e-06, "logits/chosen": -0.5018759965896606, "logits/rejected": -0.4882533848285675, "logps/chosen": -150.63140869140625, "logps/rejected": -284.314453125, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -1.0335769653320312, "rewards/margins": 13.92322826385498, "rewards/rejected": -14.956804275512695, "step": 2416 }, { "epoch": 0.82, "learning_rate": 1.3261023114488653e-06, "logits/chosen": -0.5408195853233337, "logits/rejected": -0.5149949193000793, "logps/chosen": -171.529296875, "logps/rejected": -333.5534362792969, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -1.3748831748962402, "rewards/margins": 15.497669219970703, "rewards/rejected": -16.87255096435547, "step": 2417 }, { "epoch": 0.83, "learning_rate": 1.3255797676713628e-06, "logits/chosen": -0.5388728380203247, "logits/rejected": -0.5176592469215393, "logps/chosen": -169.56985473632812, "logps/rejected": -309.7115173339844, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.409810870885849, "rewards/margins": 13.737929344177246, "rewards/rejected": -14.14774227142334, "step": 2418 }, { "epoch": 0.83, "learning_rate": 1.3250571244337342e-06, "logits/chosen": -0.4719082713127136, "logits/rejected": -0.4538227319717407, "logps/chosen": -208.15667724609375, "logps/rejected": -327.4112243652344, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.8721704483032227, "rewards/margins": 12.862793922424316, "rewards/rejected": -13.734965324401855, "step": 2419 }, { "epoch": 0.83, "learning_rate": 1.3245343818956398e-06, "logits/chosen": -0.4679131805896759, "logits/rejected": -0.42817413806915283, "logps/chosen": -173.91061401367188, "logps/rejected": -297.3661193847656, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.7493860721588135, "rewards/margins": 12.866936683654785, "rewards/rejected": -14.616321563720703, "step": 2420 }, { "epoch": 0.83, "learning_rate": 1.3240115402167704e-06, "logits/chosen": -0.4259050786495209, "logits/rejected": -0.40677186846733093, "logps/chosen": -193.43212890625, "logps/rejected": -314.6592712402344, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 0.17610257863998413, "rewards/margins": 13.861918449401855, "rewards/rejected": -13.685815811157227, "step": 2421 }, { "epoch": 0.83, "learning_rate": 1.3234885995568475e-06, "logits/chosen": -0.489130437374115, "logits/rejected": -0.45467060804367065, "logps/chosen": -234.24014282226562, "logps/rejected": -331.97607421875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.5934354066848755, "rewards/margins": 13.259509086608887, "rewards/rejected": -13.852943420410156, "step": 2422 }, { "epoch": 0.83, "learning_rate": 1.3229655600756213e-06, "logits/chosen": -0.49117231369018555, "logits/rejected": -0.47109636664390564, "logps/chosen": -190.41983032226562, "logps/rejected": -317.1773986816406, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -1.3437070846557617, "rewards/margins": 12.431496620178223, "rewards/rejected": -13.775203704833984, "step": 2423 }, { "epoch": 0.83, "learning_rate": 1.3224424219328735e-06, "logits/chosen": -0.6242944598197937, "logits/rejected": -0.5926893949508667, "logps/chosen": -221.69949340820312, "logps/rejected": -351.61492919921875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.33946293592453003, "rewards/margins": 13.457049369812012, "rewards/rejected": -13.796513557434082, "step": 2424 }, { "epoch": 0.83, "learning_rate": 1.3219191852884154e-06, "logits/chosen": -0.4895980656147003, "logits/rejected": -0.44879353046417236, "logps/chosen": -184.79727172851562, "logps/rejected": -267.1202087402344, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -0.923626184463501, "rewards/margins": 12.464630126953125, "rewards/rejected": -13.388257026672363, "step": 2425 }, { "epoch": 0.83, "learning_rate": 1.321395850302089e-06, "logits/chosen": -0.5508759021759033, "logits/rejected": -0.552527666091919, "logps/chosen": -183.6857452392578, "logps/rejected": -317.01556396484375, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -1.8678817749023438, "rewards/margins": 11.57948112487793, "rewards/rejected": -13.44736385345459, "step": 2426 }, { "epoch": 0.83, "learning_rate": 1.3208724171337657e-06, "logits/chosen": -0.4266485571861267, "logits/rejected": -0.40937429666519165, "logps/chosen": -203.56434631347656, "logps/rejected": -361.3781433105469, "loss": 0.0348, "rewards/accuracies": 1.0, "rewards/chosen": -1.8923488855361938, "rewards/margins": 12.696166038513184, "rewards/rejected": -14.588516235351562, "step": 2427 }, { "epoch": 0.83, "learning_rate": 1.320348885943347e-06, "logits/chosen": -0.5091579556465149, "logits/rejected": -0.4954129159450531, "logps/chosen": -210.58175659179688, "logps/rejected": -317.0348815917969, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 0.2949349582195282, "rewards/margins": 14.100332260131836, "rewards/rejected": -13.805397033691406, "step": 2428 }, { "epoch": 0.83, "learning_rate": 1.3198252568907646e-06, "logits/chosen": -0.5681054592132568, "logits/rejected": -0.5409520864486694, "logps/chosen": -198.3098907470703, "logps/rejected": -381.2568054199219, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -0.5366615653038025, "rewards/margins": 16.382564544677734, "rewards/rejected": -16.919225692749023, "step": 2429 }, { "epoch": 0.83, "learning_rate": 1.3193015301359798e-06, "logits/chosen": -0.4659678637981415, "logits/rejected": -0.42055025696754456, "logps/chosen": -330.57525634765625, "logps/rejected": -355.4762268066406, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.9827184677124023, "rewards/margins": 15.364465713500977, "rewards/rejected": -14.381746292114258, "step": 2430 }, { "epoch": 0.83, "learning_rate": 1.3187777058389843e-06, "logits/chosen": -0.5813422799110413, "logits/rejected": -0.5482349991798401, "logps/chosen": -201.64552307128906, "logps/rejected": -315.82025146484375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.20596572756767273, "rewards/margins": 14.73293685913086, "rewards/rejected": -14.938902854919434, "step": 2431 }, { "epoch": 0.83, "learning_rate": 1.3182537841597987e-06, "logits/chosen": -0.476496160030365, "logits/rejected": -0.4718920886516571, "logps/chosen": -138.6707305908203, "logps/rejected": -308.58380126953125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.41473233699798584, "rewards/margins": 13.144779205322266, "rewards/rejected": -13.5595121383667, "step": 2432 }, { "epoch": 0.83, "learning_rate": 1.317729765258474e-06, "logits/chosen": -0.6287312507629395, "logits/rejected": -0.616989254951477, "logps/chosen": -165.33883666992188, "logps/rejected": -248.36178588867188, "loss": 0.0536, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8092641234397888, "rewards/margins": 10.970770835876465, "rewards/rejected": -11.780036926269531, "step": 2433 }, { "epoch": 0.83, "learning_rate": 1.317205649295091e-06, "logits/chosen": -0.4896603226661682, "logits/rejected": -0.48945704102516174, "logps/chosen": -208.95755004882812, "logps/rejected": -382.21490478515625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.9182260036468506, "rewards/margins": 14.34245777130127, "rewards/rejected": -17.26068115234375, "step": 2434 }, { "epoch": 0.83, "learning_rate": 1.3166814364297593e-06, "logits/chosen": -0.485273152589798, "logits/rejected": -0.4687836170196533, "logps/chosen": -203.89723205566406, "logps/rejected": -329.3190612792969, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.5669391751289368, "rewards/margins": 14.715763092041016, "rewards/rejected": -15.28270149230957, "step": 2435 }, { "epoch": 0.83, "learning_rate": 1.3161571268226195e-06, "logits/chosen": -0.39298883080482483, "logits/rejected": -0.36601364612579346, "logps/chosen": -183.64593505859375, "logps/rejected": -284.73480224609375, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": -0.8810511827468872, "rewards/margins": 11.61185073852539, "rewards/rejected": -12.492901802062988, "step": 2436 }, { "epoch": 0.83, "learning_rate": 1.3156327206338403e-06, "logits/chosen": -0.49413496255874634, "logits/rejected": -0.4546705484390259, "logps/chosen": -231.76580810546875, "logps/rejected": -314.51226806640625, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.746727705001831, "rewards/margins": 11.690886497497559, "rewards/rejected": -12.437615394592285, "step": 2437 }, { "epoch": 0.83, "learning_rate": 1.3151082180236209e-06, "logits/chosen": -0.5871114134788513, "logits/rejected": -0.5791433453559875, "logps/chosen": -218.887939453125, "logps/rejected": -368.801025390625, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.514801025390625, "rewards/margins": 13.021745681762695, "rewards/rejected": -14.53654670715332, "step": 2438 }, { "epoch": 0.83, "learning_rate": 1.314583619152189e-06, "logits/chosen": -0.4956645965576172, "logits/rejected": -0.4966708719730377, "logps/chosen": -219.13351440429688, "logps/rejected": -391.2906494140625, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 0.007040753960609436, "rewards/margins": 16.64558219909668, "rewards/rejected": -16.63854217529297, "step": 2439 }, { "epoch": 0.83, "learning_rate": 1.314058924179803e-06, "logits/chosen": -0.43906545639038086, "logits/rejected": -0.40165600180625916, "logps/chosen": -154.5261688232422, "logps/rejected": -196.54244995117188, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -0.7675495743751526, "rewards/margins": 8.855204582214355, "rewards/rejected": -9.622754096984863, "step": 2440 }, { "epoch": 0.83, "learning_rate": 1.3135341332667502e-06, "logits/chosen": -0.5353299379348755, "logits/rejected": -0.4971405267715454, "logps/chosen": -243.7876434326172, "logps/rejected": -382.07916259765625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.1092190742492676, "rewards/margins": 15.274116516113281, "rewards/rejected": -17.38333511352539, "step": 2441 }, { "epoch": 0.83, "learning_rate": 1.3130092465733463e-06, "logits/chosen": -0.5801804065704346, "logits/rejected": -0.5634208917617798, "logps/chosen": -220.41250610351562, "logps/rejected": -340.1609191894531, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.951287567615509, "rewards/margins": 13.724043846130371, "rewards/rejected": -14.675333023071289, "step": 2442 }, { "epoch": 0.83, "learning_rate": 1.312484264259937e-06, "logits/chosen": -0.6402624249458313, "logits/rejected": -0.5920387506484985, "logps/chosen": -229.8040008544922, "logps/rejected": -371.1889953613281, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.007951721549034119, "rewards/margins": 17.642444610595703, "rewards/rejected": -17.6503963470459, "step": 2443 }, { "epoch": 0.83, "learning_rate": 1.3119591864868977e-06, "logits/chosen": -0.4490835964679718, "logits/rejected": -0.42355743050575256, "logps/chosen": -172.7770233154297, "logps/rejected": -326.9128112792969, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5239435434341431, "rewards/margins": 13.988626480102539, "rewards/rejected": -14.51257038116455, "step": 2444 }, { "epoch": 0.83, "learning_rate": 1.3114340134146318e-06, "logits/chosen": -0.4151935875415802, "logits/rejected": -0.4055844843387604, "logps/chosen": -153.83883666992188, "logps/rejected": -269.0667419433594, "loss": 0.0285, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4390424489974976, "rewards/margins": 10.017325401306152, "rewards/rejected": -11.456368446350098, "step": 2445 }, { "epoch": 0.83, "learning_rate": 1.3109087452035728e-06, "logits/chosen": -0.4313795268535614, "logits/rejected": -0.3982321619987488, "logps/chosen": -209.96730041503906, "logps/rejected": -331.1034851074219, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.4546476602554321, "rewards/margins": 13.097753524780273, "rewards/rejected": -14.55240249633789, "step": 2446 }, { "epoch": 0.84, "learning_rate": 1.3103833820141828e-06, "logits/chosen": -0.49406692385673523, "logits/rejected": -0.47210270166397095, "logps/chosen": -219.35801696777344, "logps/rejected": -366.8535461425781, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.5860402584075928, "rewards/margins": 16.204509735107422, "rewards/rejected": -17.790552139282227, "step": 2447 }, { "epoch": 0.84, "learning_rate": 1.3098579240069526e-06, "logits/chosen": -0.40965238213539124, "logits/rejected": -0.36040017008781433, "logps/chosen": -184.63922119140625, "logps/rejected": -263.7457275390625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.45233988761901855, "rewards/margins": 12.831340789794922, "rewards/rejected": -13.283679962158203, "step": 2448 }, { "epoch": 0.84, "learning_rate": 1.3093323713424032e-06, "logits/chosen": -0.6006994843482971, "logits/rejected": -0.5929951071739197, "logps/chosen": -221.90293884277344, "logps/rejected": -311.0968017578125, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.1791465282440186, "rewards/margins": 10.058876991271973, "rewards/rejected": -11.23802375793457, "step": 2449 }, { "epoch": 0.84, "learning_rate": 1.3088067241810833e-06, "logits/chosen": -0.4392337203025818, "logits/rejected": -0.4174039363861084, "logps/chosen": -226.79539489746094, "logps/rejected": -385.6905212402344, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -1.3487873077392578, "rewards/margins": 16.61358642578125, "rewards/rejected": -17.96237564086914, "step": 2450 }, { "epoch": 0.84, "learning_rate": 1.3082809826835705e-06, "logits/chosen": -0.41355594992637634, "logits/rejected": -0.32973790168762207, "logps/chosen": -239.89122009277344, "logps/rejected": -323.3902587890625, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.6214191913604736, "rewards/margins": 14.045975685119629, "rewards/rejected": -14.667396545410156, "step": 2451 }, { "epoch": 0.84, "learning_rate": 1.307755147010472e-06, "logits/chosen": -0.5251217484474182, "logits/rejected": -0.4765167534351349, "logps/chosen": -279.79833984375, "logps/rejected": -366.5314636230469, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -1.2408782243728638, "rewards/margins": 14.95236587524414, "rewards/rejected": -16.1932430267334, "step": 2452 }, { "epoch": 0.84, "learning_rate": 1.307229217322423e-06, "logits/chosen": -0.5121290683746338, "logits/rejected": -0.49330052733421326, "logps/chosen": -269.7798156738281, "logps/rejected": -347.33050537109375, "loss": 0.0208, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3623686730861664, "rewards/margins": 13.764068603515625, "rewards/rejected": -14.126435279846191, "step": 2453 }, { "epoch": 0.84, "learning_rate": 1.306703193780088e-06, "logits/chosen": -0.49009233713150024, "logits/rejected": -0.4802095890045166, "logps/chosen": -178.3306427001953, "logps/rejected": -279.3834533691406, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.635615348815918, "rewards/margins": 10.848522186279297, "rewards/rejected": -11.484137535095215, "step": 2454 }, { "epoch": 0.84, "learning_rate": 1.3061770765441602e-06, "logits/chosen": -0.5425843596458435, "logits/rejected": -0.5180724859237671, "logps/chosen": -163.33872985839844, "logps/rejected": -302.80767822265625, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.1576629877090454, "rewards/margins": 12.775221824645996, "rewards/rejected": -13.93288516998291, "step": 2455 }, { "epoch": 0.84, "learning_rate": 1.3056508657753608e-06, "logits/chosen": -0.46549099683761597, "logits/rejected": -0.4662875831127167, "logps/chosen": -155.6963653564453, "logps/rejected": -324.57220458984375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.903095006942749, "rewards/margins": 13.983959197998047, "rewards/rejected": -14.887055397033691, "step": 2456 }, { "epoch": 0.84, "learning_rate": 1.3051245616344398e-06, "logits/chosen": -0.5088294148445129, "logits/rejected": -0.5013548135757446, "logps/chosen": -172.4339141845703, "logps/rejected": -309.1806640625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.558056354522705, "rewards/margins": 13.084991455078125, "rewards/rejected": -14.643047332763672, "step": 2457 }, { "epoch": 0.84, "learning_rate": 1.304598164282176e-06, "logits/chosen": -0.4422615170478821, "logits/rejected": -0.4157601594924927, "logps/chosen": -143.13970947265625, "logps/rejected": -232.59915161132812, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.5367867350578308, "rewards/margins": 11.488988876342773, "rewards/rejected": -12.025775909423828, "step": 2458 }, { "epoch": 0.84, "learning_rate": 1.3040716738793767e-06, "logits/chosen": -0.4708918631076813, "logits/rejected": -0.45883774757385254, "logps/chosen": -167.46807861328125, "logps/rejected": -261.24578857421875, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.9372583627700806, "rewards/margins": 10.950763702392578, "rewards/rejected": -11.888022422790527, "step": 2459 }, { "epoch": 0.84, "learning_rate": 1.3035450905868771e-06, "logits/chosen": -0.4045346975326538, "logits/rejected": -0.37244290113449097, "logps/chosen": -160.0706329345703, "logps/rejected": -249.7796173095703, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.9829511642456055, "rewards/margins": 10.585756301879883, "rewards/rejected": -11.568708419799805, "step": 2460 }, { "epoch": 0.84, "learning_rate": 1.3030184145655416e-06, "logits/chosen": -0.4618391692638397, "logits/rejected": -0.4455929398536682, "logps/chosen": -173.83380126953125, "logps/rejected": -284.4837341308594, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.1235358715057373, "rewards/margins": 10.60697078704834, "rewards/rejected": -11.73050594329834, "step": 2461 }, { "epoch": 0.84, "learning_rate": 1.3024916459762621e-06, "logits/chosen": -0.4051472246646881, "logits/rejected": -0.37606143951416016, "logps/chosen": -150.13255310058594, "logps/rejected": -265.47918701171875, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -1.1379650831222534, "rewards/margins": 11.011588096618652, "rewards/rejected": -12.149553298950195, "step": 2462 }, { "epoch": 0.84, "learning_rate": 1.301964784979959e-06, "logits/chosen": -0.45648616552352905, "logits/rejected": -0.4448472559452057, "logps/chosen": -138.7586212158203, "logps/rejected": -290.1750183105469, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.7184736728668213, "rewards/margins": 13.516334533691406, "rewards/rejected": -14.234807968139648, "step": 2463 }, { "epoch": 0.84, "learning_rate": 1.3014378317375815e-06, "logits/chosen": -0.5089468359947205, "logits/rejected": -0.4831518232822418, "logps/chosen": -219.24435424804688, "logps/rejected": -308.340576171875, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -0.9427656531333923, "rewards/margins": 10.062826156616211, "rewards/rejected": -11.00559139251709, "step": 2464 }, { "epoch": 0.84, "learning_rate": 1.3009107864101067e-06, "logits/chosen": -0.5398213267326355, "logits/rejected": -0.5187663435935974, "logps/chosen": -222.40789794921875, "logps/rejected": -349.7532043457031, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 1.105023741722107, "rewards/margins": 13.392484664916992, "rewards/rejected": -12.287460327148438, "step": 2465 }, { "epoch": 0.84, "learning_rate": 1.300383649158539e-06, "logits/chosen": -0.49340277910232544, "logits/rejected": -0.4944819509983063, "logps/chosen": -160.50450134277344, "logps/rejected": -372.3388366699219, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8051377534866333, "rewards/margins": 16.422744750976562, "rewards/rejected": -17.227882385253906, "step": 2466 }, { "epoch": 0.84, "learning_rate": 1.2998564201439115e-06, "logits/chosen": -0.6095374226570129, "logits/rejected": -0.5914759039878845, "logps/chosen": -144.5430145263672, "logps/rejected": -279.32281494140625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.15713611245155334, "rewards/margins": 13.401777267456055, "rewards/rejected": -13.558913230895996, "step": 2467 }, { "epoch": 0.84, "learning_rate": 1.299329099527286e-06, "logits/chosen": -0.49307069182395935, "logits/rejected": -0.44457143545150757, "logps/chosen": -212.3412322998047, "logps/rejected": -281.45428466796875, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": 0.15275073051452637, "rewards/margins": 12.398880958557129, "rewards/rejected": -12.246129989624023, "step": 2468 }, { "epoch": 0.84, "learning_rate": 1.2988016874697516e-06, "logits/chosen": -0.5978044271469116, "logits/rejected": -0.5394377112388611, "logps/chosen": -215.4488525390625, "logps/rejected": -357.25274658203125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.728930652141571, "rewards/margins": 15.811509132385254, "rewards/rejected": -16.54043960571289, "step": 2469 }, { "epoch": 0.84, "learning_rate": 1.2982741841324252e-06, "logits/chosen": -0.5131096839904785, "logits/rejected": -0.5061245560646057, "logps/chosen": -174.32289123535156, "logps/rejected": -309.9998474121094, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.5103061199188232, "rewards/margins": 13.6840181350708, "rewards/rejected": -14.194324493408203, "step": 2470 }, { "epoch": 0.84, "learning_rate": 1.297746589676452e-06, "logits/chosen": -0.5124602913856506, "logits/rejected": -0.4806004464626312, "logps/chosen": -221.87850952148438, "logps/rejected": -402.1302185058594, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -1.2811150550842285, "rewards/margins": 17.56643295288086, "rewards/rejected": -18.847545623779297, "step": 2471 }, { "epoch": 0.84, "learning_rate": 1.2972189042630042e-06, "logits/chosen": -0.5049492120742798, "logits/rejected": -0.49909088015556335, "logps/chosen": -179.83612060546875, "logps/rejected": -254.12530517578125, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": -0.6401299238204956, "rewards/margins": 10.73679256439209, "rewards/rejected": -11.376924514770508, "step": 2472 }, { "epoch": 0.84, "learning_rate": 1.2966911280532828e-06, "logits/chosen": -0.48576366901397705, "logits/rejected": -0.44689688086509705, "logps/chosen": -209.5334930419922, "logps/rejected": -285.0822448730469, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.11154180765151978, "rewards/margins": 11.16721248626709, "rewards/rejected": -11.278753280639648, "step": 2473 }, { "epoch": 0.84, "learning_rate": 1.2961632612085169e-06, "logits/chosen": -0.5800448060035706, "logits/rejected": -0.5404998064041138, "logps/chosen": -238.10293579101562, "logps/rejected": -331.0245361328125, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -0.16964244842529297, "rewards/margins": 13.878303527832031, "rewards/rejected": -14.04794692993164, "step": 2474 }, { "epoch": 0.84, "learning_rate": 1.2956353038899612e-06, "logits/chosen": -0.4479703903198242, "logits/rejected": -0.43878111243247986, "logps/chosen": -202.03216552734375, "logps/rejected": -382.7355041503906, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8155462741851807, "rewards/margins": 13.707627296447754, "rewards/rejected": -14.523173332214355, "step": 2475 }, { "epoch": 0.85, "learning_rate": 1.2951072562589002e-06, "logits/chosen": -0.4310171604156494, "logits/rejected": -0.4238893687725067, "logps/chosen": -208.58958435058594, "logps/rejected": -357.00250244140625, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -2.1829657554626465, "rewards/margins": 14.629817962646484, "rewards/rejected": -16.812782287597656, "step": 2476 }, { "epoch": 0.85, "learning_rate": 1.294579118476645e-06, "logits/chosen": -0.5099777579307556, "logits/rejected": -0.4619043171405792, "logps/chosen": -175.87286376953125, "logps/rejected": -309.59979248046875, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.5688695907592773, "rewards/margins": 14.234501838684082, "rewards/rejected": -15.803372383117676, "step": 2477 }, { "epoch": 0.85, "learning_rate": 1.2940508907045344e-06, "logits/chosen": -0.5202528834342957, "logits/rejected": -0.5074263215065002, "logps/chosen": -217.70639038085938, "logps/rejected": -332.9871520996094, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.6612567901611328, "rewards/margins": 11.796260833740234, "rewards/rejected": -13.45751667022705, "step": 2478 }, { "epoch": 0.85, "learning_rate": 1.2935225731039348e-06, "logits/chosen": -0.4627721607685089, "logits/rejected": -0.4076485335826874, "logps/chosen": -222.5959014892578, "logps/rejected": -289.8507385253906, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.26519185304641724, "rewards/margins": 12.393260955810547, "rewards/rejected": -12.658452033996582, "step": 2479 }, { "epoch": 0.85, "learning_rate": 1.2929941658362397e-06, "logits/chosen": -0.5294972062110901, "logits/rejected": -0.533006489276886, "logps/chosen": -161.08547973632812, "logps/rejected": -319.0122985839844, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.9767953157424927, "rewards/margins": 13.240425109863281, "rewards/rejected": -14.217220306396484, "step": 2480 }, { "epoch": 0.85, "learning_rate": 1.2924656690628704e-06, "logits/chosen": -0.5643660426139832, "logits/rejected": -0.5525549650192261, "logps/chosen": -191.9860076904297, "logps/rejected": -322.98046875, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.4679714441299438, "rewards/margins": 12.264519691467285, "rewards/rejected": -13.732492446899414, "step": 2481 }, { "epoch": 0.85, "learning_rate": 1.291937082945275e-06, "logits/chosen": -0.5008978843688965, "logits/rejected": -0.4887084662914276, "logps/chosen": -154.53179931640625, "logps/rejected": -274.8981628417969, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.78718101978302, "rewards/margins": 11.706526756286621, "rewards/rejected": -12.493707656860352, "step": 2482 }, { "epoch": 0.85, "learning_rate": 1.2914084076449298e-06, "logits/chosen": -0.4921891391277313, "logits/rejected": -0.46055060625076294, "logps/chosen": -184.26280212402344, "logps/rejected": -289.693603515625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 0.19276021420955658, "rewards/margins": 12.44571590423584, "rewards/rejected": -12.25295639038086, "step": 2483 }, { "epoch": 0.85, "learning_rate": 1.290879643323338e-06, "logits/chosen": -0.47595953941345215, "logits/rejected": -0.45305898785591125, "logps/chosen": -144.8676300048828, "logps/rejected": -256.5410461425781, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -1.4447219371795654, "rewards/margins": 11.781306266784668, "rewards/rejected": -13.226027488708496, "step": 2484 }, { "epoch": 0.85, "learning_rate": 1.2903507901420288e-06, "logits/chosen": -0.5215666890144348, "logits/rejected": -0.5273525714874268, "logps/chosen": -207.7415008544922, "logps/rejected": -348.6248474121094, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.5587533116340637, "rewards/margins": 12.59476375579834, "rewards/rejected": -13.15351676940918, "step": 2485 }, { "epoch": 0.85, "learning_rate": 1.2898218482625605e-06, "logits/chosen": -0.5490763783454895, "logits/rejected": -0.511576235294342, "logps/chosen": -230.32359313964844, "logps/rejected": -343.40155029296875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.5864511132240295, "rewards/margins": 15.81501579284668, "rewards/rejected": -15.228564262390137, "step": 2486 }, { "epoch": 0.85, "learning_rate": 1.2892928178465169e-06, "logits/chosen": -0.49477168917655945, "logits/rejected": -0.46791335940361023, "logps/chosen": -244.95530700683594, "logps/rejected": -341.9356994628906, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.393002986907959, "rewards/margins": 14.383668899536133, "rewards/rejected": -15.776670455932617, "step": 2487 }, { "epoch": 0.85, "learning_rate": 1.2887636990555096e-06, "logits/chosen": -0.6118412613868713, "logits/rejected": -0.5556760430335999, "logps/chosen": -260.73455810546875, "logps/rejected": -349.66571044921875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.20112571120262146, "rewards/margins": 16.934547424316406, "rewards/rejected": -17.13567352294922, "step": 2488 }, { "epoch": 0.85, "learning_rate": 1.2882344920511781e-06, "logits/chosen": -0.628984808921814, "logits/rejected": -0.5798735022544861, "logps/chosen": -238.49293518066406, "logps/rejected": -299.25421142578125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.7643085718154907, "rewards/margins": 10.613306045532227, "rewards/rejected": -12.377614974975586, "step": 2489 }, { "epoch": 0.85, "learning_rate": 1.2877051969951862e-06, "logits/chosen": -0.4751574695110321, "logits/rejected": -0.4613155424594879, "logps/chosen": -231.11268615722656, "logps/rejected": -353.72711181640625, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 1.1186864376068115, "rewards/margins": 17.478248596191406, "rewards/rejected": -16.35956382751465, "step": 2490 }, { "epoch": 0.85, "learning_rate": 1.2871758140492273e-06, "logits/chosen": -0.47914060950279236, "logits/rejected": -0.4306206703186035, "logps/chosen": -315.00970458984375, "logps/rejected": -387.2782897949219, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": 1.0626696348190308, "rewards/margins": 15.911091804504395, "rewards/rejected": -14.848422050476074, "step": 2491 }, { "epoch": 0.85, "learning_rate": 1.2866463433750206e-06, "logits/chosen": -0.5028660893440247, "logits/rejected": -0.4687049984931946, "logps/chosen": -266.0188903808594, "logps/rejected": -337.8912353515625, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.632725179195404, "rewards/margins": 13.212179183959961, "rewards/rejected": -13.844905853271484, "step": 2492 }, { "epoch": 0.85, "learning_rate": 1.2861167851343115e-06, "logits/chosen": -0.4945860505104065, "logits/rejected": -0.47150635719299316, "logps/chosen": -166.09417724609375, "logps/rejected": -345.164306640625, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -2.1828413009643555, "rewards/margins": 14.066426277160645, "rewards/rejected": -16.249267578125, "step": 2493 }, { "epoch": 0.85, "learning_rate": 1.285587139488873e-06, "logits/chosen": -0.5818395614624023, "logits/rejected": -0.5428297519683838, "logps/chosen": -258.0292053222656, "logps/rejected": -352.76220703125, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -0.864996075630188, "rewards/margins": 12.359909057617188, "rewards/rejected": -13.224905967712402, "step": 2494 }, { "epoch": 0.85, "learning_rate": 1.2850574066005047e-06, "logits/chosen": -0.43308398127555847, "logits/rejected": -0.41362929344177246, "logps/chosen": -151.1120147705078, "logps/rejected": -251.1807098388672, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": 0.22368530929088593, "rewards/margins": 11.713149070739746, "rewards/rejected": -11.489462852478027, "step": 2495 }, { "epoch": 0.85, "learning_rate": 1.2845275866310324e-06, "logits/chosen": -0.4998614490032196, "logits/rejected": -0.4876517355442047, "logps/chosen": -181.8336639404297, "logps/rejected": -356.8946838378906, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.3265209197998047, "rewards/margins": 14.665950775146484, "rewards/rejected": -15.992471694946289, "step": 2496 }, { "epoch": 0.85, "learning_rate": 1.2839976797423088e-06, "logits/chosen": -0.45201849937438965, "logits/rejected": -0.409833163022995, "logps/chosen": -232.44932556152344, "logps/rejected": -325.8747253417969, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.14251574873924255, "rewards/margins": 13.564764022827148, "rewards/rejected": -13.70727825164795, "step": 2497 }, { "epoch": 0.85, "learning_rate": 1.283467686096214e-06, "logits/chosen": -0.47112101316452026, "logits/rejected": -0.42497557401657104, "logps/chosen": -264.1121520996094, "logps/rejected": -396.3504943847656, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.8456912040710449, "rewards/margins": 15.286026954650879, "rewards/rejected": -16.131717681884766, "step": 2498 }, { "epoch": 0.85, "learning_rate": 1.2829376058546526e-06, "logits/chosen": -0.49338725209236145, "logits/rejected": -0.4261930286884308, "logps/chosen": -203.8623046875, "logps/rejected": -308.83929443359375, "loss": 0.0314, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2980475425720215, "rewards/margins": 13.522984504699707, "rewards/rejected": -14.82103157043457, "step": 2499 }, { "epoch": 0.85, "learning_rate": 1.282407439179557e-06, "logits/chosen": -0.5067782998085022, "logits/rejected": -0.4889541566371918, "logps/chosen": -168.95608520507812, "logps/rejected": -312.7499084472656, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.47280949354171753, "rewards/margins": 16.28597640991211, "rewards/rejected": -16.758787155151367, "step": 2500 }, { "epoch": 0.85, "learning_rate": 1.2818771862328866e-06, "logits/chosen": -0.4539751410484314, "logits/rejected": -0.44310763478279114, "logps/chosen": -232.54995727539062, "logps/rejected": -374.3692626953125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.1836373507976532, "rewards/margins": 15.100189208984375, "rewards/rejected": -15.28382396697998, "step": 2501 }, { "epoch": 0.85, "learning_rate": 1.2813468471766251e-06, "logits/chosen": -0.5004691481590271, "logits/rejected": -0.4934370219707489, "logps/chosen": -193.98681640625, "logps/rejected": -364.6373291015625, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -0.8316810131072998, "rewards/margins": 15.051240921020508, "rewards/rejected": -15.88292121887207, "step": 2502 }, { "epoch": 0.85, "learning_rate": 1.2808164221727851e-06, "logits/chosen": -0.3887060880661011, "logits/rejected": -0.3780173659324646, "logps/chosen": -211.6269073486328, "logps/rejected": -322.9825744628906, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.10720914602279663, "rewards/margins": 12.147403717041016, "rewards/rejected": -12.254612922668457, "step": 2503 }, { "epoch": 0.85, "learning_rate": 1.2802859113834032e-06, "logits/chosen": -0.4295214116573334, "logits/rejected": -0.4127126634120941, "logps/chosen": -178.63153076171875, "logps/rejected": -298.9331970214844, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -0.9974032640457153, "rewards/margins": 12.546253204345703, "rewards/rejected": -13.543656349182129, "step": 2504 }, { "epoch": 0.85, "learning_rate": 1.2797553149705434e-06, "logits/chosen": -0.4682384133338928, "logits/rejected": -0.43461349606513977, "logps/chosen": -214.70164489746094, "logps/rejected": -331.50018310546875, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -0.657664954662323, "rewards/margins": 13.389352798461914, "rewards/rejected": -14.047018051147461, "step": 2505 }, { "epoch": 0.86, "learning_rate": 1.2792246330962954e-06, "logits/chosen": -0.48332807421684265, "logits/rejected": -0.4481206238269806, "logps/chosen": -171.96987915039062, "logps/rejected": -296.237548828125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.84702467918396, "rewards/margins": 12.58343505859375, "rewards/rejected": -13.430459976196289, "step": 2506 }, { "epoch": 0.86, "learning_rate": 1.2786938659227757e-06, "logits/chosen": -0.46386414766311646, "logits/rejected": -0.44412416219711304, "logps/chosen": -210.84471130371094, "logps/rejected": -332.511962890625, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.6265032291412354, "rewards/margins": 13.649409294128418, "rewards/rejected": -14.275912284851074, "step": 2507 }, { "epoch": 0.86, "learning_rate": 1.278163013612126e-06, "logits/chosen": -0.5563510656356812, "logits/rejected": -0.5022715926170349, "logps/chosen": -251.32876586914062, "logps/rejected": -348.5610656738281, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.3221941590309143, "rewards/margins": 13.841737747192383, "rewards/rejected": -14.163931846618652, "step": 2508 }, { "epoch": 0.86, "learning_rate": 1.277632076326514e-06, "logits/chosen": -0.4137515127658844, "logits/rejected": -0.39512014389038086, "logps/chosen": -172.999755859375, "logps/rejected": -309.32843017578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.13924823701381683, "rewards/margins": 15.341545104980469, "rewards/rejected": -15.20229721069336, "step": 2509 }, { "epoch": 0.86, "learning_rate": 1.2771010542281344e-06, "logits/chosen": -0.48445457220077515, "logits/rejected": -0.4600323736667633, "logps/chosen": -214.1209259033203, "logps/rejected": -338.4404296875, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/chosen": -0.9953469038009644, "rewards/margins": 12.574835777282715, "rewards/rejected": -13.570181846618652, "step": 2510 }, { "epoch": 0.86, "learning_rate": 1.276569947479207e-06, "logits/chosen": -0.4551387429237366, "logits/rejected": -0.42738330364227295, "logps/chosen": -155.6650390625, "logps/rejected": -326.0425720214844, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9948875904083252, "rewards/margins": 15.627342224121094, "rewards/rejected": -16.622228622436523, "step": 2511 }, { "epoch": 0.86, "learning_rate": 1.276038756241977e-06, "logits/chosen": -0.49708378314971924, "logits/rejected": -0.46882522106170654, "logps/chosen": -201.0178985595703, "logps/rejected": -349.8360900878906, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.14392836391925812, "rewards/margins": 15.125312805175781, "rewards/rejected": -15.269242286682129, "step": 2512 }, { "epoch": 0.86, "learning_rate": 1.2755074806787166e-06, "logits/chosen": -0.44221651554107666, "logits/rejected": -0.42788803577423096, "logps/chosen": -134.4075469970703, "logps/rejected": -244.01644897460938, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.5755254030227661, "rewards/margins": 11.2979154586792, "rewards/rejected": -12.87343978881836, "step": 2513 }, { "epoch": 0.86, "learning_rate": 1.2749761209517229e-06, "logits/chosen": -0.5227423310279846, "logits/rejected": -0.5070292353630066, "logps/chosen": -213.4652862548828, "logps/rejected": -351.0755920410156, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.716594934463501, "rewards/margins": 14.724591255187988, "rewards/rejected": -15.441186904907227, "step": 2514 }, { "epoch": 0.86, "learning_rate": 1.2744446772233187e-06, "logits/chosen": -0.4027293622493744, "logits/rejected": -0.38382387161254883, "logps/chosen": -143.85031127929688, "logps/rejected": -281.1193542480469, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.0179643630981445, "rewards/margins": 13.975884437561035, "rewards/rejected": -14.99384880065918, "step": 2515 }, { "epoch": 0.86, "learning_rate": 1.2739131496558535e-06, "logits/chosen": -0.44655299186706543, "logits/rejected": -0.4305752217769623, "logps/chosen": -246.42466735839844, "logps/rejected": -385.04510498046875, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 0.6793806552886963, "rewards/margins": 14.638221740722656, "rewards/rejected": -13.958841323852539, "step": 2516 }, { "epoch": 0.86, "learning_rate": 1.2733815384117011e-06, "logits/chosen": -0.5559980869293213, "logits/rejected": -0.5640329122543335, "logps/chosen": -128.0772705078125, "logps/rejected": -293.51678466796875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.7417376041412354, "rewards/margins": 14.268097877502441, "rewards/rejected": -16.009836196899414, "step": 2517 }, { "epoch": 0.86, "learning_rate": 1.2728498436532612e-06, "logits/chosen": -0.36078670620918274, "logits/rejected": -0.32722076773643494, "logps/chosen": -180.32403564453125, "logps/rejected": -297.6274719238281, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -1.1157443523406982, "rewards/margins": 12.164989471435547, "rewards/rejected": -13.280734062194824, "step": 2518 }, { "epoch": 0.86, "learning_rate": 1.2723180655429596e-06, "logits/chosen": -0.48950034379959106, "logits/rejected": -0.46444857120513916, "logps/chosen": -230.909423828125, "logps/rejected": -371.81988525390625, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": 0.3179614543914795, "rewards/margins": 14.263880729675293, "rewards/rejected": -13.945919036865234, "step": 2519 }, { "epoch": 0.86, "learning_rate": 1.271786204243247e-06, "logits/chosen": -0.5901761054992676, "logits/rejected": -0.5741787552833557, "logps/chosen": -210.2410888671875, "logps/rejected": -299.71295166015625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.6076964139938354, "rewards/margins": 9.943991661071777, "rewards/rejected": -11.551689147949219, "step": 2520 }, { "epoch": 0.86, "learning_rate": 1.2712542599165995e-06, "logits/chosen": -0.40811213850975037, "logits/rejected": -0.37923237681388855, "logps/chosen": -214.99765014648438, "logps/rejected": -336.0181884765625, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9460581541061401, "rewards/margins": 13.199151039123535, "rewards/rejected": -14.145208358764648, "step": 2521 }, { "epoch": 0.86, "learning_rate": 1.2707222327255199e-06, "logits/chosen": -0.3595455288887024, "logits/rejected": -0.3337703049182892, "logps/chosen": -124.6580581665039, "logps/rejected": -191.38319396972656, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -1.2041751146316528, "rewards/margins": 9.82846450805664, "rewards/rejected": -11.03264045715332, "step": 2522 }, { "epoch": 0.86, "learning_rate": 1.2701901228325333e-06, "logits/chosen": -0.41992834210395813, "logits/rejected": -0.41406917572021484, "logps/chosen": -179.1107177734375, "logps/rejected": -334.7421875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.3795045614242554, "rewards/margins": 14.724422454833984, "rewards/rejected": -16.103925704956055, "step": 2523 }, { "epoch": 0.86, "learning_rate": 1.2696579304001934e-06, "logits/chosen": -0.3953150510787964, "logits/rejected": -0.3911297023296356, "logps/chosen": -164.1071319580078, "logps/rejected": -293.99798583984375, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -0.36260998249053955, "rewards/margins": 11.742834091186523, "rewards/rejected": -12.105443954467773, "step": 2524 }, { "epoch": 0.86, "learning_rate": 1.2691256555910768e-06, "logits/chosen": -0.32570502161979675, "logits/rejected": -0.2911503314971924, "logps/chosen": -238.62750244140625, "logps/rejected": -373.6063537597656, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.2413179874420166, "rewards/margins": 14.842833518981934, "rewards/rejected": -16.084150314331055, "step": 2525 }, { "epoch": 0.86, "learning_rate": 1.2685932985677866e-06, "logits/chosen": -0.4034728705883026, "logits/rejected": -0.37323734164237976, "logps/chosen": -190.697998046875, "logps/rejected": -292.9683837890625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.127656489610672, "rewards/margins": 13.358692169189453, "rewards/rejected": -13.486347198486328, "step": 2526 }, { "epoch": 0.86, "learning_rate": 1.2680608594929503e-06, "logits/chosen": -0.5084453225135803, "logits/rejected": -0.4962632656097412, "logps/chosen": -202.39822387695312, "logps/rejected": -322.9809875488281, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.4230877161026, "rewards/margins": 11.823808670043945, "rewards/rejected": -13.246896743774414, "step": 2527 }, { "epoch": 0.86, "learning_rate": 1.2675283385292211e-06, "logits/chosen": -0.527721643447876, "logits/rejected": -0.47605079412460327, "logps/chosen": -195.80377197265625, "logps/rejected": -229.69570922851562, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.135076642036438, "rewards/margins": 11.53276538848877, "rewards/rejected": -12.667841911315918, "step": 2528 }, { "epoch": 0.86, "learning_rate": 1.2669957358392758e-06, "logits/chosen": -0.28988969326019287, "logits/rejected": -0.27336621284484863, "logps/chosen": -119.59158325195312, "logps/rejected": -190.6893310546875, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -0.9838255047798157, "rewards/margins": 9.066471099853516, "rewards/rejected": -10.05029582977295, "step": 2529 }, { "epoch": 0.86, "learning_rate": 1.2664630515858181e-06, "logits/chosen": -0.5300782918930054, "logits/rejected": -0.5016630291938782, "logps/chosen": -273.61956787109375, "logps/rejected": -405.7848205566406, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.5140236020088196, "rewards/margins": 15.622997283935547, "rewards/rejected": -16.137022018432617, "step": 2530 }, { "epoch": 0.86, "learning_rate": 1.2659302859315754e-06, "logits/chosen": -0.42699316143989563, "logits/rejected": -0.40312251448631287, "logps/chosen": -226.44168090820312, "logps/rejected": -371.1930236816406, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": -1.0885345935821533, "rewards/margins": 15.54788875579834, "rewards/rejected": -16.636423110961914, "step": 2531 }, { "epoch": 0.86, "learning_rate": 1.2653974390393005e-06, "logits/chosen": -0.5068669319152832, "logits/rejected": -0.47361138463020325, "logps/chosen": -191.04855346679688, "logps/rejected": -372.1289367675781, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -1.4635200500488281, "rewards/margins": 16.865711212158203, "rewards/rejected": -18.32923126220703, "step": 2532 }, { "epoch": 0.86, "learning_rate": 1.2648645110717704e-06, "logits/chosen": -0.43251511454582214, "logits/rejected": -0.4174395203590393, "logps/chosen": -229.26148986816406, "logps/rejected": -394.18450927734375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.9537566304206848, "rewards/margins": 15.356910705566406, "rewards/rejected": -16.3106689453125, "step": 2533 }, { "epoch": 0.86, "learning_rate": 1.2643315021917874e-06, "logits/chosen": -0.4485079348087311, "logits/rejected": -0.43668851256370544, "logps/chosen": -152.51751708984375, "logps/rejected": -340.6437072753906, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -2.0298964977264404, "rewards/margins": 14.920572280883789, "rewards/rejected": -16.95046615600586, "step": 2534 }, { "epoch": 0.87, "learning_rate": 1.2637984125621781e-06, "logits/chosen": -0.39035266637802124, "logits/rejected": -0.33718910813331604, "logps/chosen": -233.04107666015625, "logps/rejected": -280.1921081542969, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 0.050260357558727264, "rewards/margins": 12.46554183959961, "rewards/rejected": -12.415281295776367, "step": 2535 }, { "epoch": 0.87, "learning_rate": 1.2632652423457946e-06, "logits/chosen": -0.4321568012237549, "logits/rejected": -0.41374772787094116, "logps/chosen": -251.207763671875, "logps/rejected": -385.74456787109375, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": 0.25865358114242554, "rewards/margins": 14.9989595413208, "rewards/rejected": -14.740306854248047, "step": 2536 }, { "epoch": 0.87, "learning_rate": 1.2627319917055125e-06, "logits/chosen": -0.37188082933425903, "logits/rejected": -0.3381298780441284, "logps/chosen": -182.33670043945312, "logps/rejected": -310.8833312988281, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.3554693460464478, "rewards/margins": 14.85534381866455, "rewards/rejected": -16.210811614990234, "step": 2537 }, { "epoch": 0.87, "learning_rate": 1.2621986608042328e-06, "logits/chosen": -0.46221256256103516, "logits/rejected": -0.4679419696331024, "logps/chosen": -209.1242218017578, "logps/rejected": -353.7173156738281, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.30258041620254517, "rewards/margins": 14.613466262817383, "rewards/rejected": -14.310885429382324, "step": 2538 }, { "epoch": 0.87, "learning_rate": 1.2616652498048803e-06, "logits/chosen": -0.3731759786605835, "logits/rejected": -0.3652290999889374, "logps/chosen": -192.5491485595703, "logps/rejected": -359.57830810546875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.8243036270141602, "rewards/margins": 14.041950225830078, "rewards/rejected": -15.866254806518555, "step": 2539 }, { "epoch": 0.87, "learning_rate": 1.2611317588704056e-06, "logits/chosen": -0.4186376929283142, "logits/rejected": -0.4055330157279968, "logps/chosen": -125.19322204589844, "logps/rejected": -254.56053161621094, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.9856027364730835, "rewards/margins": 12.792757987976074, "rewards/rejected": -14.778360366821289, "step": 2540 }, { "epoch": 0.87, "learning_rate": 1.260598188163782e-06, "logits/chosen": -0.4413398206233978, "logits/rejected": -0.4390593469142914, "logps/chosen": -214.02395629882812, "logps/rejected": -363.224365234375, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.5868301391601562, "rewards/margins": 13.577559471130371, "rewards/rejected": -14.164388656616211, "step": 2541 }, { "epoch": 0.87, "learning_rate": 1.2600645378480081e-06, "logits/chosen": -0.32761186361312866, "logits/rejected": -0.3195731043815613, "logps/chosen": -141.2158966064453, "logps/rejected": -251.72525024414062, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": 0.2617325484752655, "rewards/margins": 12.708099365234375, "rewards/rejected": -12.446367263793945, "step": 2542 }, { "epoch": 0.87, "learning_rate": 1.2595308080861072e-06, "logits/chosen": -0.3902990221977234, "logits/rejected": -0.3623496890068054, "logps/chosen": -233.0139617919922, "logps/rejected": -350.994384765625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.3577126264572144, "rewards/margins": 14.671466827392578, "rewards/rejected": -16.0291805267334, "step": 2543 }, { "epoch": 0.87, "learning_rate": 1.2589969990411255e-06, "logits/chosen": -0.31504327058792114, "logits/rejected": -0.30355557799339294, "logps/chosen": -140.07894897460938, "logps/rejected": -251.4746856689453, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.04353928565979, "rewards/margins": 11.360638618469238, "rewards/rejected": -12.404178619384766, "step": 2544 }, { "epoch": 0.87, "learning_rate": 1.2584631108761352e-06, "logits/chosen": -0.3459832966327667, "logits/rejected": -0.3373335599899292, "logps/chosen": -177.267822265625, "logps/rejected": -324.29791259765625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.4027022123336792, "rewards/margins": 15.406112670898438, "rewards/rejected": -15.808815002441406, "step": 2545 }, { "epoch": 0.87, "learning_rate": 1.2579291437542314e-06, "logits/chosen": -0.45843079686164856, "logits/rejected": -0.4342194199562073, "logps/chosen": -201.93505859375, "logps/rejected": -309.448974609375, "loss": 0.0538, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8389266729354858, "rewards/margins": 13.402213096618652, "rewards/rejected": -14.241140365600586, "step": 2546 }, { "epoch": 0.87, "learning_rate": 1.257395097838534e-06, "logits/chosen": -0.4379943311214447, "logits/rejected": -0.4048652946949005, "logps/chosen": -228.86517333984375, "logps/rejected": -302.30401611328125, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": -0.35025641322135925, "rewards/margins": 12.30788516998291, "rewards/rejected": -12.65814208984375, "step": 2547 }, { "epoch": 0.87, "learning_rate": 1.2568609732921857e-06, "logits/chosen": -0.46397778391838074, "logits/rejected": -0.4363498389720917, "logps/chosen": -169.99490356445312, "logps/rejected": -315.2509460449219, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -1.0387170314788818, "rewards/margins": 15.039314270019531, "rewards/rejected": -16.078031539916992, "step": 2548 }, { "epoch": 0.87, "learning_rate": 1.2563267702783554e-06, "logits/chosen": -0.46411487460136414, "logits/rejected": -0.4500372111797333, "logps/chosen": -181.18385314941406, "logps/rejected": -342.0605163574219, "loss": 0.0427, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6004071235656738, "rewards/margins": 15.083464622497559, "rewards/rejected": -16.68387222290039, "step": 2549 }, { "epoch": 0.87, "learning_rate": 1.2557924889602338e-06, "logits/chosen": -0.5168541073799133, "logits/rejected": -0.4934607148170471, "logps/chosen": -218.3816375732422, "logps/rejected": -330.5320739746094, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.4167083501815796, "rewards/margins": 12.707869529724121, "rewards/rejected": -14.124578475952148, "step": 2550 }, { "epoch": 0.87, "learning_rate": 1.2552581295010366e-06, "logits/chosen": -0.4757194221019745, "logits/rejected": -0.4598853588104248, "logps/chosen": -207.77517700195312, "logps/rejected": -332.6474914550781, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -0.3224441707134247, "rewards/margins": 15.118379592895508, "rewards/rejected": -15.440824508666992, "step": 2551 }, { "epoch": 0.87, "learning_rate": 1.2547236920640041e-06, "logits/chosen": -0.4926108121871948, "logits/rejected": -0.4478819966316223, "logps/chosen": -212.98545837402344, "logps/rejected": -273.1975402832031, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.059600591659546, "rewards/margins": 10.271076202392578, "rewards/rejected": -12.330678939819336, "step": 2552 }, { "epoch": 0.87, "learning_rate": 1.2541891768123985e-06, "logits/chosen": -0.4088284373283386, "logits/rejected": -0.34859585762023926, "logps/chosen": -230.70106506347656, "logps/rejected": -331.3243408203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.2501733303070068, "rewards/margins": 13.870777130126953, "rewards/rejected": -15.120949745178223, "step": 2553 }, { "epoch": 0.87, "learning_rate": 1.2536545839095072e-06, "logits/chosen": -0.4054545760154724, "logits/rejected": -0.3975219428539276, "logps/chosen": -207.74258422851562, "logps/rejected": -347.730712890625, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -1.0416748523712158, "rewards/margins": 15.276012420654297, "rewards/rejected": -16.317686080932617, "step": 2554 }, { "epoch": 0.87, "learning_rate": 1.2531199135186415e-06, "logits/chosen": -0.4593382477760315, "logits/rejected": -0.4324992597103119, "logps/chosen": -245.2761993408203, "logps/rejected": -376.07843017578125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.4485483169555664, "rewards/margins": 14.824958801269531, "rewards/rejected": -16.27350616455078, "step": 2555 }, { "epoch": 0.87, "learning_rate": 1.252585165803135e-06, "logits/chosen": -0.4034155607223511, "logits/rejected": -0.39729657769203186, "logps/chosen": -167.2654571533203, "logps/rejected": -294.87554931640625, "loss": 0.0342, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2321175336837769, "rewards/margins": 10.393047332763672, "rewards/rejected": -11.625165939331055, "step": 2556 }, { "epoch": 0.87, "learning_rate": 1.252050340926346e-06, "logits/chosen": -0.4669502377510071, "logits/rejected": -0.4664791524410248, "logps/chosen": -204.8077850341797, "logps/rejected": -377.8301086425781, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.6799311637878418, "rewards/margins": 15.292271614074707, "rewards/rejected": -16.97220230102539, "step": 2557 }, { "epoch": 0.87, "learning_rate": 1.2515154390516567e-06, "logits/chosen": -0.398407906293869, "logits/rejected": -0.36516281962394714, "logps/chosen": -191.64453125, "logps/rejected": -324.401123046875, "loss": 0.0478, "rewards/accuracies": 1.0, "rewards/chosen": -0.3362436294555664, "rewards/margins": 13.578571319580078, "rewards/rejected": -13.914815902709961, "step": 2558 }, { "epoch": 0.87, "learning_rate": 1.2509804603424711e-06, "logits/chosen": -0.324871301651001, "logits/rejected": -0.31678587198257446, "logps/chosen": -77.94905853271484, "logps/rejected": -178.2486572265625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.500475287437439, "rewards/margins": 9.488999366760254, "rewards/rejected": -9.98947525024414, "step": 2559 }, { "epoch": 0.87, "learning_rate": 1.2504454049622191e-06, "logits/chosen": -0.4603870213031769, "logits/rejected": -0.45450448989868164, "logps/chosen": -169.10995483398438, "logps/rejected": -355.32305908203125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.0169312953948975, "rewards/margins": 15.56863784790039, "rewards/rejected": -16.585567474365234, "step": 2560 }, { "epoch": 0.87, "learning_rate": 1.2499102730743517e-06, "logits/chosen": -0.40101566910743713, "logits/rejected": -0.3742320239543915, "logps/chosen": -216.14389038085938, "logps/rejected": -319.51812744140625, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.3654158115386963, "rewards/margins": 13.502632141113281, "rewards/rejected": -14.868047714233398, "step": 2561 }, { "epoch": 0.87, "learning_rate": 1.2493750648423449e-06, "logits/chosen": -0.49476706981658936, "logits/rejected": -0.4728560745716095, "logps/chosen": -288.38568115234375, "logps/rejected": -400.9754638671875, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": 0.013006936758756638, "rewards/margins": 15.359969139099121, "rewards/rejected": -15.34696102142334, "step": 2562 }, { "epoch": 0.87, "learning_rate": 1.2488397804296972e-06, "logits/chosen": -0.4472509026527405, "logits/rejected": -0.4295212924480438, "logps/chosen": -149.77999877929688, "logps/rejected": -318.0013427734375, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.2475569248199463, "rewards/margins": 15.80911636352539, "rewards/rejected": -17.05667495727539, "step": 2563 }, { "epoch": 0.88, "learning_rate": 1.2483044199999307e-06, "logits/chosen": -0.42039570212364197, "logits/rejected": -0.3938545286655426, "logps/chosen": -225.37286376953125, "logps/rejected": -341.4250793457031, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.4514301121234894, "rewards/margins": 15.056347846984863, "rewards/rejected": -15.507777214050293, "step": 2564 }, { "epoch": 0.88, "learning_rate": 1.2477689837165905e-06, "logits/chosen": -0.3085442781448364, "logits/rejected": -0.28046914935112, "logps/chosen": -159.26358032226562, "logps/rejected": -275.314453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.15807586908340454, "rewards/margins": 12.505800247192383, "rewards/rejected": -12.663875579833984, "step": 2565 }, { "epoch": 0.88, "learning_rate": 1.2472334717432448e-06, "logits/chosen": -0.5280988812446594, "logits/rejected": -0.47030365467071533, "logps/chosen": -230.35043334960938, "logps/rejected": -288.73089599609375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.04129394143819809, "rewards/margins": 12.42517375946045, "rewards/rejected": -12.466466903686523, "step": 2566 }, { "epoch": 0.88, "learning_rate": 1.2466978842434858e-06, "logits/chosen": -0.4782792627811432, "logits/rejected": -0.44358116388320923, "logps/chosen": -279.0504455566406, "logps/rejected": -370.7138671875, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0987147092819214, "rewards/margins": 14.140628814697266, "rewards/rejected": -15.23934268951416, "step": 2567 }, { "epoch": 0.88, "learning_rate": 1.2461622213809275e-06, "logits/chosen": -0.39989057183265686, "logits/rejected": -0.370743066072464, "logps/chosen": -118.73844146728516, "logps/rejected": -246.7672119140625, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": -1.2178713083267212, "rewards/margins": 10.58171558380127, "rewards/rejected": -11.799586296081543, "step": 2568 }, { "epoch": 0.88, "learning_rate": 1.2456264833192078e-06, "logits/chosen": -0.3812065124511719, "logits/rejected": -0.346189945936203, "logps/chosen": -258.1871643066406, "logps/rejected": -348.985595703125, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 0.06986116617918015, "rewards/margins": 15.254921913146973, "rewards/rejected": -15.18505859375, "step": 2569 }, { "epoch": 0.88, "learning_rate": 1.2450906702219868e-06, "logits/chosen": -0.35451585054397583, "logits/rejected": -0.32761305570602417, "logps/chosen": -168.7964630126953, "logps/rejected": -307.1672668457031, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.135082721710205, "rewards/margins": 13.943867683410645, "rewards/rejected": -15.078950881958008, "step": 2570 }, { "epoch": 0.88, "learning_rate": 1.2445547822529488e-06, "logits/chosen": -0.3971482813358307, "logits/rejected": -0.39290598034858704, "logps/chosen": -209.775146484375, "logps/rejected": -356.2743225097656, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": -1.4262200593948364, "rewards/margins": 13.590079307556152, "rewards/rejected": -15.0162992477417, "step": 2571 }, { "epoch": 0.88, "learning_rate": 1.2440188195757995e-06, "logits/chosen": -0.37671810388565063, "logits/rejected": -0.3839400112628937, "logps/chosen": -267.943115234375, "logps/rejected": -474.5901184082031, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": -2.0085525512695312, "rewards/margins": 17.039426803588867, "rewards/rejected": -19.04798126220703, "step": 2572 }, { "epoch": 0.88, "learning_rate": 1.2434827823542684e-06, "logits/chosen": -0.5134382843971252, "logits/rejected": -0.48398977518081665, "logps/chosen": -186.6164093017578, "logps/rejected": -353.18646240234375, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": -0.767768144607544, "rewards/margins": 14.931714057922363, "rewards/rejected": -15.699481964111328, "step": 2573 }, { "epoch": 0.88, "learning_rate": 1.2429466707521075e-06, "logits/chosen": -0.48249298334121704, "logits/rejected": -0.42917346954345703, "logps/chosen": -188.71640014648438, "logps/rejected": -266.887939453125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.5125883221626282, "rewards/margins": 12.556467056274414, "rewards/rejected": -13.069056510925293, "step": 2574 }, { "epoch": 0.88, "learning_rate": 1.2424104849330914e-06, "logits/chosen": -0.37070006132125854, "logits/rejected": -0.33285775780677795, "logps/chosen": -222.49122619628906, "logps/rejected": -349.3299560546875, "loss": 0.0271, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6834497451782227, "rewards/margins": 14.557433128356934, "rewards/rejected": -15.240882873535156, "step": 2575 }, { "epoch": 0.88, "learning_rate": 1.2418742250610172e-06, "logits/chosen": -0.4474055767059326, "logits/rejected": -0.4253236949443817, "logps/chosen": -170.5072784423828, "logps/rejected": -314.3311767578125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.8007066249847412, "rewards/margins": 15.515124320983887, "rewards/rejected": -16.31583023071289, "step": 2576 }, { "epoch": 0.88, "learning_rate": 1.2413378912997057e-06, "logits/chosen": -0.3819534480571747, "logits/rejected": -0.36585259437561035, "logps/chosen": -199.17420959472656, "logps/rejected": -288.97528076171875, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.015376776456832886, "rewards/margins": 11.08039665222168, "rewards/rejected": -11.095773696899414, "step": 2577 }, { "epoch": 0.88, "learning_rate": 1.2408014838129986e-06, "logits/chosen": -0.4105006158351898, "logits/rejected": -0.3655855357646942, "logps/chosen": -234.3013153076172, "logps/rejected": -317.4085693359375, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 0.49149540066719055, "rewards/margins": 15.629642486572266, "rewards/rejected": -15.138147354125977, "step": 2578 }, { "epoch": 0.88, "learning_rate": 1.2402650027647614e-06, "logits/chosen": -0.30905693769454956, "logits/rejected": -0.2685577869415283, "logps/chosen": -180.60264587402344, "logps/rejected": -300.083984375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.104992151260376, "rewards/margins": 12.720775604248047, "rewards/rejected": -13.825769424438477, "step": 2579 }, { "epoch": 0.88, "learning_rate": 1.2397284483188817e-06, "logits/chosen": -0.3517574369907379, "logits/rejected": -0.33955180644989014, "logps/chosen": -185.15109252929688, "logps/rejected": -301.73602294921875, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -1.0102018117904663, "rewards/margins": 11.529610633850098, "rewards/rejected": -12.539813995361328, "step": 2580 }, { "epoch": 0.88, "learning_rate": 1.239191820639269e-06, "logits/chosen": -0.4468284249305725, "logits/rejected": -0.4366969168186188, "logps/chosen": -221.89825439453125, "logps/rejected": -344.25506591796875, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -0.3528156280517578, "rewards/margins": 12.61444091796875, "rewards/rejected": -12.967254638671875, "step": 2581 }, { "epoch": 0.88, "learning_rate": 1.2386551198898564e-06, "logits/chosen": -0.39544713497161865, "logits/rejected": -0.36629191040992737, "logps/chosen": -201.843505859375, "logps/rejected": -348.1922607421875, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.1752519607543945, "rewards/margins": 15.243793487548828, "rewards/rejected": -16.419044494628906, "step": 2582 }, { "epoch": 0.88, "learning_rate": 1.2381183462345982e-06, "logits/chosen": -0.272370308637619, "logits/rejected": -0.24857959151268005, "logps/chosen": -119.92557525634766, "logps/rejected": -271.56878662109375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.196904182434082, "rewards/margins": 12.766386032104492, "rewards/rejected": -13.963292121887207, "step": 2583 }, { "epoch": 0.88, "learning_rate": 1.2375814998374711e-06, "logits/chosen": -0.42248424887657166, "logits/rejected": -0.37571296095848083, "logps/chosen": -230.98162841796875, "logps/rejected": -329.4123229980469, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.20800578594207764, "rewards/margins": 14.024619102478027, "rewards/rejected": -14.232625961303711, "step": 2584 }, { "epoch": 0.88, "learning_rate": 1.2370445808624745e-06, "logits/chosen": -0.445549339056015, "logits/rejected": -0.4146101772785187, "logps/chosen": -289.83148193359375, "logps/rejected": -449.77191162109375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.17821785807609558, "rewards/margins": 17.173322677612305, "rewards/rejected": -17.35154151916504, "step": 2585 }, { "epoch": 0.88, "learning_rate": 1.23650758947363e-06, "logits/chosen": -0.4161607325077057, "logits/rejected": -0.38168731331825256, "logps/chosen": -189.9818115234375, "logps/rejected": -289.6672058105469, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -1.110648512840271, "rewards/margins": 12.71614933013916, "rewards/rejected": -13.826797485351562, "step": 2586 }, { "epoch": 0.88, "learning_rate": 1.2359705258349803e-06, "logits/chosen": -0.48609644174575806, "logits/rejected": -0.47833046317100525, "logps/chosen": -165.057373046875, "logps/rejected": -283.5748291015625, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.07181794941425323, "rewards/margins": 11.616743087768555, "rewards/rejected": -11.688560485839844, "step": 2587 }, { "epoch": 0.88, "learning_rate": 1.235433390110592e-06, "logits/chosen": -0.3017178475856781, "logits/rejected": -0.26981106400489807, "logps/chosen": -161.31002807617188, "logps/rejected": -260.3707275390625, "loss": 0.0358, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8473011255264282, "rewards/margins": 14.051794052124023, "rewards/rejected": -14.89909553527832, "step": 2588 }, { "epoch": 0.88, "learning_rate": 1.2348961824645518e-06, "logits/chosen": -0.3550831079483032, "logits/rejected": -0.33308646082878113, "logps/chosen": -273.68988037109375, "logps/rejected": -471.91888427734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.065794825553894, "rewards/margins": 17.494104385375977, "rewards/rejected": -18.559898376464844, "step": 2589 }, { "epoch": 0.88, "learning_rate": 1.2343589030609695e-06, "logits/chosen": -0.2653687596321106, "logits/rejected": -0.23801159858703613, "logps/chosen": -100.704345703125, "logps/rejected": -199.4917449951172, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.2225136756896973, "rewards/margins": 11.147075653076172, "rewards/rejected": -12.369588851928711, "step": 2590 }, { "epoch": 0.88, "learning_rate": 1.2338215520639768e-06, "logits/chosen": -0.42739731073379517, "logits/rejected": -0.4004945456981659, "logps/chosen": -269.2403259277344, "logps/rejected": -355.85186767578125, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 0.2654145061969757, "rewards/margins": 11.890846252441406, "rewards/rejected": -11.625432014465332, "step": 2591 }, { "epoch": 0.88, "learning_rate": 1.2332841296377263e-06, "logits/chosen": -0.295477032661438, "logits/rejected": -0.30084094405174255, "logps/chosen": -149.44149780273438, "logps/rejected": -280.8939208984375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.367777705192566, "rewards/margins": 9.307732582092285, "rewards/rejected": -10.67551040649414, "step": 2592 }, { "epoch": 0.88, "learning_rate": 1.2327466359463939e-06, "logits/chosen": -0.4405320882797241, "logits/rejected": -0.4083980619907379, "logps/chosen": -229.1051025390625, "logps/rejected": -412.6603088378906, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -3.1548244953155518, "rewards/margins": 18.331878662109375, "rewards/rejected": -21.48670196533203, "step": 2593 }, { "epoch": 0.89, "learning_rate": 1.2322090711541759e-06, "logits/chosen": -0.46692049503326416, "logits/rejected": -0.38577067852020264, "logps/chosen": -259.39959716796875, "logps/rejected": -362.0934753417969, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.6207937002182007, "rewards/margins": 14.408459663391113, "rewards/rejected": -15.029253005981445, "step": 2594 }, { "epoch": 0.89, "learning_rate": 1.2316714354252915e-06, "logits/chosen": -0.43361589312553406, "logits/rejected": -0.4090675413608551, "logps/chosen": -217.284912109375, "logps/rejected": -330.8565368652344, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.3129715323448181, "rewards/margins": 12.733904838562012, "rewards/rejected": -13.046878814697266, "step": 2595 }, { "epoch": 0.89, "learning_rate": 1.2311337289239805e-06, "logits/chosen": -0.29687806963920593, "logits/rejected": -0.28737348318099976, "logps/chosen": -206.74508666992188, "logps/rejected": -324.0734558105469, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.3040480613708496, "rewards/margins": 13.062271118164062, "rewards/rejected": -14.36631965637207, "step": 2596 }, { "epoch": 0.89, "learning_rate": 1.2305959518145053e-06, "logits/chosen": -0.4240949749946594, "logits/rejected": -0.386287122964859, "logps/chosen": -275.3070068359375, "logps/rejected": -402.29522705078125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.09893175959587097, "rewards/margins": 15.984874725341797, "rewards/rejected": -16.08380699157715, "step": 2597 }, { "epoch": 0.89, "learning_rate": 1.230058104261149e-06, "logits/chosen": -0.3476775884628296, "logits/rejected": -0.3344321548938751, "logps/chosen": -188.31008911132812, "logps/rejected": -329.3561706542969, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -1.7527519464492798, "rewards/margins": 12.155146598815918, "rewards/rejected": -13.90789794921875, "step": 2598 }, { "epoch": 0.89, "learning_rate": 1.2295201864282164e-06, "logits/chosen": -0.36553239822387695, "logits/rejected": -0.3315061330795288, "logps/chosen": -231.97084045410156, "logps/rejected": -415.6258850097656, "loss": 0.0198, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9628030061721802, "rewards/margins": 17.89615821838379, "rewards/rejected": -18.85896110534668, "step": 2599 }, { "epoch": 0.89, "learning_rate": 1.2289821984800346e-06, "logits/chosen": -0.3238571286201477, "logits/rejected": -0.32182347774505615, "logps/chosen": -151.6171112060547, "logps/rejected": -306.04803466796875, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -0.24475611746311188, "rewards/margins": 14.135452270507812, "rewards/rejected": -14.380208015441895, "step": 2600 }, { "epoch": 0.89, "learning_rate": 1.228444140580951e-06, "logits/chosen": -0.31126588582992554, "logits/rejected": -0.2684418261051178, "logps/chosen": -200.56500244140625, "logps/rejected": -364.654541015625, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -2.4271600246429443, "rewards/margins": 15.214664459228516, "rewards/rejected": -17.64182472229004, "step": 2601 }, { "epoch": 0.89, "learning_rate": 1.2279060128953353e-06, "logits/chosen": -0.4146254062652588, "logits/rejected": -0.38527706265449524, "logps/chosen": -200.82777404785156, "logps/rejected": -365.3913879394531, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.11435621976852417, "rewards/margins": 17.576385498046875, "rewards/rejected": -17.69074249267578, "step": 2602 }, { "epoch": 0.89, "learning_rate": 1.227367815587578e-06, "logits/chosen": -0.34972625970840454, "logits/rejected": -0.33496761322021484, "logps/chosen": -190.36355590820312, "logps/rejected": -355.95794677734375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.852240800857544, "rewards/margins": 16.01459312438965, "rewards/rejected": -16.86683464050293, "step": 2603 }, { "epoch": 0.89, "learning_rate": 1.2268295488220905e-06, "logits/chosen": -0.37110862135887146, "logits/rejected": -0.35022154450416565, "logps/chosen": -185.57081604003906, "logps/rejected": -355.5292663574219, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.7492787837982178, "rewards/margins": 15.716785430908203, "rewards/rejected": -16.466064453125, "step": 2604 }, { "epoch": 0.89, "learning_rate": 1.226291212763306e-06, "logits/chosen": -0.3345039188861847, "logits/rejected": -0.3409058451652527, "logps/chosen": -159.7254638671875, "logps/rejected": -300.3916015625, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -2.983905792236328, "rewards/margins": 12.458910942077637, "rewards/rejected": -15.442816734313965, "step": 2605 }, { "epoch": 0.89, "learning_rate": 1.2257528075756792e-06, "logits/chosen": -0.37846362590789795, "logits/rejected": -0.3183573782444, "logps/chosen": -236.5953369140625, "logps/rejected": -321.3846435546875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.6250594854354858, "rewards/margins": 15.84733772277832, "rewards/rejected": -16.472396850585938, "step": 2606 }, { "epoch": 0.89, "learning_rate": 1.225214333423685e-06, "logits/chosen": -0.360076904296875, "logits/rejected": -0.3642169237136841, "logps/chosen": -148.3202667236328, "logps/rejected": -264.99169921875, "loss": 0.0131, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0489978790283203, "rewards/margins": 11.375284194946289, "rewards/rejected": -13.424280166625977, "step": 2607 }, { "epoch": 0.89, "learning_rate": 1.22467579047182e-06, "logits/chosen": -0.47822511196136475, "logits/rejected": -0.46189379692077637, "logps/chosen": -203.45521545410156, "logps/rejected": -343.6040954589844, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.1804264932870865, "rewards/margins": 14.486137390136719, "rewards/rejected": -14.6665620803833, "step": 2608 }, { "epoch": 0.89, "learning_rate": 1.2241371788846014e-06, "logits/chosen": -0.3529881238937378, "logits/rejected": -0.3305014371871948, "logps/chosen": -197.48818969726562, "logps/rejected": -283.86065673828125, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.013704776763916, "rewards/margins": 11.881023406982422, "rewards/rejected": -13.894728660583496, "step": 2609 }, { "epoch": 0.89, "learning_rate": 1.2235984988265679e-06, "logits/chosen": -0.34317702054977417, "logits/rejected": -0.3210306763648987, "logps/chosen": -190.2569122314453, "logps/rejected": -375.8540344238281, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.1330785751342773, "rewards/margins": 17.618858337402344, "rewards/rejected": -18.751937866210938, "step": 2610 }, { "epoch": 0.89, "learning_rate": 1.223059750462279e-06, "logits/chosen": -0.44138965010643005, "logits/rejected": -0.40243035554885864, "logps/chosen": -197.9787139892578, "logps/rejected": -287.6790466308594, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.1156345009803772, "rewards/margins": 12.965556144714355, "rewards/rejected": -13.08119010925293, "step": 2611 }, { "epoch": 0.89, "learning_rate": 1.2225209339563143e-06, "logits/chosen": -0.38489586114883423, "logits/rejected": -0.35440319776535034, "logps/chosen": -237.5966033935547, "logps/rejected": -336.95330810546875, "loss": 0.0717, "rewards/accuracies": 1.0, "rewards/chosen": -1.9234819412231445, "rewards/margins": 11.577691078186035, "rewards/rejected": -13.501172065734863, "step": 2612 }, { "epoch": 0.89, "learning_rate": 1.2219820494732755e-06, "logits/chosen": -0.394049733877182, "logits/rejected": -0.36320942640304565, "logps/chosen": -224.79244995117188, "logps/rejected": -402.9344787597656, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.1379942893981934, "rewards/margins": 15.679304122924805, "rewards/rejected": -16.817298889160156, "step": 2613 }, { "epoch": 0.89, "learning_rate": 1.2214430971777836e-06, "logits/chosen": -0.4358530640602112, "logits/rejected": -0.4160517752170563, "logps/chosen": -181.047119140625, "logps/rejected": -253.62628173828125, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -0.8257606029510498, "rewards/margins": 9.892457962036133, "rewards/rejected": -10.718217849731445, "step": 2614 }, { "epoch": 0.89, "learning_rate": 1.2209040772344817e-06, "logits/chosen": -0.2606770992279053, "logits/rejected": -0.24713844060897827, "logps/chosen": -167.91404724121094, "logps/rejected": -353.6745910644531, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.2595661878585815, "rewards/margins": 17.351696014404297, "rewards/rejected": -18.61126136779785, "step": 2615 }, { "epoch": 0.89, "learning_rate": 1.2203649898080327e-06, "logits/chosen": -0.4448631703853607, "logits/rejected": -0.4278637170791626, "logps/chosen": -288.16265869140625, "logps/rejected": -453.7691955566406, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.21384428441524506, "rewards/margins": 18.836057662963867, "rewards/rejected": -18.622215270996094, "step": 2616 }, { "epoch": 0.89, "learning_rate": 1.2198258350631204e-06, "logits/chosen": -0.36685121059417725, "logits/rejected": -0.3314291834831238, "logps/chosen": -211.99375915527344, "logps/rejected": -261.4640808105469, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.10464215278625488, "rewards/margins": 11.544321060180664, "rewards/rejected": -11.648962020874023, "step": 2617 }, { "epoch": 0.89, "learning_rate": 1.2192866131644493e-06, "logits/chosen": -0.31131476163864136, "logits/rejected": -0.28894567489624023, "logps/chosen": -103.95683288574219, "logps/rejected": -254.54983520507812, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -0.9928401708602905, "rewards/margins": 13.35207748413086, "rewards/rejected": -14.344917297363281, "step": 2618 }, { "epoch": 0.89, "learning_rate": 1.218747324276744e-06, "logits/chosen": -0.36332955956459045, "logits/rejected": -0.3034593164920807, "logps/chosen": -161.92222595214844, "logps/rejected": -204.35763549804688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.26030150055885315, "rewards/margins": 10.734725952148438, "rewards/rejected": -10.995026588439941, "step": 2619 }, { "epoch": 0.89, "learning_rate": 1.2182079685647496e-06, "logits/chosen": -0.2999166250228882, "logits/rejected": -0.27736684679985046, "logps/chosen": -125.40142822265625, "logps/rejected": -261.2215576171875, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -1.4346745014190674, "rewards/margins": 12.500060081481934, "rewards/rejected": -13.934734344482422, "step": 2620 }, { "epoch": 0.89, "learning_rate": 1.2176685461932328e-06, "logits/chosen": -0.4291332960128784, "logits/rejected": -0.36659812927246094, "logps/chosen": -233.43826293945312, "logps/rejected": -354.7972106933594, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.2670331299304962, "rewards/margins": 16.120094299316406, "rewards/rejected": -16.387126922607422, "step": 2621 }, { "epoch": 0.89, "learning_rate": 1.2171290573269789e-06, "logits/chosen": -0.5059084296226501, "logits/rejected": -0.49119293689727783, "logps/chosen": -269.0294494628906, "logps/rejected": -501.3006286621094, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.0695652961730957, "rewards/margins": 18.32311248779297, "rewards/rejected": -20.392677307128906, "step": 2622 }, { "epoch": 0.9, "learning_rate": 1.216589502130794e-06, "logits/chosen": -0.458290696144104, "logits/rejected": -0.44230544567108154, "logps/chosen": -160.24908447265625, "logps/rejected": -300.9573974609375, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -2.373871088027954, "rewards/margins": 12.573901176452637, "rewards/rejected": -14.947772026062012, "step": 2623 }, { "epoch": 0.9, "learning_rate": 1.2160498807695053e-06, "logits/chosen": -0.4084708094596863, "logits/rejected": -0.392822802066803, "logps/chosen": -170.11175537109375, "logps/rejected": -329.69647216796875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.115549921989441, "rewards/margins": 15.224320411682129, "rewards/rejected": -16.33987045288086, "step": 2624 }, { "epoch": 0.9, "learning_rate": 1.2155101934079597e-06, "logits/chosen": -0.3659527897834778, "logits/rejected": -0.34965232014656067, "logps/chosen": -137.24339294433594, "logps/rejected": -263.7826232910156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.1657583713531494, "rewards/margins": 12.852774620056152, "rewards/rejected": -14.018533706665039, "step": 2625 }, { "epoch": 0.9, "learning_rate": 1.2149704402110242e-06, "logits/chosen": -0.3350077271461487, "logits/rejected": -0.3317449986934662, "logps/chosen": -196.29190063476562, "logps/rejected": -376.16845703125, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -0.9378947019577026, "rewards/margins": 15.589411735534668, "rewards/rejected": -16.527305603027344, "step": 2626 }, { "epoch": 0.9, "learning_rate": 1.2144306213435856e-06, "logits/chosen": -0.418363094329834, "logits/rejected": -0.38550952076911926, "logps/chosen": -177.12518310546875, "logps/rejected": -298.66802978515625, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -0.8135135173797607, "rewards/margins": 13.708969116210938, "rewards/rejected": -14.522481918334961, "step": 2627 }, { "epoch": 0.9, "learning_rate": 1.2138907369705517e-06, "logits/chosen": -0.45475688576698303, "logits/rejected": -0.44423970580101013, "logps/chosen": -184.5987548828125, "logps/rejected": -331.7016906738281, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.1171588897705078, "rewards/margins": 12.41212272644043, "rewards/rejected": -13.529280662536621, "step": 2628 }, { "epoch": 0.9, "learning_rate": 1.2133507872568492e-06, "logits/chosen": -0.41795870661735535, "logits/rejected": -0.39772742986679077, "logps/chosen": -249.64361572265625, "logps/rejected": -392.6297302246094, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9557583928108215, "rewards/margins": 16.204082489013672, "rewards/rejected": -17.159841537475586, "step": 2629 }, { "epoch": 0.9, "learning_rate": 1.2128107723674255e-06, "logits/chosen": -0.3951337933540344, "logits/rejected": -0.3494969606399536, "logps/chosen": -218.3241729736328, "logps/rejected": -380.2547302246094, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.8765674829483032, "rewards/margins": 16.832067489624023, "rewards/rejected": -18.708635330200195, "step": 2630 }, { "epoch": 0.9, "learning_rate": 1.212270692467248e-06, "logits/chosen": -0.3291071951389313, "logits/rejected": -0.27518230676651, "logps/chosen": -222.35000610351562, "logps/rejected": -310.25384521484375, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -0.23068702220916748, "rewards/margins": 13.829062461853027, "rewards/rejected": -14.059748649597168, "step": 2631 }, { "epoch": 0.9, "learning_rate": 1.211730547721303e-06, "logits/chosen": -0.3774348199367523, "logits/rejected": -0.3453994691371918, "logps/chosen": -188.5718231201172, "logps/rejected": -302.63238525390625, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -1.50078547000885, "rewards/margins": 13.135347366333008, "rewards/rejected": -14.636133193969727, "step": 2632 }, { "epoch": 0.9, "learning_rate": 1.2111903382945977e-06, "logits/chosen": -0.3677871525287628, "logits/rejected": -0.3448403775691986, "logps/chosen": -222.10833740234375, "logps/rejected": -380.20904541015625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0766139030456543, "rewards/margins": 15.602646827697754, "rewards/rejected": -16.67926025390625, "step": 2633 }, { "epoch": 0.9, "learning_rate": 1.2106500643521584e-06, "logits/chosen": -0.4589656889438629, "logits/rejected": -0.4360996186733246, "logps/chosen": -173.97962951660156, "logps/rejected": -297.22418212890625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.38010311126708984, "rewards/margins": 11.469828605651855, "rewards/rejected": -11.849931716918945, "step": 2634 }, { "epoch": 0.9, "learning_rate": 1.2101097260590316e-06, "logits/chosen": -0.4047508239746094, "logits/rejected": -0.3699379563331604, "logps/chosen": -226.93133544921875, "logps/rejected": -367.54229736328125, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.04270988702774048, "rewards/margins": 17.37957000732422, "rewards/rejected": -17.422279357910156, "step": 2635 }, { "epoch": 0.9, "learning_rate": 1.2095693235802832e-06, "logits/chosen": -0.3895733654499054, "logits/rejected": -0.35244929790496826, "logps/chosen": -168.29104614257812, "logps/rejected": -271.7913513183594, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": 0.10195240378379822, "rewards/margins": 13.296455383300781, "rewards/rejected": -13.194501876831055, "step": 2636 }, { "epoch": 0.9, "learning_rate": 1.2090288570809985e-06, "logits/chosen": -0.4162513315677643, "logits/rejected": -0.4017668068408966, "logps/chosen": -207.6898193359375, "logps/rejected": -354.3674011230469, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -0.6159553527832031, "rewards/margins": 14.483251571655273, "rewards/rejected": -15.099206924438477, "step": 2637 }, { "epoch": 0.9, "learning_rate": 1.2084883267262826e-06, "logits/chosen": -0.38559457659721375, "logits/rejected": -0.38036400079727173, "logps/chosen": -183.77198791503906, "logps/rejected": -375.7276306152344, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.3170456886291504, "rewards/margins": 16.94456672668457, "rewards/rejected": -18.261611938476562, "step": 2638 }, { "epoch": 0.9, "learning_rate": 1.20794773268126e-06, "logits/chosen": -0.3514364957809448, "logits/rejected": -0.3217603266239166, "logps/chosen": -135.9423828125, "logps/rejected": -245.61880493164062, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.2878468036651611, "rewards/margins": 13.209601402282715, "rewards/rejected": -14.497448921203613, "step": 2639 }, { "epoch": 0.9, "learning_rate": 1.207407075111075e-06, "logits/chosen": -0.3671659827232361, "logits/rejected": -0.31509482860565186, "logps/chosen": -194.66180419921875, "logps/rejected": -255.75674438476562, "loss": 0.0538, "rewards/accuracies": 0.9375, "rewards/chosen": -1.498547077178955, "rewards/margins": 12.004634857177734, "rewards/rejected": -13.503182411193848, "step": 2640 }, { "epoch": 0.9, "learning_rate": 1.2068663541808908e-06, "logits/chosen": -0.42499110102653503, "logits/rejected": -0.418207049369812, "logps/chosen": -233.1282958984375, "logps/rejected": -435.8592224121094, "loss": 0.0516, "rewards/accuracies": 1.0, "rewards/chosen": -0.6960736513137817, "rewards/margins": 18.7081298828125, "rewards/rejected": -19.404205322265625, "step": 2641 }, { "epoch": 0.9, "learning_rate": 1.20632557005589e-06, "logits/chosen": -0.35296741127967834, "logits/rejected": -0.31682682037353516, "logps/chosen": -209.6691131591797, "logps/rejected": -289.75244140625, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -0.8254091739654541, "rewards/margins": 11.803411483764648, "rewards/rejected": -12.628820419311523, "step": 2642 }, { "epoch": 0.9, "learning_rate": 1.2057847229012753e-06, "logits/chosen": -0.4110005795955658, "logits/rejected": -0.3588961064815521, "logps/chosen": -217.98526000976562, "logps/rejected": -358.13720703125, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.0288913249969482, "rewards/margins": 17.316072463989258, "rewards/rejected": -18.34496307373047, "step": 2643 }, { "epoch": 0.9, "learning_rate": 1.2052438128822672e-06, "logits/chosen": -0.37836140394210815, "logits/rejected": -0.35390907526016235, "logps/chosen": -159.16708374023438, "logps/rejected": -284.8445129394531, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.22373482584953308, "rewards/margins": 12.24116039276123, "rewards/rejected": -12.464895248413086, "step": 2644 }, { "epoch": 0.9, "learning_rate": 1.2047028401641071e-06, "logits/chosen": -0.452930748462677, "logits/rejected": -0.41850778460502625, "logps/chosen": -153.57034301757812, "logps/rejected": -267.6808166503906, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": 0.26766330003738403, "rewards/margins": 13.239005088806152, "rewards/rejected": -12.971341133117676, "step": 2645 }, { "epoch": 0.9, "learning_rate": 1.2041618049120541e-06, "logits/chosen": -0.26306140422821045, "logits/rejected": -0.256221204996109, "logps/chosen": -134.27491760253906, "logps/rejected": -296.8848876953125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.034761428833008, "rewards/margins": 14.307882308959961, "rewards/rejected": -16.3426456451416, "step": 2646 }, { "epoch": 0.9, "learning_rate": 1.203620707291387e-06, "logits/chosen": -0.41569700837135315, "logits/rejected": -0.3718385398387909, "logps/chosen": -255.06253051757812, "logps/rejected": -337.1063537597656, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.3336522579193115, "rewards/margins": 13.53122329711914, "rewards/rejected": -13.864874839782715, "step": 2647 }, { "epoch": 0.9, "learning_rate": 1.203079547467404e-06, "logits/chosen": -0.32253512740135193, "logits/rejected": -0.25505203008651733, "logps/chosen": -244.67092895507812, "logps/rejected": -321.7043762207031, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.4288969039916992, "rewards/margins": 14.486726760864258, "rewards/rejected": -15.91562557220459, "step": 2648 }, { "epoch": 0.9, "learning_rate": 1.2025383256054218e-06, "logits/chosen": -0.32348838448524475, "logits/rejected": -0.2932710349559784, "logps/chosen": -184.50022888183594, "logps/rejected": -298.28839111328125, "loss": 0.0898, "rewards/accuracies": 1.0, "rewards/chosen": -1.8141319751739502, "rewards/margins": 13.404330253601074, "rewards/rejected": -15.218462944030762, "step": 2649 }, { "epoch": 0.9, "learning_rate": 1.2019970418707766e-06, "logits/chosen": -0.3755433261394501, "logits/rejected": -0.3405594229698181, "logps/chosen": -242.2171173095703, "logps/rejected": -401.5728759765625, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -1.538971185684204, "rewards/margins": 16.750873565673828, "rewards/rejected": -18.289844512939453, "step": 2650 }, { "epoch": 0.9, "learning_rate": 1.2014556964288221e-06, "logits/chosen": -0.40497255325317383, "logits/rejected": -0.34540852904319763, "logps/chosen": -186.4983367919922, "logps/rejected": -268.11798095703125, "loss": 0.0399, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7754998803138733, "rewards/margins": 13.433305740356445, "rewards/rejected": -14.208805084228516, "step": 2651 }, { "epoch": 0.91, "learning_rate": 1.200914289444933e-06, "logits/chosen": -0.3824521005153656, "logits/rejected": -0.3735540509223938, "logps/chosen": -193.3083038330078, "logps/rejected": -357.99261474609375, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": 0.023065894842147827, "rewards/margins": 16.86564064025879, "rewards/rejected": -16.842575073242188, "step": 2652 }, { "epoch": 0.91, "learning_rate": 1.2003728210845006e-06, "logits/chosen": -0.3534258306026459, "logits/rejected": -0.32444027066230774, "logps/chosen": -195.67147827148438, "logps/rejected": -305.03729248046875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.7300631999969482, "rewards/margins": 11.351388931274414, "rewards/rejected": -13.081452369689941, "step": 2653 }, { "epoch": 0.91, "learning_rate": 1.199831291512937e-06, "logits/chosen": -0.3819711208343506, "logits/rejected": -0.35715964436531067, "logps/chosen": -216.22821044921875, "logps/rejected": -320.39794921875, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -1.1192705631256104, "rewards/margins": 12.595563888549805, "rewards/rejected": -13.714834213256836, "step": 2654 }, { "epoch": 0.91, "learning_rate": 1.1992897008956715e-06, "logits/chosen": -0.43765878677368164, "logits/rejected": -0.4066811501979828, "logps/chosen": -176.49612426757812, "logps/rejected": -297.4197998046875, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -0.0835006833076477, "rewards/margins": 12.817325592041016, "rewards/rejected": -12.900825500488281, "step": 2655 }, { "epoch": 0.91, "learning_rate": 1.1987480493981525e-06, "logits/chosen": -0.4068090617656708, "logits/rejected": -0.3767448961734772, "logps/chosen": -201.8218994140625, "logps/rejected": -355.309326171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7733346223831177, "rewards/margins": 14.414297103881836, "rewards/rejected": -15.187629699707031, "step": 2656 }, { "epoch": 0.91, "learning_rate": 1.1982063371858473e-06, "logits/chosen": -0.40385743975639343, "logits/rejected": -0.3544154763221741, "logps/chosen": -192.1390380859375, "logps/rejected": -227.97396850585938, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.4005718231201172, "rewards/margins": 9.178112983703613, "rewards/rejected": -10.578682899475098, "step": 2657 }, { "epoch": 0.91, "learning_rate": 1.1976645644242413e-06, "logits/chosen": -0.41503340005874634, "logits/rejected": -0.3966131806373596, "logps/chosen": -222.6969451904297, "logps/rejected": -380.6444396972656, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -2.0566463470458984, "rewards/margins": 14.520660400390625, "rewards/rejected": -16.577306747436523, "step": 2658 }, { "epoch": 0.91, "learning_rate": 1.1971227312788387e-06, "logits/chosen": -0.3559054434299469, "logits/rejected": -0.3013722598552704, "logps/chosen": -193.45303344726562, "logps/rejected": -276.8070373535156, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.9236841797828674, "rewards/margins": 13.372591018676758, "rewards/rejected": -14.29627799987793, "step": 2659 }, { "epoch": 0.91, "learning_rate": 1.1965808379151623e-06, "logits/chosen": -0.39520981907844543, "logits/rejected": -0.3948109745979309, "logps/chosen": -180.0478973388672, "logps/rejected": -383.724365234375, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -0.9737049341201782, "rewards/margins": 17.4831485748291, "rewards/rejected": -18.45685386657715, "step": 2660 }, { "epoch": 0.91, "learning_rate": 1.1960388844987528e-06, "logits/chosen": -0.36564135551452637, "logits/rejected": -0.36474040150642395, "logps/chosen": -152.08465576171875, "logps/rejected": -317.8724670410156, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -0.9609832763671875, "rewards/margins": 15.62220573425293, "rewards/rejected": -16.583189010620117, "step": 2661 }, { "epoch": 0.91, "learning_rate": 1.1954968711951691e-06, "logits/chosen": -0.26793530583381653, "logits/rejected": -0.22168566286563873, "logps/chosen": -174.38800048828125, "logps/rejected": -261.4639892578125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.02792876958847046, "rewards/margins": 12.358201026916504, "rewards/rejected": -12.386129379272461, "step": 2662 }, { "epoch": 0.91, "learning_rate": 1.1949547981699893e-06, "logits/chosen": -0.40087661147117615, "logits/rejected": -0.35683244466781616, "logps/chosen": -245.83091735839844, "logps/rejected": -342.57049560546875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.4947422742843628, "rewards/margins": 14.487188339233398, "rewards/rejected": -13.99244499206543, "step": 2663 }, { "epoch": 0.91, "learning_rate": 1.1944126655888093e-06, "logits/chosen": -0.31677162647247314, "logits/rejected": -0.287746787071228, "logps/chosen": -207.74832153320312, "logps/rejected": -332.1690979003906, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7213912606239319, "rewards/margins": 14.111859321594238, "rewards/rejected": -14.833250045776367, "step": 2664 }, { "epoch": 0.91, "learning_rate": 1.1938704736172427e-06, "logits/chosen": -0.3891718089580536, "logits/rejected": -0.34637606143951416, "logps/chosen": -205.10179138183594, "logps/rejected": -333.1259765625, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.028212785720825, "rewards/margins": 16.029775619506836, "rewards/rejected": -18.0579891204834, "step": 2665 }, { "epoch": 0.91, "learning_rate": 1.193328222420922e-06, "logits/chosen": -0.33725059032440186, "logits/rejected": -0.30568787455558777, "logps/chosen": -222.81314086914062, "logps/rejected": -377.3201599121094, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": 0.4671139121055603, "rewards/margins": 15.857682228088379, "rewards/rejected": -15.390568733215332, "step": 2666 }, { "epoch": 0.91, "learning_rate": 1.1927859121654973e-06, "logits/chosen": -0.44140851497650146, "logits/rejected": -0.36356276273727417, "logps/chosen": -231.0351104736328, "logps/rejected": -347.220947265625, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.5666223168373108, "rewards/margins": 14.806161880493164, "rewards/rejected": -15.372783660888672, "step": 2667 }, { "epoch": 0.91, "learning_rate": 1.1922435430166369e-06, "logits/chosen": -0.34144261479377747, "logits/rejected": -0.31731802225112915, "logps/chosen": -181.49037170410156, "logps/rejected": -320.8206787109375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.7884474396705627, "rewards/margins": 13.454537391662598, "rewards/rejected": -14.2429838180542, "step": 2668 }, { "epoch": 0.91, "learning_rate": 1.1917011151400271e-06, "logits/chosen": -0.3013635277748108, "logits/rejected": -0.29846325516700745, "logps/chosen": -219.81141662597656, "logps/rejected": -385.20770263671875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.4346604347229004, "rewards/margins": 15.032261848449707, "rewards/rejected": -16.466920852661133, "step": 2669 }, { "epoch": 0.91, "learning_rate": 1.1911586287013725e-06, "logits/chosen": -0.38961780071258545, "logits/rejected": -0.33734259009361267, "logps/chosen": -227.3920135498047, "logps/rejected": -319.3489685058594, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.02320234104990959, "rewards/margins": 14.464983940124512, "rewards/rejected": -14.488186836242676, "step": 2670 }, { "epoch": 0.91, "learning_rate": 1.1906160838663943e-06, "logits/chosen": -0.3159940838813782, "logits/rejected": -0.28654929995536804, "logps/chosen": -181.3931427001953, "logps/rejected": -299.5973815917969, "loss": 0.0301, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2343899011611938, "rewards/margins": 14.456377029418945, "rewards/rejected": -15.690767288208008, "step": 2671 }, { "epoch": 0.91, "learning_rate": 1.1900734808008332e-06, "logits/chosen": -0.3458465039730072, "logits/rejected": -0.3221941590309143, "logps/chosen": -226.45249938964844, "logps/rejected": -342.23681640625, "loss": 0.0323, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5458869934082031, "rewards/margins": 13.435874938964844, "rewards/rejected": -12.88998794555664, "step": 2672 }, { "epoch": 0.91, "learning_rate": 1.1895308196704472e-06, "logits/chosen": -0.37108513712882996, "logits/rejected": -0.3583308458328247, "logps/chosen": -242.0697021484375, "logps/rejected": -404.3282775878906, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 0.3354952037334442, "rewards/margins": 16.56652069091797, "rewards/rejected": -16.231027603149414, "step": 2673 }, { "epoch": 0.91, "learning_rate": 1.1889881006410112e-06, "logits/chosen": -0.31945863366127014, "logits/rejected": -0.2617584764957428, "logps/chosen": -174.49346923828125, "logps/rejected": -307.773193359375, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": -0.5164186954498291, "rewards/margins": 15.248091697692871, "rewards/rejected": -15.764510154724121, "step": 2674 }, { "epoch": 0.91, "learning_rate": 1.1884453238783182e-06, "logits/chosen": -0.24118943512439728, "logits/rejected": -0.19679714739322662, "logps/chosen": -237.47543334960938, "logps/rejected": -344.3170166015625, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.7118151187896729, "rewards/margins": 14.547683715820312, "rewards/rejected": -15.259498596191406, "step": 2675 }, { "epoch": 0.91, "learning_rate": 1.1879024895481797e-06, "logits/chosen": -0.4274117946624756, "logits/rejected": -0.421023964881897, "logps/chosen": -153.89788818359375, "logps/rejected": -286.7286682128906, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8486876487731934, "rewards/margins": 11.6395845413208, "rewards/rejected": -12.488271713256836, "step": 2676 }, { "epoch": 0.91, "learning_rate": 1.1873595978164236e-06, "logits/chosen": -0.3523722290992737, "logits/rejected": -0.2995177209377289, "logps/chosen": -219.65386962890625, "logps/rejected": -335.0431823730469, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.05778820812702179, "rewards/margins": 14.707005500793457, "rewards/rejected": -14.649216651916504, "step": 2677 }, { "epoch": 0.91, "learning_rate": 1.186816648848896e-06, "logits/chosen": -0.2841511070728302, "logits/rejected": -0.27765223383903503, "logps/chosen": -186.62696838378906, "logps/rejected": -330.6341247558594, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.26538386940956116, "rewards/margins": 14.946969032287598, "rewards/rejected": -15.212353706359863, "step": 2678 }, { "epoch": 0.91, "learning_rate": 1.1862736428114606e-06, "logits/chosen": -0.40400639176368713, "logits/rejected": -0.38739120960235596, "logps/chosen": -165.97134399414062, "logps/rejected": -287.4844665527344, "loss": 0.0335, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4666023850440979, "rewards/margins": 12.99217414855957, "rewards/rejected": -13.45877456665039, "step": 2679 }, { "epoch": 0.91, "learning_rate": 1.1857305798699975e-06, "logits/chosen": -0.33744367957115173, "logits/rejected": -0.30898064374923706, "logps/chosen": -211.04983520507812, "logps/rejected": -348.0718994140625, "loss": 0.0417, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8985168933868408, "rewards/margins": 14.038291931152344, "rewards/rejected": -14.936807632446289, "step": 2680 }, { "epoch": 0.92, "learning_rate": 1.1851874601904055e-06, "logits/chosen": -0.3297681212425232, "logits/rejected": -0.29009220004081726, "logps/chosen": -200.45263671875, "logps/rejected": -261.50177001953125, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": 0.2781721353530884, "rewards/margins": 12.020977973937988, "rewards/rejected": -11.742805480957031, "step": 2681 }, { "epoch": 0.92, "learning_rate": 1.1846442839386003e-06, "logits/chosen": -0.3402109742164612, "logits/rejected": -0.3057141602039337, "logps/chosen": -202.92483520507812, "logps/rejected": -299.0839538574219, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.4484978914260864, "rewards/margins": 12.85745906829834, "rewards/rejected": -14.305957794189453, "step": 2682 }, { "epoch": 0.92, "learning_rate": 1.1841010512805146e-06, "logits/chosen": -0.37060850858688354, "logits/rejected": -0.31890496611595154, "logps/chosen": -246.1071319580078, "logps/rejected": -365.85693359375, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 0.41272854804992676, "rewards/margins": 15.677682876586914, "rewards/rejected": -15.26495361328125, "step": 2683 }, { "epoch": 0.92, "learning_rate": 1.183557762382098e-06, "logits/chosen": -0.39797261357307434, "logits/rejected": -0.3820923864841461, "logps/chosen": -272.3111877441406, "logps/rejected": -418.65338134765625, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.7950568199157715, "rewards/margins": 13.772058486938477, "rewards/rejected": -14.56711483001709, "step": 2684 }, { "epoch": 0.92, "learning_rate": 1.1830144174093181e-06, "logits/chosen": -0.39019325375556946, "logits/rejected": -0.36297911405563354, "logps/chosen": -151.82000732421875, "logps/rejected": -252.44064331054688, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.264154851436615, "rewards/margins": 13.95533275604248, "rewards/rejected": -14.219488143920898, "step": 2685 }, { "epoch": 0.92, "learning_rate": 1.1824710165281596e-06, "logits/chosen": -0.2742413282394409, "logits/rejected": -0.26747700572013855, "logps/chosen": -201.50978088378906, "logps/rejected": -326.6403503417969, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -2.735917091369629, "rewards/margins": 12.7039155960083, "rewards/rejected": -15.43983268737793, "step": 2686 }, { "epoch": 0.92, "learning_rate": 1.1819275599046235e-06, "logits/chosen": -0.3298741281032562, "logits/rejected": -0.28888386487960815, "logps/chosen": -191.2889404296875, "logps/rejected": -258.85809326171875, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": 0.13939684629440308, "rewards/margins": 11.340713500976562, "rewards/rejected": -11.201316833496094, "step": 2687 }, { "epoch": 0.92, "learning_rate": 1.1813840477047287e-06, "logits/chosen": -0.330463171005249, "logits/rejected": -0.2869333028793335, "logps/chosen": -175.59811401367188, "logps/rejected": -244.34848022460938, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.29869091510772705, "rewards/margins": 11.908329963684082, "rewards/rejected": -12.207021713256836, "step": 2688 }, { "epoch": 0.92, "learning_rate": 1.1808404800945105e-06, "logits/chosen": -0.33429116010665894, "logits/rejected": -0.30671781301498413, "logps/chosen": -220.00216674804688, "logps/rejected": -353.2779846191406, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -2.4657399654388428, "rewards/margins": 14.380256652832031, "rewards/rejected": -16.845998764038086, "step": 2689 }, { "epoch": 0.92, "learning_rate": 1.1802968572400208e-06, "logits/chosen": -0.2889551520347595, "logits/rejected": -0.2533782124519348, "logps/chosen": -217.73995971679688, "logps/rejected": -306.55218505859375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.47831976413726807, "rewards/margins": 14.532644271850586, "rewards/rejected": -15.010965347290039, "step": 2690 }, { "epoch": 0.92, "learning_rate": 1.17975317930733e-06, "logits/chosen": -0.34476178884506226, "logits/rejected": -0.3045461177825928, "logps/chosen": -228.58999633789062, "logps/rejected": -306.5672607421875, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 0.14983606338500977, "rewards/margins": 11.559551239013672, "rewards/rejected": -11.40971565246582, "step": 2691 }, { "epoch": 0.92, "learning_rate": 1.179209446462523e-06, "logits/chosen": -0.3036459684371948, "logits/rejected": -0.2915107309818268, "logps/chosen": -261.10516357421875, "logps/rejected": -403.8567810058594, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": 0.7583225965499878, "rewards/margins": 16.115074157714844, "rewards/rejected": -15.35675048828125, "step": 2692 }, { "epoch": 0.92, "learning_rate": 1.1786656588717038e-06, "logits/chosen": -0.3000178337097168, "logits/rejected": -0.25937023758888245, "logps/chosen": -184.96412658691406, "logps/rejected": -247.9586944580078, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7389069199562073, "rewards/margins": 11.481264114379883, "rewards/rejected": -12.220170974731445, "step": 2693 }, { "epoch": 0.92, "learning_rate": 1.1781218167009915e-06, "logits/chosen": -0.31860631704330444, "logits/rejected": -0.2847749888896942, "logps/chosen": -203.00015258789062, "logps/rejected": -310.14044189453125, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -0.2518627345561981, "rewards/margins": 15.809613227844238, "rewards/rejected": -16.06147575378418, "step": 2694 }, { "epoch": 0.92, "learning_rate": 1.177577920116522e-06, "logits/chosen": -0.2882639169692993, "logits/rejected": -0.24161367118358612, "logps/chosen": -182.02769470214844, "logps/rejected": -341.89910888671875, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -1.5756131410598755, "rewards/margins": 14.658281326293945, "rewards/rejected": -16.23389434814453, "step": 2695 }, { "epoch": 0.92, "learning_rate": 1.1770339692844483e-06, "logits/chosen": -0.3813041150569916, "logits/rejected": -0.32571491599082947, "logps/chosen": -216.89260864257812, "logps/rejected": -306.2550354003906, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.3701132535934448, "rewards/margins": 15.105857849121094, "rewards/rejected": -16.475971221923828, "step": 2696 }, { "epoch": 0.92, "learning_rate": 1.1764899643709406e-06, "logits/chosen": -0.3135989308357239, "logits/rejected": -0.28338149189949036, "logps/chosen": -255.52493286132812, "logps/rejected": -408.15283203125, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.6623635292053223, "rewards/margins": 17.490739822387695, "rewards/rejected": -18.15310287475586, "step": 2697 }, { "epoch": 0.92, "learning_rate": 1.1759459055421841e-06, "logits/chosen": -0.26076582074165344, "logits/rejected": -0.2299756556749344, "logps/chosen": -188.44512939453125, "logps/rejected": -274.7181701660156, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -1.3737518787384033, "rewards/margins": 12.342458724975586, "rewards/rejected": -13.716209411621094, "step": 2698 }, { "epoch": 0.92, "learning_rate": 1.1754017929643817e-06, "logits/chosen": -0.30315476655960083, "logits/rejected": -0.27550649642944336, "logps/chosen": -168.0497283935547, "logps/rejected": -272.74169921875, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -1.6236357688903809, "rewards/margins": 11.374346733093262, "rewards/rejected": -12.997982025146484, "step": 2699 }, { "epoch": 0.92, "learning_rate": 1.1748576268037523e-06, "logits/chosen": -0.2937515377998352, "logits/rejected": -0.2573448419570923, "logps/chosen": -159.51828002929688, "logps/rejected": -262.7811279296875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 0.1576499342918396, "rewards/margins": 12.31089973449707, "rewards/rejected": -12.153249740600586, "step": 2700 }, { "epoch": 0.92, "learning_rate": 1.1743134072265304e-06, "logits/chosen": -0.2672722041606903, "logits/rejected": -0.2298324853181839, "logps/chosen": -210.28713989257812, "logps/rejected": -355.4650573730469, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.2862566113471985, "rewards/margins": 14.6795654296875, "rewards/rejected": -14.965822219848633, "step": 2701 }, { "epoch": 0.92, "learning_rate": 1.1737691343989688e-06, "logits/chosen": -0.29099178314208984, "logits/rejected": -0.27547934651374817, "logps/chosen": -176.02476501464844, "logps/rejected": -337.048583984375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.19236306846141815, "rewards/margins": 15.263113975524902, "rewards/rejected": -15.455476760864258, "step": 2702 }, { "epoch": 0.92, "learning_rate": 1.1732248084873347e-06, "logits/chosen": -0.33452606201171875, "logits/rejected": -0.3178432583808899, "logps/chosen": -156.3170166015625, "logps/rejected": -271.869140625, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": -1.338158369064331, "rewards/margins": 12.599242210388184, "rewards/rejected": -13.937398910522461, "step": 2703 }, { "epoch": 0.92, "learning_rate": 1.1726804296579115e-06, "logits/chosen": -0.1946423351764679, "logits/rejected": -0.16675876080989838, "logps/chosen": -161.92652893066406, "logps/rejected": -304.95281982421875, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -1.621567964553833, "rewards/margins": 14.289786338806152, "rewards/rejected": -15.911354064941406, "step": 2704 }, { "epoch": 0.92, "learning_rate": 1.1721359980770005e-06, "logits/chosen": -0.3497605621814728, "logits/rejected": -0.3054628074169159, "logps/chosen": -231.6501007080078, "logps/rejected": -373.8495788574219, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.8249914646148682, "rewards/margins": 15.433247566223145, "rewards/rejected": -17.258237838745117, "step": 2705 }, { "epoch": 0.92, "learning_rate": 1.1715915139109178e-06, "logits/chosen": -0.2974780797958374, "logits/rejected": -0.24948853254318237, "logps/chosen": -283.10858154296875, "logps/rejected": -337.8753662109375, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.8553471565246582, "rewards/margins": 12.56442642211914, "rewards/rejected": -13.419772148132324, "step": 2706 }, { "epoch": 0.92, "learning_rate": 1.1710469773259956e-06, "logits/chosen": -0.29335442185401917, "logits/rejected": -0.26256221532821655, "logps/chosen": -240.02984619140625, "logps/rejected": -382.9793395996094, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.1784257888793945, "rewards/margins": 13.098177909851074, "rewards/rejected": -15.276604652404785, "step": 2707 }, { "epoch": 0.92, "learning_rate": 1.170502388488582e-06, "logits/chosen": -0.2583821415901184, "logits/rejected": -0.24454465508460999, "logps/chosen": -154.14453125, "logps/rejected": -284.76531982421875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.2524715065956116, "rewards/margins": 14.365365982055664, "rewards/rejected": -14.112895965576172, "step": 2708 }, { "epoch": 0.92, "learning_rate": 1.1699577475650419e-06, "logits/chosen": -0.25803259015083313, "logits/rejected": -0.22301672399044037, "logps/chosen": -157.22711181640625, "logps/rejected": -283.06103515625, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -0.6765196323394775, "rewards/margins": 13.718035697937012, "rewards/rejected": -14.394556045532227, "step": 2709 }, { "epoch": 0.92, "learning_rate": 1.1694130547217554e-06, "logits/chosen": -0.14903898537158966, "logits/rejected": -0.12866580486297607, "logps/chosen": -126.73690795898438, "logps/rejected": -247.03472900390625, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -2.2565207481384277, "rewards/margins": 12.175976753234863, "rewards/rejected": -14.43249797821045, "step": 2710 }, { "epoch": 0.93, "learning_rate": 1.1688683101251184e-06, "logits/chosen": -0.2605361044406891, "logits/rejected": -0.2398008108139038, "logps/chosen": -184.15330505371094, "logps/rejected": -276.6175231933594, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.8871541023254395, "rewards/margins": 12.45998764038086, "rewards/rejected": -13.34714126586914, "step": 2711 }, { "epoch": 0.93, "learning_rate": 1.1683235139415436e-06, "logits/chosen": -0.2905138432979584, "logits/rejected": -0.27787086367607117, "logps/chosen": -152.2189178466797, "logps/rejected": -286.169189453125, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.16280050575733185, "rewards/margins": 12.635130882263184, "rewards/rejected": -12.797930717468262, "step": 2712 }, { "epoch": 0.93, "learning_rate": 1.167778666337458e-06, "logits/chosen": -0.3026086688041687, "logits/rejected": -0.27389881014823914, "logps/chosen": -214.71078491210938, "logps/rejected": -350.7413330078125, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": 0.235824316740036, "rewards/margins": 15.418439865112305, "rewards/rejected": -15.182615280151367, "step": 2713 }, { "epoch": 0.93, "learning_rate": 1.1672337674793048e-06, "logits/chosen": -0.3626295030117035, "logits/rejected": -0.33919641375541687, "logps/chosen": -200.69505310058594, "logps/rejected": -430.07061767578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.130512237548828, "rewards/margins": 19.156848907470703, "rewards/rejected": -21.28736114501953, "step": 2714 }, { "epoch": 0.93, "learning_rate": 1.1666888175335441e-06, "logits/chosen": -0.2763957381248474, "logits/rejected": -0.23922273516654968, "logps/chosen": -201.88877868652344, "logps/rejected": -362.2859191894531, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -1.7961839437484741, "rewards/margins": 15.459957122802734, "rewards/rejected": -17.256141662597656, "step": 2715 }, { "epoch": 0.93, "learning_rate": 1.1661438166666495e-06, "logits/chosen": -0.40083709359169006, "logits/rejected": -0.3396025598049164, "logps/chosen": -309.392333984375, "logps/rejected": -398.888671875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.6468325853347778, "rewards/margins": 16.505584716796875, "rewards/rejected": -17.152416229248047, "step": 2716 }, { "epoch": 0.93, "learning_rate": 1.165598765045112e-06, "logits/chosen": -0.3231176435947418, "logits/rejected": -0.30927374958992004, "logps/chosen": -205.38731384277344, "logps/rejected": -330.073486328125, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.256588101387024, "rewards/margins": 15.089953422546387, "rewards/rejected": -16.346540451049805, "step": 2717 }, { "epoch": 0.93, "learning_rate": 1.1650536628354372e-06, "logits/chosen": -0.2370893657207489, "logits/rejected": -0.21557310223579407, "logps/chosen": -215.34066772460938, "logps/rejected": -336.2469787597656, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -1.5830916166305542, "rewards/margins": 14.11085033416748, "rewards/rejected": -15.69394302368164, "step": 2718 }, { "epoch": 0.93, "learning_rate": 1.1645085102041461e-06, "logits/chosen": -0.2507967948913574, "logits/rejected": -0.24025510251522064, "logps/chosen": -126.62554931640625, "logps/rejected": -276.4400634765625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7550246119499207, "rewards/margins": 13.070843696594238, "rewards/rejected": -13.825868606567383, "step": 2719 }, { "epoch": 0.93, "learning_rate": 1.1639633073177752e-06, "logits/chosen": -0.23640207946300507, "logits/rejected": -0.2195671647787094, "logps/chosen": -223.67599487304688, "logps/rejected": -359.96954345703125, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -1.4739757776260376, "rewards/margins": 16.04693603515625, "rewards/rejected": -17.520912170410156, "step": 2720 }, { "epoch": 0.93, "learning_rate": 1.1634180543428768e-06, "logits/chosen": -0.2208739072084427, "logits/rejected": -0.17154701054096222, "logps/chosen": -198.2742919921875, "logps/rejected": -319.8347473144531, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -1.1744732856750488, "rewards/margins": 15.171144485473633, "rewards/rejected": -16.345617294311523, "step": 2721 }, { "epoch": 0.93, "learning_rate": 1.1628727514460182e-06, "logits/chosen": -0.31826940178871155, "logits/rejected": -0.3012731671333313, "logps/chosen": -212.49954223632812, "logps/rejected": -316.6264953613281, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.5885899066925049, "rewards/margins": 14.205155372619629, "rewards/rejected": -14.793746948242188, "step": 2722 }, { "epoch": 0.93, "learning_rate": 1.1623273987937814e-06, "logits/chosen": -0.25961461663246155, "logits/rejected": -0.2491617351770401, "logps/chosen": -145.19749450683594, "logps/rejected": -330.1210632324219, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": -2.1207404136657715, "rewards/margins": 15.674874305725098, "rewards/rejected": -17.79561424255371, "step": 2723 }, { "epoch": 0.93, "learning_rate": 1.161781996552765e-06, "logits/chosen": -0.28038495779037476, "logits/rejected": -0.2515980005264282, "logps/chosen": -186.45465087890625, "logps/rejected": -316.48046875, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": -1.240866780281067, "rewards/margins": 15.117334365844727, "rewards/rejected": -16.35820198059082, "step": 2724 }, { "epoch": 0.93, "learning_rate": 1.1612365448895807e-06, "logits/chosen": -0.3310549259185791, "logits/rejected": -0.3008629083633423, "logps/chosen": -195.5712890625, "logps/rejected": -320.9017028808594, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1547973155975342, "rewards/margins": 13.093114852905273, "rewards/rejected": -14.247910499572754, "step": 2725 }, { "epoch": 0.93, "learning_rate": 1.1606910439708573e-06, "logits/chosen": -0.2825566232204437, "logits/rejected": -0.2738012671470642, "logps/chosen": -164.46347045898438, "logps/rejected": -245.92591857910156, "loss": 0.0635, "rewards/accuracies": 0.9375, "rewards/chosen": -2.801115036010742, "rewards/margins": 8.430083274841309, "rewards/rejected": -11.23119831085205, "step": 2726 }, { "epoch": 0.93, "learning_rate": 1.1601454939632373e-06, "logits/chosen": -0.30031463503837585, "logits/rejected": -0.24964898824691772, "logps/chosen": -245.797119140625, "logps/rejected": -299.0574951171875, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.8281015157699585, "rewards/margins": 11.430410385131836, "rewards/rejected": -12.25851058959961, "step": 2727 }, { "epoch": 0.93, "learning_rate": 1.1595998950333793e-06, "logits/chosen": -0.2973514795303345, "logits/rejected": -0.27538207173347473, "logps/chosen": -148.3056182861328, "logps/rejected": -340.90869140625, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -0.7154837250709534, "rewards/margins": 14.451642990112305, "rewards/rejected": -15.167126655578613, "step": 2728 }, { "epoch": 0.93, "learning_rate": 1.1590542473479556e-06, "logits/chosen": -0.25039762258529663, "logits/rejected": -0.22036446630954742, "logps/chosen": -152.57955932617188, "logps/rejected": -248.9562530517578, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.214645266532898, "rewards/margins": 13.244712829589844, "rewards/rejected": -14.459357261657715, "step": 2729 }, { "epoch": 0.93, "learning_rate": 1.1585085510736546e-06, "logits/chosen": -0.2408284842967987, "logits/rejected": -0.23621538281440735, "logps/chosen": -152.4442596435547, "logps/rejected": -337.82000732421875, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.8394075036048889, "rewards/margins": 15.900541305541992, "rewards/rejected": -16.739948272705078, "step": 2730 }, { "epoch": 0.93, "learning_rate": 1.1579628063771786e-06, "logits/chosen": -0.3165755867958069, "logits/rejected": -0.306720495223999, "logps/chosen": -165.25828552246094, "logps/rejected": -338.28350830078125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.1736185550689697, "rewards/margins": 14.506609916687012, "rewards/rejected": -15.680228233337402, "step": 2731 }, { "epoch": 0.93, "learning_rate": 1.157417013425245e-06, "logits/chosen": -0.29533156752586365, "logits/rejected": -0.28039777278900146, "logps/chosen": -263.38507080078125, "logps/rejected": -400.3868103027344, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.4920138120651245, "rewards/margins": 16.112754821777344, "rewards/rejected": -16.604764938354492, "step": 2732 }, { "epoch": 0.93, "learning_rate": 1.1568711723845865e-06, "logits/chosen": -0.33334770798683167, "logits/rejected": -0.2907031178474426, "logps/chosen": -247.92759704589844, "logps/rejected": -359.03326416015625, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.30673861503601074, "rewards/margins": 15.857769012451172, "rewards/rejected": -16.164508819580078, "step": 2733 }, { "epoch": 0.93, "learning_rate": 1.1563252834219494e-06, "logits/chosen": -0.2551584839820862, "logits/rejected": -0.23950563371181488, "logps/chosen": -156.0664825439453, "logps/rejected": -287.803955078125, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -1.7566243410110474, "rewards/margins": 13.491814613342285, "rewards/rejected": -15.24843978881836, "step": 2734 }, { "epoch": 0.93, "learning_rate": 1.1557793467040958e-06, "logits/chosen": -0.3080753684043884, "logits/rejected": -0.2806236743927002, "logps/chosen": -204.83535766601562, "logps/rejected": -320.26727294921875, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -1.5495318174362183, "rewards/margins": 11.769243240356445, "rewards/rejected": -13.318774223327637, "step": 2735 }, { "epoch": 0.93, "learning_rate": 1.1552333623978017e-06, "logits/chosen": -0.30397099256515503, "logits/rejected": -0.3036516606807709, "logps/chosen": -233.3210906982422, "logps/rejected": -399.34393310546875, "loss": 0.0631, "rewards/accuracies": 1.0, "rewards/chosen": -2.781900644302368, "rewards/margins": 14.877212524414062, "rewards/rejected": -17.65911293029785, "step": 2736 }, { "epoch": 0.93, "learning_rate": 1.1546873306698575e-06, "logits/chosen": -0.351173996925354, "logits/rejected": -0.3483119010925293, "logps/chosen": -244.29251098632812, "logps/rejected": -427.2227478027344, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.21588049829006195, "rewards/margins": 15.008011817932129, "rewards/rejected": -15.223893165588379, "step": 2737 }, { "epoch": 0.93, "learning_rate": 1.1541412516870684e-06, "logits/chosen": -0.34486258029937744, "logits/rejected": -0.33173179626464844, "logps/chosen": -230.35699462890625, "logps/rejected": -347.41998291015625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.6377053260803223, "rewards/margins": 14.83139419555664, "rewards/rejected": -16.469099044799805, "step": 2738 }, { "epoch": 0.93, "learning_rate": 1.1535951256162542e-06, "logits/chosen": -0.3457697927951813, "logits/rejected": -0.29981911182403564, "logps/chosen": -210.1657257080078, "logps/rejected": -371.6733093261719, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.6206765174865723, "rewards/margins": 16.06416893005371, "rewards/rejected": -16.684844970703125, "step": 2739 }, { "epoch": 0.94, "learning_rate": 1.1530489526242488e-06, "logits/chosen": -0.4139569103717804, "logits/rejected": -0.35669898986816406, "logps/chosen": -227.5648956298828, "logps/rejected": -328.126220703125, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -0.6751488447189331, "rewards/margins": 13.52424430847168, "rewards/rejected": -14.199392318725586, "step": 2740 }, { "epoch": 0.94, "learning_rate": 1.1525027328779005e-06, "logits/chosen": -0.2972300052642822, "logits/rejected": -0.31119492650032043, "logps/chosen": -227.83102416992188, "logps/rejected": -439.9423828125, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.1512665748596191, "rewards/margins": 16.74192237854004, "rewards/rejected": -17.893186569213867, "step": 2741 }, { "epoch": 0.94, "learning_rate": 1.151956466544072e-06, "logits/chosen": -0.262500524520874, "logits/rejected": -0.23248372972011566, "logps/chosen": -166.36663818359375, "logps/rejected": -325.9510498046875, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.9144232273101807, "rewards/margins": 15.559329986572266, "rewards/rejected": -17.473752975463867, "step": 2742 }, { "epoch": 0.94, "learning_rate": 1.1514101537896397e-06, "logits/chosen": -0.30657386779785156, "logits/rejected": -0.2848556339740753, "logps/chosen": -188.5561981201172, "logps/rejected": -278.8712463378906, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.3763139843940735, "rewards/margins": 12.008865356445312, "rewards/rejected": -12.385178565979004, "step": 2743 }, { "epoch": 0.94, "learning_rate": 1.150863794781495e-06, "logits/chosen": -0.28006795048713684, "logits/rejected": -0.2545057237148285, "logps/chosen": -173.10250854492188, "logps/rejected": -319.3921813964844, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.2620857954025269, "rewards/margins": 15.472556114196777, "rewards/rejected": -16.734643936157227, "step": 2744 }, { "epoch": 0.94, "learning_rate": 1.150317389686543e-06, "logits/chosen": -0.25476598739624023, "logits/rejected": -0.24706560373306274, "logps/chosen": -247.8878173828125, "logps/rejected": -416.0248718261719, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.2153571844100952, "rewards/margins": 15.837278366088867, "rewards/rejected": -17.052635192871094, "step": 2745 }, { "epoch": 0.94, "learning_rate": 1.149770938671703e-06, "logits/chosen": -0.38704487681388855, "logits/rejected": -0.3636605739593506, "logps/chosen": -219.910400390625, "logps/rejected": -407.54107666015625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.243025541305542, "rewards/margins": 18.16575813293457, "rewards/rejected": -19.408784866333008, "step": 2746 }, { "epoch": 0.94, "learning_rate": 1.149224441903908e-06, "logits/chosen": -0.30339953303337097, "logits/rejected": -0.271014928817749, "logps/chosen": -187.90859985351562, "logps/rejected": -302.6044006347656, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.2362897396087646, "rewards/margins": 14.454410552978516, "rewards/rejected": -15.690701484680176, "step": 2747 }, { "epoch": 0.94, "learning_rate": 1.1486778995501056e-06, "logits/chosen": -0.28670522570610046, "logits/rejected": -0.2479342520236969, "logps/chosen": -168.56178283691406, "logps/rejected": -250.47622680664062, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.04148352891206741, "rewards/margins": 11.726154327392578, "rewards/rejected": -11.76763916015625, "step": 2748 }, { "epoch": 0.94, "learning_rate": 1.1481313117772561e-06, "logits/chosen": -0.2998214662075043, "logits/rejected": -0.2598962187767029, "logps/chosen": -196.77598571777344, "logps/rejected": -321.2436828613281, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -0.5214904546737671, "rewards/margins": 16.12061309814453, "rewards/rejected": -16.64210319519043, "step": 2749 }, { "epoch": 0.94, "learning_rate": 1.1475846787523359e-06, "logits/chosen": -0.29602980613708496, "logits/rejected": -0.26475560665130615, "logps/chosen": -179.74374389648438, "logps/rejected": -315.29168701171875, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -1.3192274570465088, "rewards/margins": 14.163052558898926, "rewards/rejected": -15.482279777526855, "step": 2750 }, { "epoch": 0.94, "learning_rate": 1.1470380006423324e-06, "logits/chosen": -0.23228347301483154, "logits/rejected": -0.21982312202453613, "logps/chosen": -226.13552856445312, "logps/rejected": -397.275634765625, "loss": 0.0221, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7734090089797974, "rewards/margins": 15.60250186920166, "rewards/rejected": -16.37590980529785, "step": 2751 }, { "epoch": 0.94, "learning_rate": 1.1464912776142492e-06, "logits/chosen": -0.3672367036342621, "logits/rejected": -0.3227385878562927, "logps/chosen": -195.13446044921875, "logps/rejected": -312.0146484375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.5621628761291504, "rewards/margins": 15.59848403930664, "rewards/rejected": -17.160648345947266, "step": 2752 }, { "epoch": 0.94, "learning_rate": 1.1459445098351022e-06, "logits/chosen": -0.288184255361557, "logits/rejected": -0.2709898054599762, "logps/chosen": -154.8632049560547, "logps/rejected": -290.7928161621094, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.681775450706482, "rewards/margins": 13.91063404083252, "rewards/rejected": -15.592411041259766, "step": 2753 }, { "epoch": 0.94, "learning_rate": 1.1453976974719215e-06, "logits/chosen": -0.2315366566181183, "logits/rejected": -0.21600012481212616, "logps/chosen": -147.76702880859375, "logps/rejected": -294.03167724609375, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.46058595180511475, "rewards/margins": 13.641885757446289, "rewards/rejected": -14.102472305297852, "step": 2754 }, { "epoch": 0.94, "learning_rate": 1.144850840691751e-06, "logits/chosen": -0.3421799838542938, "logits/rejected": -0.3232394754886627, "logps/chosen": -231.94845581054688, "logps/rejected": -418.0204162597656, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.6616653203964233, "rewards/margins": 18.723634719848633, "rewards/rejected": -20.385299682617188, "step": 2755 }, { "epoch": 0.94, "learning_rate": 1.1443039396616475e-06, "logits/chosen": -0.3195216953754425, "logits/rejected": -0.28073450922966003, "logps/chosen": -200.403076171875, "logps/rejected": -292.0351257324219, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.053283929824829, "rewards/margins": 12.130224227905273, "rewards/rejected": -13.183507919311523, "step": 2756 }, { "epoch": 0.94, "learning_rate": 1.1437569945486817e-06, "logits/chosen": -0.22329165041446686, "logits/rejected": -0.2015828788280487, "logps/chosen": -125.66990661621094, "logps/rejected": -205.39186096191406, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.6936776638031006, "rewards/margins": 10.916604995727539, "rewards/rejected": -11.610282897949219, "step": 2757 }, { "epoch": 0.94, "learning_rate": 1.1432100055199381e-06, "logits/chosen": -0.33211255073547363, "logits/rejected": -0.2987990975379944, "logps/chosen": -224.99366760253906, "logps/rejected": -348.0487060546875, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": -0.37015387415885925, "rewards/margins": 15.20519733428955, "rewards/rejected": -15.575350761413574, "step": 2758 }, { "epoch": 0.94, "learning_rate": 1.1426629727425142e-06, "logits/chosen": -0.2592466473579407, "logits/rejected": -0.2514722943305969, "logps/chosen": -183.51512145996094, "logps/rejected": -337.94879150390625, "loss": 0.068, "rewards/accuracies": 1.0, "rewards/chosen": -1.18549382686615, "rewards/margins": 14.248374938964844, "rewards/rejected": -15.433868408203125, "step": 2759 }, { "epoch": 0.94, "learning_rate": 1.1421158963835207e-06, "logits/chosen": -0.27807295322418213, "logits/rejected": -0.22734656929969788, "logps/chosen": -160.7154998779297, "logps/rejected": -264.6142578125, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -1.8822777271270752, "rewards/margins": 11.938901901245117, "rewards/rejected": -13.821179389953613, "step": 2760 }, { "epoch": 0.94, "learning_rate": 1.1415687766100823e-06, "logits/chosen": -0.2214609831571579, "logits/rejected": -0.18854862451553345, "logps/chosen": -220.7324676513672, "logps/rejected": -408.22344970703125, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -2.5441739559173584, "rewards/margins": 15.76231575012207, "rewards/rejected": -18.306488037109375, "step": 2761 }, { "epoch": 0.94, "learning_rate": 1.1410216135893362e-06, "logits/chosen": -0.3029720187187195, "logits/rejected": -0.28298279643058777, "logps/chosen": -268.82281494140625, "logps/rejected": -427.0839538574219, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -2.2815048694610596, "rewards/margins": 16.532686233520508, "rewards/rejected": -18.814189910888672, "step": 2762 }, { "epoch": 0.94, "learning_rate": 1.1404744074884337e-06, "logits/chosen": -0.227620929479599, "logits/rejected": -0.17223824560642242, "logps/chosen": -255.4447021484375, "logps/rejected": -288.2774963378906, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 0.03205990791320801, "rewards/margins": 11.467886924743652, "rewards/rejected": -11.435826301574707, "step": 2763 }, { "epoch": 0.94, "learning_rate": 1.139927158474538e-06, "logits/chosen": -0.3192071318626404, "logits/rejected": -0.28819769620895386, "logps/chosen": -198.34690856933594, "logps/rejected": -330.4926452636719, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -2.074242353439331, "rewards/margins": 13.636214256286621, "rewards/rejected": -15.710456848144531, "step": 2764 }, { "epoch": 0.94, "learning_rate": 1.1393798667148262e-06, "logits/chosen": -0.30469512939453125, "logits/rejected": -0.2610064148902893, "logps/chosen": -208.8341522216797, "logps/rejected": -297.4158630371094, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.7036616802215576, "rewards/margins": 13.697530746459961, "rewards/rejected": -14.401193618774414, "step": 2765 }, { "epoch": 0.94, "learning_rate": 1.1388325323764886e-06, "logits/chosen": -0.23484380543231964, "logits/rejected": -0.19822806119918823, "logps/chosen": -184.34144592285156, "logps/rejected": -232.29412841796875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.24085205793380737, "rewards/margins": 11.22392463684082, "rewards/rejected": -11.464776992797852, "step": 2766 }, { "epoch": 0.94, "learning_rate": 1.1382851556267288e-06, "logits/chosen": -0.2134953737258911, "logits/rejected": -0.18956874310970306, "logps/chosen": -205.76571655273438, "logps/rejected": -326.9468688964844, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.0840911865234375, "rewards/margins": 11.773380279541016, "rewards/rejected": -12.85747241973877, "step": 2767 }, { "epoch": 0.94, "learning_rate": 1.1377377366327618e-06, "logits/chosen": -0.2980368137359619, "logits/rejected": -0.2915593683719635, "logps/chosen": -212.70660400390625, "logps/rejected": -370.12872314453125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 0.34189751744270325, "rewards/margins": 14.648876190185547, "rewards/rejected": -14.306979179382324, "step": 2768 }, { "epoch": 0.95, "learning_rate": 1.1371902755618176e-06, "logits/chosen": -0.22859476506710052, "logits/rejected": -0.19595706462860107, "logps/chosen": -209.43243408203125, "logps/rejected": -300.9209899902344, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -1.1359938383102417, "rewards/margins": 13.231451988220215, "rewards/rejected": -14.36744499206543, "step": 2769 }, { "epoch": 0.95, "learning_rate": 1.1366427725811372e-06, "logits/chosen": -0.22515137493610382, "logits/rejected": -0.22404354810714722, "logps/chosen": -188.77133178710938, "logps/rejected": -319.31744384765625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.597898244857788, "rewards/margins": 11.000422477722168, "rewards/rejected": -12.598320960998535, "step": 2770 }, { "epoch": 0.95, "learning_rate": 1.1360952278579753e-06, "logits/chosen": -0.2118500918149948, "logits/rejected": -0.17422519624233246, "logps/chosen": -199.38360595703125, "logps/rejected": -356.50323486328125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.2576730251312256, "rewards/margins": 15.375327110290527, "rewards/rejected": -16.632999420166016, "step": 2771 }, { "epoch": 0.95, "learning_rate": 1.1355476415595996e-06, "logits/chosen": -0.29337844252586365, "logits/rejected": -0.24998222291469574, "logps/chosen": -166.50392150878906, "logps/rejected": -292.83465576171875, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.6642937660217285, "rewards/margins": 13.067955017089844, "rewards/rejected": -15.73224925994873, "step": 2772 }, { "epoch": 0.95, "learning_rate": 1.13500001385329e-06, "logits/chosen": -0.22467723488807678, "logits/rejected": -0.2118821144104004, "logps/chosen": -217.53564453125, "logps/rejected": -345.0787658691406, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.4204438924789429, "rewards/margins": 12.939155578613281, "rewards/rejected": -14.359601020812988, "step": 2773 }, { "epoch": 0.95, "learning_rate": 1.1344523449063395e-06, "logits/chosen": -0.3182775676250458, "logits/rejected": -0.2720872759819031, "logps/chosen": -203.58737182617188, "logps/rejected": -273.60955810546875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.3226823806762695, "rewards/margins": 11.831535339355469, "rewards/rejected": -13.154216766357422, "step": 2774 }, { "epoch": 0.95, "learning_rate": 1.1339046348860525e-06, "logits/chosen": -0.2553819715976715, "logits/rejected": -0.2592531442642212, "logps/chosen": -193.3462371826172, "logps/rejected": -422.9248046875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.8320999145507812, "rewards/margins": 16.79745864868164, "rewards/rejected": -17.62955665588379, "step": 2775 }, { "epoch": 0.95, "learning_rate": 1.133356883959748e-06, "logits/chosen": -0.2997628152370453, "logits/rejected": -0.2727785110473633, "logps/chosen": -236.09437561035156, "logps/rejected": -357.8249816894531, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -0.15059156715869904, "rewards/margins": 16.936845779418945, "rewards/rejected": -17.087438583374023, "step": 2776 }, { "epoch": 0.95, "learning_rate": 1.1328090922947555e-06, "logits/chosen": -0.3012339472770691, "logits/rejected": -0.2908080220222473, "logps/chosen": -237.24365234375, "logps/rejected": -427.0434875488281, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.5491883754730225, "rewards/margins": 16.812957763671875, "rewards/rejected": -18.362144470214844, "step": 2777 }, { "epoch": 0.95, "learning_rate": 1.1322612600584184e-06, "logits/chosen": -0.34868037700653076, "logits/rejected": -0.3135606050491333, "logps/chosen": -197.36349487304688, "logps/rejected": -354.0079040527344, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": -1.8739668130874634, "rewards/margins": 14.387950897216797, "rewards/rejected": -16.261917114257812, "step": 2778 }, { "epoch": 0.95, "learning_rate": 1.1317133874180915e-06, "logits/chosen": -0.2682642340660095, "logits/rejected": -0.22261351346969604, "logps/chosen": -170.6976776123047, "logps/rejected": -344.08135986328125, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": -2.1030867099761963, "rewards/margins": 15.96219253540039, "rewards/rejected": -18.06528091430664, "step": 2779 }, { "epoch": 0.95, "learning_rate": 1.1311654745411422e-06, "logits/chosen": -0.23302869498729706, "logits/rejected": -0.21912312507629395, "logps/chosen": -186.39166259765625, "logps/rejected": -259.5854797363281, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.3621357679367065, "rewards/margins": 9.950016021728516, "rewards/rejected": -11.312150955200195, "step": 2780 }, { "epoch": 0.95, "learning_rate": 1.1306175215949508e-06, "logits/chosen": -0.31382936239242554, "logits/rejected": -0.2761267125606537, "logps/chosen": -208.20721435546875, "logps/rejected": -367.5005187988281, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.7690987586975098, "rewards/margins": 15.916523933410645, "rewards/rejected": -17.685625076293945, "step": 2781 }, { "epoch": 0.95, "learning_rate": 1.130069528746909e-06, "logits/chosen": -0.16325636208057404, "logits/rejected": -0.14304578304290771, "logps/chosen": -155.0653839111328, "logps/rejected": -325.3392028808594, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.6966396570205688, "rewards/margins": 15.606974601745605, "rewards/rejected": -17.303613662719727, "step": 2782 }, { "epoch": 0.95, "learning_rate": 1.1295214961644216e-06, "logits/chosen": -0.24475900828838348, "logits/rejected": -0.25660449266433716, "logps/chosen": -148.99981689453125, "logps/rejected": -344.4903564453125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.9576416015625, "rewards/margins": 14.882855415344238, "rewards/rejected": -15.840494155883789, "step": 2783 }, { "epoch": 0.95, "learning_rate": 1.1289734240149038e-06, "logits/chosen": -0.33135664463043213, "logits/rejected": -0.30389735102653503, "logps/chosen": -212.10403442382812, "logps/rejected": -366.186767578125, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -0.626605749130249, "rewards/margins": 17.983078002929688, "rewards/rejected": -18.609683990478516, "step": 2784 }, { "epoch": 0.95, "learning_rate": 1.1284253124657853e-06, "logits/chosen": -0.1740962266921997, "logits/rejected": -0.1401275098323822, "logps/chosen": -206.6511688232422, "logps/rejected": -327.33990478515625, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -2.4895405769348145, "rewards/margins": 13.063499450683594, "rewards/rejected": -15.553038597106934, "step": 2785 }, { "epoch": 0.95, "learning_rate": 1.127877161684506e-06, "logits/chosen": -0.26488974690437317, "logits/rejected": -0.21693061292171478, "logps/chosen": -246.28150939941406, "logps/rejected": -345.146484375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.9583843946456909, "rewards/margins": 15.359309196472168, "rewards/rejected": -16.31769371032715, "step": 2786 }, { "epoch": 0.95, "learning_rate": 1.1273289718385185e-06, "logits/chosen": -0.3598621189594269, "logits/rejected": -0.3330748677253723, "logps/chosen": -245.69268798828125, "logps/rejected": -381.4440002441406, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 0.024127811193466187, "rewards/margins": 17.59433937072754, "rewards/rejected": -17.57021141052246, "step": 2787 }, { "epoch": 0.95, "learning_rate": 1.1267807430952877e-06, "logits/chosen": -0.28006890416145325, "logits/rejected": -0.24535685777664185, "logps/chosen": -163.34242248535156, "logps/rejected": -253.8955841064453, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -1.5429153442382812, "rewards/margins": 11.100981712341309, "rewards/rejected": -12.643898010253906, "step": 2788 }, { "epoch": 0.95, "learning_rate": 1.126232475622289e-06, "logits/chosen": -0.266505628824234, "logits/rejected": -0.2394721657037735, "logps/chosen": -228.04051208496094, "logps/rejected": -430.4022216796875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.8353943824768066, "rewards/margins": 17.699283599853516, "rewards/rejected": -20.534677505493164, "step": 2789 }, { "epoch": 0.95, "learning_rate": 1.1256841695870111e-06, "logits/chosen": -0.3487031161785126, "logits/rejected": -0.32270821928977966, "logps/chosen": -173.00576782226562, "logps/rejected": -336.2986145019531, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -0.5587713122367859, "rewards/margins": 15.152145385742188, "rewards/rejected": -15.710914611816406, "step": 2790 }, { "epoch": 0.95, "learning_rate": 1.125135825156954e-06, "logits/chosen": -0.28026771545410156, "logits/rejected": -0.24111153185367584, "logps/chosen": -257.3670349121094, "logps/rejected": -366.0054931640625, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 0.4297092854976654, "rewards/margins": 15.81344985961914, "rewards/rejected": -15.38374137878418, "step": 2791 }, { "epoch": 0.95, "learning_rate": 1.1245874424996291e-06, "logits/chosen": -0.36070549488067627, "logits/rejected": -0.3326796591281891, "logps/chosen": -250.583984375, "logps/rejected": -426.47869873046875, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -0.5364700555801392, "rewards/margins": 17.435447692871094, "rewards/rejected": -17.971920013427734, "step": 2792 }, { "epoch": 0.95, "learning_rate": 1.1240390217825601e-06, "logits/chosen": -0.24233458936214447, "logits/rejected": -0.2223307192325592, "logps/chosen": -191.27406311035156, "logps/rejected": -267.02423095703125, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -1.4025602340698242, "rewards/margins": 10.10978889465332, "rewards/rejected": -11.512348175048828, "step": 2793 }, { "epoch": 0.95, "learning_rate": 1.1234905631732819e-06, "logits/chosen": -0.16583961248397827, "logits/rejected": -0.15634211897850037, "logps/chosen": -131.9791717529297, "logps/rejected": -277.0111083984375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.2297720909118652, "rewards/margins": 12.540109634399414, "rewards/rejected": -14.769880294799805, "step": 2794 }, { "epoch": 0.95, "learning_rate": 1.1229420668393404e-06, "logits/chosen": -0.24325667321681976, "logits/rejected": -0.206635519862175, "logps/chosen": -195.09817504882812, "logps/rejected": -329.09765625, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": 0.18057067692279816, "rewards/margins": 16.479000091552734, "rewards/rejected": -16.298429489135742, "step": 2795 }, { "epoch": 0.95, "learning_rate": 1.1223935329482941e-06, "logits/chosen": -0.31156060099601746, "logits/rejected": -0.2829517126083374, "logps/chosen": -200.2306671142578, "logps/rejected": -304.07318115234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1599935293197632, "rewards/margins": 15.417312622070312, "rewards/rejected": -16.577306747436523, "step": 2796 }, { "epoch": 0.95, "learning_rate": 1.1218449616677128e-06, "logits/chosen": -0.23180362582206726, "logits/rejected": -0.22248287498950958, "logps/chosen": -184.40025329589844, "logps/rejected": -312.8453063964844, "loss": 0.1442, "rewards/accuracies": 1.0, "rewards/chosen": -0.029557615518569946, "rewards/margins": 13.798063278198242, "rewards/rejected": -13.827620506286621, "step": 2797 }, { "epoch": 0.95, "learning_rate": 1.1212963531651771e-06, "logits/chosen": -0.25323522090911865, "logits/rejected": -0.23864640295505524, "logps/chosen": -160.39337158203125, "logps/rejected": -346.5679016113281, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -0.6920326948165894, "rewards/margins": 16.38847541809082, "rewards/rejected": -17.080509185791016, "step": 2798 }, { "epoch": 0.96, "learning_rate": 1.1207477076082794e-06, "logits/chosen": -0.3335803151130676, "logits/rejected": -0.29434606432914734, "logps/chosen": -262.5250244140625, "logps/rejected": -363.819580078125, "loss": 0.0355, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2907845973968506, "rewards/margins": 12.485647201538086, "rewards/rejected": -13.776432991027832, "step": 2799 }, { "epoch": 0.96, "learning_rate": 1.1201990251646237e-06, "logits/chosen": -0.2503669857978821, "logits/rejected": -0.22003328800201416, "logps/chosen": -155.29879760742188, "logps/rejected": -303.8477478027344, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.291885495185852, "rewards/margins": 13.489738464355469, "rewards/rejected": -14.781622886657715, "step": 2800 }, { "epoch": 0.96, "learning_rate": 1.119650306001824e-06, "logits/chosen": -0.2631169855594635, "logits/rejected": -0.22372032701969147, "logps/chosen": -208.82534790039062, "logps/rejected": -395.4279479980469, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.7680047750473022, "rewards/margins": 17.442153930664062, "rewards/rejected": -19.210159301757812, "step": 2801 }, { "epoch": 0.96, "learning_rate": 1.1191015502875077e-06, "logits/chosen": -0.3212888538837433, "logits/rejected": -0.297607421875, "logps/chosen": -220.7753448486328, "logps/rejected": -379.7704162597656, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": 0.04100954532623291, "rewards/margins": 18.15365982055664, "rewards/rejected": -18.11264991760254, "step": 2802 }, { "epoch": 0.96, "learning_rate": 1.1185527581893111e-06, "logits/chosen": -0.13163627684116364, "logits/rejected": -0.100887231528759, "logps/chosen": -174.52914428710938, "logps/rejected": -286.9696044921875, "loss": 0.0376, "rewards/accuracies": 0.9375, "rewards/chosen": -1.208709955215454, "rewards/margins": 13.377656936645508, "rewards/rejected": -14.586366653442383, "step": 2803 }, { "epoch": 0.96, "learning_rate": 1.1180039298748829e-06, "logits/chosen": -0.30930009484291077, "logits/rejected": -0.27687081694602966, "logps/chosen": -214.50184631347656, "logps/rejected": -309.2202453613281, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.8941740393638611, "rewards/margins": 14.523183822631836, "rewards/rejected": -15.417357444763184, "step": 2804 }, { "epoch": 0.96, "learning_rate": 1.1174550655118824e-06, "logits/chosen": -0.26402080059051514, "logits/rejected": -0.2273765206336975, "logps/chosen": -199.14794921875, "logps/rejected": -301.5693359375, "loss": 0.0173, "rewards/accuracies": 0.9375, "rewards/chosen": -0.41659295558929443, "rewards/margins": 10.935386657714844, "rewards/rejected": -11.35197925567627, "step": 2805 }, { "epoch": 0.96, "learning_rate": 1.1169061652679808e-06, "logits/chosen": -0.2575455605983734, "logits/rejected": -0.20534981787204742, "logps/chosen": -264.9076843261719, "logps/rejected": -386.1626892089844, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 0.14100825786590576, "rewards/margins": 16.944124221801758, "rewards/rejected": -16.803117752075195, "step": 2806 }, { "epoch": 0.96, "learning_rate": 1.1163572293108587e-06, "logits/chosen": -0.25997012853622437, "logits/rejected": -0.23261381685733795, "logps/chosen": -195.61563110351562, "logps/rejected": -310.294921875, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.8634604215621948, "rewards/margins": 10.6813325881958, "rewards/rejected": -12.544793128967285, "step": 2807 }, { "epoch": 0.96, "learning_rate": 1.115808257808209e-06, "logits/chosen": -0.42733484506607056, "logits/rejected": -0.387347549200058, "logps/chosen": -202.05462646484375, "logps/rejected": -314.5301513671875, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -1.6767020225524902, "rewards/margins": 14.003521919250488, "rewards/rejected": -15.68022346496582, "step": 2808 }, { "epoch": 0.96, "learning_rate": 1.1152592509277346e-06, "logits/chosen": -0.2762523591518402, "logits/rejected": -0.2442227303981781, "logps/chosen": -132.0223388671875, "logps/rejected": -211.75384521484375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.3253260850906372, "rewards/margins": 9.846467971801758, "rewards/rejected": -10.171792984008789, "step": 2809 }, { "epoch": 0.96, "learning_rate": 1.1147102088371496e-06, "logits/chosen": -0.39603641629219055, "logits/rejected": -0.3456882834434509, "logps/chosen": -207.47067260742188, "logps/rejected": -354.00054931640625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.4958927631378174, "rewards/margins": 17.137203216552734, "rewards/rejected": -18.633094787597656, "step": 2810 }, { "epoch": 0.96, "learning_rate": 1.1141611317041787e-06, "logits/chosen": -0.3338954448699951, "logits/rejected": -0.29371485114097595, "logps/chosen": -230.47291564941406, "logps/rejected": -397.69378662109375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.776685357093811, "rewards/margins": 18.467182159423828, "rewards/rejected": -19.24386978149414, "step": 2811 }, { "epoch": 0.96, "learning_rate": 1.113612019696558e-06, "logits/chosen": -0.21444980800151825, "logits/rejected": -0.16735635697841644, "logps/chosen": -237.85272216796875, "logps/rejected": -383.66949462890625, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -1.4298018217086792, "rewards/margins": 16.7474365234375, "rewards/rejected": -18.177234649658203, "step": 2812 }, { "epoch": 0.96, "learning_rate": 1.113062872982033e-06, "logits/chosen": -0.29816734790802, "logits/rejected": -0.25475171208381653, "logps/chosen": -205.70443725585938, "logps/rejected": -282.6885986328125, "loss": 0.0487, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3736746311187744, "rewards/margins": 12.540199279785156, "rewards/rejected": -14.913873672485352, "step": 2813 }, { "epoch": 0.96, "learning_rate": 1.1125136917283604e-06, "logits/chosen": -0.25720182061195374, "logits/rejected": -0.25272053480148315, "logps/chosen": -205.4793243408203, "logps/rejected": -341.260498046875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8615456223487854, "rewards/margins": 14.971822738647461, "rewards/rejected": -15.833368301391602, "step": 2814 }, { "epoch": 0.96, "learning_rate": 1.1119644761033077e-06, "logits/chosen": -0.2889827489852905, "logits/rejected": -0.27022233605384827, "logps/chosen": -211.8789825439453, "logps/rejected": -357.22039794921875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.0240637063980103, "rewards/margins": 16.493980407714844, "rewards/rejected": -17.51804542541504, "step": 2815 }, { "epoch": 0.96, "learning_rate": 1.1114152262746528e-06, "logits/chosen": -0.36976146697998047, "logits/rejected": -0.34848693013191223, "logps/chosen": -194.9239959716797, "logps/rejected": -364.07086181640625, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.0953255891799927, "rewards/margins": 14.862082481384277, "rewards/rejected": -15.957408905029297, "step": 2816 }, { "epoch": 0.96, "learning_rate": 1.1108659424101841e-06, "logits/chosen": -0.1936844140291214, "logits/rejected": -0.16953334212303162, "logps/chosen": -157.39077758789062, "logps/rejected": -277.0897216796875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.4438283443450928, "rewards/margins": 12.976863861083984, "rewards/rejected": -14.42069149017334, "step": 2817 }, { "epoch": 0.96, "learning_rate": 1.1103166246776998e-06, "logits/chosen": -0.19657744467258453, "logits/rejected": -0.15958434343338013, "logps/chosen": -199.0076446533203, "logps/rejected": -308.8568115234375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.971941351890564, "rewards/margins": 16.1890926361084, "rewards/rejected": -17.161033630371094, "step": 2818 }, { "epoch": 0.96, "learning_rate": 1.1097672732450086e-06, "logits/chosen": -0.22494350373744965, "logits/rejected": -0.21454085409641266, "logps/chosen": -123.08879852294922, "logps/rejected": -247.83139038085938, "loss": 0.0564, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8223577737808228, "rewards/margins": 10.956962585449219, "rewards/rejected": -12.779319763183594, "step": 2819 }, { "epoch": 0.96, "learning_rate": 1.1092178882799308e-06, "logits/chosen": -0.25324758887290955, "logits/rejected": -0.24286560714244843, "logps/chosen": -206.4688720703125, "logps/rejected": -392.36181640625, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -2.0356452465057373, "rewards/margins": 15.368427276611328, "rewards/rejected": -17.404071807861328, "step": 2820 }, { "epoch": 0.96, "learning_rate": 1.1086684699502955e-06, "logits/chosen": -0.3192855715751648, "logits/rejected": -0.3037605583667755, "logps/chosen": -173.02963256835938, "logps/rejected": -293.3514404296875, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -1.470638394355774, "rewards/margins": 13.556947708129883, "rewards/rejected": -15.027585983276367, "step": 2821 }, { "epoch": 0.96, "learning_rate": 1.1081190184239417e-06, "logits/chosen": -0.2617834210395813, "logits/rejected": -0.2386055886745453, "logps/chosen": -202.55648803710938, "logps/rejected": -381.3213806152344, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.9785915017127991, "rewards/margins": 16.492948532104492, "rewards/rejected": -17.471540451049805, "step": 2822 }, { "epoch": 0.96, "learning_rate": 1.10756953386872e-06, "logits/chosen": -0.29709291458129883, "logits/rejected": -0.30107465386390686, "logps/chosen": -147.9276580810547, "logps/rejected": -288.31195068359375, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -1.3750876188278198, "rewards/margins": 11.770447731018066, "rewards/rejected": -13.145535469055176, "step": 2823 }, { "epoch": 0.96, "learning_rate": 1.1070200164524906e-06, "logits/chosen": -0.25045907497406006, "logits/rejected": -0.22204937040805817, "logps/chosen": -188.10086059570312, "logps/rejected": -356.47137451171875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.5263344049453735, "rewards/margins": 14.872626304626465, "rewards/rejected": -16.398962020874023, "step": 2824 }, { "epoch": 0.96, "learning_rate": 1.1064704663431226e-06, "logits/chosen": -0.31736084818840027, "logits/rejected": -0.3016687035560608, "logps/chosen": -181.091796875, "logps/rejected": -336.9005126953125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.5845671892166138, "rewards/margins": 14.389908790588379, "rewards/rejected": -15.97447395324707, "step": 2825 }, { "epoch": 0.96, "learning_rate": 1.1059208837084968e-06, "logits/chosen": -0.3093433082103729, "logits/rejected": -0.2928062975406647, "logps/chosen": -224.31321716308594, "logps/rejected": -363.01763916015625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.7377212047576904, "rewards/margins": 16.806934356689453, "rewards/rejected": -17.544654846191406, "step": 2826 }, { "epoch": 0.96, "learning_rate": 1.1053712687165028e-06, "logits/chosen": -0.23164354264736176, "logits/rejected": -0.19391846656799316, "logps/chosen": -265.72161865234375, "logps/rejected": -344.41961669921875, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.919870138168335, "rewards/margins": 13.506122589111328, "rewards/rejected": -15.425992012023926, "step": 2827 }, { "epoch": 0.97, "learning_rate": 1.1048216215350402e-06, "logits/chosen": -0.23946550488471985, "logits/rejected": -0.242345929145813, "logps/chosen": -199.54031372070312, "logps/rejected": -319.2476806640625, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -2.1319751739501953, "rewards/margins": 11.51494026184082, "rewards/rejected": -13.646916389465332, "step": 2828 }, { "epoch": 0.97, "learning_rate": 1.1042719423320185e-06, "logits/chosen": -0.23315809667110443, "logits/rejected": -0.21491238474845886, "logps/chosen": -210.29676818847656, "logps/rejected": -342.4515075683594, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -1.6966536045074463, "rewards/margins": 14.77724838256836, "rewards/rejected": -16.473901748657227, "step": 2829 }, { "epoch": 0.97, "learning_rate": 1.103722231275358e-06, "logits/chosen": -0.31119126081466675, "logits/rejected": -0.24314655363559723, "logps/chosen": -232.1356964111328, "logps/rejected": -291.58099365234375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.4986356496810913, "rewards/margins": 12.667226791381836, "rewards/rejected": -14.165861129760742, "step": 2830 }, { "epoch": 0.97, "learning_rate": 1.103172488532987e-06, "logits/chosen": -0.21673139929771423, "logits/rejected": -0.19291511178016663, "logps/chosen": -189.70388793945312, "logps/rejected": -299.22021484375, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.4987430572509766, "rewards/margins": 13.076813697814941, "rewards/rejected": -14.575556755065918, "step": 2831 }, { "epoch": 0.97, "learning_rate": 1.1026227142728442e-06, "logits/chosen": -0.3415306806564331, "logits/rejected": -0.3245197832584381, "logps/chosen": -190.82806396484375, "logps/rejected": -336.8965148925781, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.1903836727142334, "rewards/margins": 12.919880867004395, "rewards/rejected": -14.11026668548584, "step": 2832 }, { "epoch": 0.97, "learning_rate": 1.1020729086628789e-06, "logits/chosen": -0.31585025787353516, "logits/rejected": -0.30212172865867615, "logps/chosen": -188.2326202392578, "logps/rejected": -349.87408447265625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.28475356101989746, "rewards/margins": 14.217884063720703, "rewards/rejected": -14.502638816833496, "step": 2833 }, { "epoch": 0.97, "learning_rate": 1.1015230718710483e-06, "logits/chosen": -0.2399131953716278, "logits/rejected": -0.1936652809381485, "logps/chosen": -180.05758666992188, "logps/rejected": -309.7210388183594, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.180202603340149, "rewards/margins": 16.75177764892578, "rewards/rejected": -17.93198013305664, "step": 2834 }, { "epoch": 0.97, "learning_rate": 1.1009732040653203e-06, "logits/chosen": -0.28769540786743164, "logits/rejected": -0.2573428153991699, "logps/chosen": -210.14938354492188, "logps/rejected": -327.1653747558594, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.9495047926902771, "rewards/margins": 12.696551322937012, "rewards/rejected": -13.64605712890625, "step": 2835 }, { "epoch": 0.97, "learning_rate": 1.1004233054136725e-06, "logits/chosen": -0.36723461747169495, "logits/rejected": -0.37082257866859436, "logps/chosen": -167.67349243164062, "logps/rejected": -376.4405212402344, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.406498908996582, "rewards/margins": 15.712520599365234, "rewards/rejected": -18.119020462036133, "step": 2836 }, { "epoch": 0.97, "learning_rate": 1.09987337608409e-06, "logits/chosen": -0.24947641789913177, "logits/rejected": -0.20672361552715302, "logps/chosen": -199.18707275390625, "logps/rejected": -339.7784729003906, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.8824480175971985, "rewards/margins": 13.421292304992676, "rewards/rejected": -14.303741455078125, "step": 2837 }, { "epoch": 0.97, "learning_rate": 1.0993234162445698e-06, "logits/chosen": -0.17264039814472198, "logits/rejected": -0.1455966979265213, "logps/chosen": -226.54885864257812, "logps/rejected": -386.6455078125, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 1.4064610004425049, "rewards/margins": 16.56244659423828, "rewards/rejected": -15.155984878540039, "step": 2838 }, { "epoch": 0.97, "learning_rate": 1.0987734260631168e-06, "logits/chosen": -0.1640004813671112, "logits/rejected": -0.1344270408153534, "logps/chosen": -223.15829467773438, "logps/rejected": -382.4873962402344, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -2.1356253623962402, "rewards/margins": 16.241859436035156, "rewards/rejected": -18.377485275268555, "step": 2839 }, { "epoch": 0.97, "learning_rate": 1.098223405707745e-06, "logits/chosen": -0.21291609108448029, "logits/rejected": -0.19992122054100037, "logps/chosen": -153.75216674804688, "logps/rejected": -305.4644775390625, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -0.6277300119400024, "rewards/margins": 14.290594100952148, "rewards/rejected": -14.91832447052002, "step": 2840 }, { "epoch": 0.97, "learning_rate": 1.097673355346478e-06, "logits/chosen": -0.27775752544403076, "logits/rejected": -0.2454657256603241, "logps/chosen": -205.0941162109375, "logps/rejected": -346.8582458496094, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 0.012706741690635681, "rewards/margins": 18.06441307067871, "rewards/rejected": -18.05170440673828, "step": 2841 }, { "epoch": 0.97, "learning_rate": 1.0971232751473493e-06, "logits/chosen": -0.2506794035434723, "logits/rejected": -0.2468172311782837, "logps/chosen": -166.29420471191406, "logps/rejected": -318.1846923828125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.857237696647644, "rewards/margins": 13.38369083404541, "rewards/rejected": -15.240927696228027, "step": 2842 }, { "epoch": 0.97, "learning_rate": 1.0965731652784e-06, "logits/chosen": -0.3353486657142639, "logits/rejected": -0.3218892812728882, "logps/chosen": -165.03753662109375, "logps/rejected": -278.20159912109375, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.7511493563652039, "rewards/margins": 13.065068244934082, "rewards/rejected": -13.816218376159668, "step": 2843 }, { "epoch": 0.97, "learning_rate": 1.0960230259076817e-06, "logits/chosen": -0.24929188191890717, "logits/rejected": -0.23711784183979034, "logps/chosen": -170.70957946777344, "logps/rejected": -313.790771484375, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -1.4513967037200928, "rewards/margins": 13.633620262145996, "rewards/rejected": -15.085016250610352, "step": 2844 }, { "epoch": 0.97, "learning_rate": 1.0954728572032543e-06, "logits/chosen": -0.2859129011631012, "logits/rejected": -0.2737656533718109, "logps/chosen": -122.93384552001953, "logps/rejected": -267.7967834472656, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": -1.0585330724716187, "rewards/margins": 12.728718757629395, "rewards/rejected": -13.787252426147461, "step": 2845 }, { "epoch": 0.97, "learning_rate": 1.0949226593331862e-06, "logits/chosen": -0.29371532797813416, "logits/rejected": -0.2558034360408783, "logps/chosen": -208.7578887939453, "logps/rejected": -365.3069152832031, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.2616868019104004, "rewards/margins": 15.71921157836914, "rewards/rejected": -17.980899810791016, "step": 2846 }, { "epoch": 0.97, "learning_rate": 1.0943724324655556e-06, "logits/chosen": -0.33331069350242615, "logits/rejected": -0.2919924259185791, "logps/chosen": -211.09808349609375, "logps/rejected": -324.69354248046875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.2828518748283386, "rewards/margins": 15.681394577026367, "rewards/rejected": -15.964244842529297, "step": 2847 }, { "epoch": 0.97, "learning_rate": 1.0938221767684497e-06, "logits/chosen": -0.2449781596660614, "logits/rejected": -0.19134394824504852, "logps/chosen": -213.556396484375, "logps/rejected": -236.47079467773438, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -0.9215133190155029, "rewards/margins": 10.956107139587402, "rewards/rejected": -11.877620697021484, "step": 2848 }, { "epoch": 0.97, "learning_rate": 1.0932718924099632e-06, "logits/chosen": -0.2587301731109619, "logits/rejected": -0.20740002393722534, "logps/chosen": -240.5187225341797, "logps/rejected": -297.592041015625, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.9011746048927307, "rewards/margins": 14.14886474609375, "rewards/rejected": -15.050039291381836, "step": 2849 }, { "epoch": 0.97, "learning_rate": 1.092721579558201e-06, "logits/chosen": -0.2907637059688568, "logits/rejected": -0.2648519277572632, "logps/chosen": -144.31581115722656, "logps/rejected": -296.4901123046875, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.2875535190105438, "rewards/margins": 15.361335754394531, "rewards/rejected": -15.648889541625977, "step": 2850 }, { "epoch": 0.97, "learning_rate": 1.0921712383812757e-06, "logits/chosen": -0.2435406744480133, "logits/rejected": -0.2090628743171692, "logps/chosen": -170.0966796875, "logps/rejected": -272.9752502441406, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.4553630352020264, "rewards/margins": 10.43531322479248, "rewards/rejected": -11.890677452087402, "step": 2851 }, { "epoch": 0.97, "learning_rate": 1.091620869047309e-06, "logits/chosen": -0.24641850590705872, "logits/rejected": -0.21519877016544342, "logps/chosen": -237.6412353515625, "logps/rejected": -347.22344970703125, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -1.922504186630249, "rewards/margins": 12.399463653564453, "rewards/rejected": -14.321968078613281, "step": 2852 }, { "epoch": 0.97, "learning_rate": 1.0910704717244312e-06, "logits/chosen": -0.34344223141670227, "logits/rejected": -0.3420579135417938, "logps/chosen": -267.6466064453125, "logps/rejected": -469.9823913574219, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.682093620300293, "rewards/margins": 18.785682678222656, "rewards/rejected": -19.467777252197266, "step": 2853 }, { "epoch": 0.97, "learning_rate": 1.0905200465807811e-06, "logits/chosen": -0.22988741099834442, "logits/rejected": -0.22035998106002808, "logps/chosen": -163.2932586669922, "logps/rejected": -287.22003173828125, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -0.6597853302955627, "rewards/margins": 11.630494117736816, "rewards/rejected": -12.290279388427734, "step": 2854 }, { "epoch": 0.97, "learning_rate": 1.089969593784506e-06, "logits/chosen": -0.3465771973133087, "logits/rejected": -0.3451457917690277, "logps/chosen": -296.3294372558594, "logps/rejected": -510.1302490234375, "loss": 0.0735, "rewards/accuracies": 1.0, "rewards/chosen": 0.4175121784210205, "rewards/margins": 18.94161605834961, "rewards/rejected": -18.524105072021484, "step": 2855 }, { "epoch": 0.97, "learning_rate": 1.0894191135037617e-06, "logits/chosen": -0.1667841225862503, "logits/rejected": -0.13313233852386475, "logps/chosen": -184.34869384765625, "logps/rejected": -336.884521484375, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.3108813166618347, "rewards/margins": 16.110740661621094, "rewards/rejected": -16.421621322631836, "step": 2856 }, { "epoch": 0.98, "learning_rate": 1.0888686059067124e-06, "logits/chosen": -0.2691715359687805, "logits/rejected": -0.2567095160484314, "logps/chosen": -224.10581970214844, "logps/rejected": -385.5494689941406, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -1.3164477348327637, "rewards/margins": 16.86551284790039, "rewards/rejected": -18.18195915222168, "step": 2857 }, { "epoch": 0.98, "learning_rate": 1.0883180711615299e-06, "logits/chosen": -0.23650948703289032, "logits/rejected": -0.23026898503303528, "logps/chosen": -240.70025634765625, "logps/rejected": -367.80426025390625, "loss": 0.0275, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6567418575286865, "rewards/margins": 15.450239181518555, "rewards/rejected": -16.10698127746582, "step": 2858 }, { "epoch": 0.98, "learning_rate": 1.0877675094363958e-06, "logits/chosen": -0.31383582949638367, "logits/rejected": -0.2989424765110016, "logps/chosen": -172.1520233154297, "logps/rejected": -276.60211181640625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.7578489780426025, "rewards/margins": 10.960884094238281, "rewards/rejected": -12.718732833862305, "step": 2859 }, { "epoch": 0.98, "learning_rate": 1.087216920899499e-06, "logits/chosen": -0.2094411551952362, "logits/rejected": -0.19729681313037872, "logps/chosen": -230.4011688232422, "logps/rejected": -421.8159484863281, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.6821011304855347, "rewards/margins": 17.287599563598633, "rewards/rejected": -18.969701766967773, "step": 2860 }, { "epoch": 0.98, "learning_rate": 1.0866663057190359e-06, "logits/chosen": -0.21836091578006744, "logits/rejected": -0.22624720633029938, "logps/chosen": -207.2913818359375, "logps/rejected": -438.70941162109375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.1977959871292114, "rewards/margins": 20.179283142089844, "rewards/rejected": -21.377079010009766, "step": 2861 }, { "epoch": 0.98, "learning_rate": 1.0861156640632127e-06, "logits/chosen": -0.2524182200431824, "logits/rejected": -0.23837755620479584, "logps/chosen": -166.5216064453125, "logps/rejected": -283.252685546875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.357900857925415, "rewards/margins": 11.626431465148926, "rewards/rejected": -12.984332084655762, "step": 2862 }, { "epoch": 0.98, "learning_rate": 1.0855649961002426e-06, "logits/chosen": -0.2623690664768219, "logits/rejected": -0.23722901940345764, "logps/chosen": -212.1352996826172, "logps/rejected": -374.1302490234375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.4127435684204102, "rewards/margins": 15.218986511230469, "rewards/rejected": -16.631729125976562, "step": 2863 }, { "epoch": 0.98, "learning_rate": 1.0850143019983472e-06, "logits/chosen": -0.21494995057582855, "logits/rejected": -0.17364270985126495, "logps/chosen": -217.28018188476562, "logps/rejected": -365.0019226074219, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6549947261810303, "rewards/margins": 15.064743995666504, "rewards/rejected": -15.719738006591797, "step": 2864 }, { "epoch": 0.98, "learning_rate": 1.0844635819257554e-06, "logits/chosen": -0.24796849489212036, "logits/rejected": -0.19235344231128693, "logps/chosen": -246.919677734375, "logps/rejected": -340.4287109375, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.4932606220245361, "rewards/margins": 14.633825302124023, "rewards/rejected": -16.127086639404297, "step": 2865 }, { "epoch": 0.98, "learning_rate": 1.0839128360507052e-06, "logits/chosen": -0.24464760720729828, "logits/rejected": -0.22205378115177155, "logps/chosen": -196.80813598632812, "logps/rejected": -356.8926696777344, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.7257492542266846, "rewards/margins": 16.446125030517578, "rewards/rejected": -17.171875, "step": 2866 }, { "epoch": 0.98, "learning_rate": 1.0833620645414416e-06, "logits/chosen": -0.3102511167526245, "logits/rejected": -0.2727736830711365, "logps/chosen": -144.1191864013672, "logps/rejected": -244.90126037597656, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -1.0820298194885254, "rewards/margins": 12.473894119262695, "rewards/rejected": -13.555925369262695, "step": 2867 }, { "epoch": 0.98, "learning_rate": 1.0828112675662175e-06, "logits/chosen": -0.29517480731010437, "logits/rejected": -0.2492057979106903, "logps/chosen": -243.503662109375, "logps/rejected": -348.137451171875, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": -0.5879725217819214, "rewards/margins": 14.158994674682617, "rewards/rejected": -14.746967315673828, "step": 2868 }, { "epoch": 0.98, "learning_rate": 1.0822604452932945e-06, "logits/chosen": -0.20515647530555725, "logits/rejected": -0.16977708041667938, "logps/chosen": -220.9163818359375, "logps/rejected": -321.06536865234375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.0138683319091797, "rewards/margins": 12.210184097290039, "rewards/rejected": -13.224052429199219, "step": 2869 }, { "epoch": 0.98, "learning_rate": 1.08170959789094e-06, "logits/chosen": -0.24868984520435333, "logits/rejected": -0.17448808252811432, "logps/chosen": -245.11715698242188, "logps/rejected": -356.7044982910156, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.4584460258483887, "rewards/margins": 17.474876403808594, "rewards/rejected": -18.933320999145508, "step": 2870 }, { "epoch": 0.98, "learning_rate": 1.0811587255274313e-06, "logits/chosen": -0.2541763186454773, "logits/rejected": -0.2503039538860321, "logps/chosen": -183.80984497070312, "logps/rejected": -375.16131591796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1735620498657227, "rewards/margins": 16.163976669311523, "rewards/rejected": -17.33753776550293, "step": 2871 }, { "epoch": 0.98, "learning_rate": 1.080607828371052e-06, "logits/chosen": -0.20251134037971497, "logits/rejected": -0.15746591985225677, "logps/chosen": -207.27117919921875, "logps/rejected": -317.13287353515625, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -1.076295256614685, "rewards/margins": 14.212306022644043, "rewards/rejected": -15.288599967956543, "step": 2872 }, { "epoch": 0.98, "learning_rate": 1.0800569065900933e-06, "logits/chosen": -0.3344561457633972, "logits/rejected": -0.3046797513961792, "logps/chosen": -228.0212860107422, "logps/rejected": -335.2940673828125, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -2.505580425262451, "rewards/margins": 12.024043083190918, "rewards/rejected": -14.529622077941895, "step": 2873 }, { "epoch": 0.98, "learning_rate": 1.0795059603528547e-06, "logits/chosen": -0.23621174693107605, "logits/rejected": -0.20006531476974487, "logps/chosen": -245.97628784179688, "logps/rejected": -323.1099548339844, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.6220160722732544, "rewards/margins": 11.380831718444824, "rewards/rejected": -12.002845764160156, "step": 2874 }, { "epoch": 0.98, "learning_rate": 1.0789549898276425e-06, "logits/chosen": -0.30439648032188416, "logits/rejected": -0.2699452042579651, "logps/chosen": -226.09933471679688, "logps/rejected": -316.4073486328125, "loss": 0.0745, "rewards/accuracies": 1.0, "rewards/chosen": -1.2685556411743164, "rewards/margins": 12.303566932678223, "rewards/rejected": -13.572122573852539, "step": 2875 }, { "epoch": 0.98, "learning_rate": 1.0784039951827701e-06, "logits/chosen": -0.23384518921375275, "logits/rejected": -0.23462815582752228, "logps/chosen": -232.5687713623047, "logps/rejected": -375.2809143066406, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.8226360082626343, "rewards/margins": 12.772991180419922, "rewards/rejected": -14.595626831054688, "step": 2876 }, { "epoch": 0.98, "learning_rate": 1.0778529765865593e-06, "logits/chosen": -0.33929383754730225, "logits/rejected": -0.33286264538764954, "logps/chosen": -220.40463256835938, "logps/rejected": -390.33001708984375, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 0.18152964115142822, "rewards/margins": 17.518287658691406, "rewards/rejected": -17.33675765991211, "step": 2877 }, { "epoch": 0.98, "learning_rate": 1.077301934207339e-06, "logits/chosen": -0.2453038990497589, "logits/rejected": -0.22436577081680298, "logps/chosen": -192.13514709472656, "logps/rejected": -322.9429931640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5226758718490601, "rewards/margins": 15.354530334472656, "rewards/rejected": -15.877204895019531, "step": 2878 }, { "epoch": 0.98, "learning_rate": 1.0767508682134441e-06, "logits/chosen": -0.21505387127399445, "logits/rejected": -0.19332309067249298, "logps/chosen": -205.26251220703125, "logps/rejected": -337.078857421875, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.30347806215286255, "rewards/margins": 14.965676307678223, "rewards/rejected": -15.269153594970703, "step": 2879 }, { "epoch": 0.98, "learning_rate": 1.0761997787732183e-06, "logits/chosen": -0.22695378959178925, "logits/rejected": -0.18170998990535736, "logps/chosen": -148.81800842285156, "logps/rejected": -290.7397155761719, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0643259286880493, "rewards/margins": 14.660083770751953, "rewards/rejected": -15.724409103393555, "step": 2880 }, { "epoch": 0.98, "learning_rate": 1.0756486660550117e-06, "logits/chosen": -0.2557218074798584, "logits/rejected": -0.2352977991104126, "logps/chosen": -247.9552001953125, "logps/rejected": -396.4857177734375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.754313588142395, "rewards/margins": 15.204365730285645, "rewards/rejected": -16.95867919921875, "step": 2881 }, { "epoch": 0.98, "learning_rate": 1.0750975302271815e-06, "logits/chosen": -0.24946628510951996, "logits/rejected": -0.22948290407657623, "logps/chosen": -168.68812561035156, "logps/rejected": -279.45672607421875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": 0.2946006655693054, "rewards/margins": 12.371620178222656, "rewards/rejected": -12.077018737792969, "step": 2882 }, { "epoch": 0.98, "learning_rate": 1.0745463714580922e-06, "logits/chosen": -0.23808793723583221, "logits/rejected": -0.22606492042541504, "logps/chosen": -161.6626434326172, "logps/rejected": -301.8497314453125, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/chosen": -1.056410551071167, "rewards/margins": 13.439024925231934, "rewards/rejected": -14.495433807373047, "step": 2883 }, { "epoch": 0.98, "learning_rate": 1.0739951899161153e-06, "logits/chosen": -0.2811681628227234, "logits/rejected": -0.26343175768852234, "logps/chosen": -244.52764892578125, "logps/rejected": -377.75677490234375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.4836273193359375, "rewards/margins": 13.590738296508789, "rewards/rejected": -15.074365615844727, "step": 2884 }, { "epoch": 0.98, "learning_rate": 1.073443985769629e-06, "logits/chosen": -0.2060692012310028, "logits/rejected": -0.15691590309143066, "logps/chosen": -164.14085388183594, "logps/rejected": -244.0976104736328, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.8155133128166199, "rewards/margins": 13.199477195739746, "rewards/rejected": -14.01499080657959, "step": 2885 }, { "epoch": 0.98, "learning_rate": 1.0728927591870183e-06, "logits/chosen": -0.2409127801656723, "logits/rejected": -0.22047151625156403, "logps/chosen": -194.0814666748047, "logps/rejected": -351.61871337890625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.6235668659210205, "rewards/margins": 13.53071117401123, "rewards/rejected": -15.154276847839355, "step": 2886 }, { "epoch": 0.99, "learning_rate": 1.072341510336676e-06, "logits/chosen": -0.38548213243484497, "logits/rejected": -0.3707241117954254, "logps/chosen": -182.52281188964844, "logps/rejected": -291.7400207519531, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8419249057769775, "rewards/margins": 12.31701946258545, "rewards/rejected": -13.158944129943848, "step": 2887 }, { "epoch": 0.99, "learning_rate": 1.0717902393870006e-06, "logits/chosen": -0.2507656216621399, "logits/rejected": -0.21995417773723602, "logps/chosen": -226.78419494628906, "logps/rejected": -361.7003173828125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.4533131718635559, "rewards/margins": 13.580987930297852, "rewards/rejected": -14.0343017578125, "step": 2888 }, { "epoch": 0.99, "learning_rate": 1.0712389465063976e-06, "logits/chosen": -0.20363444089889526, "logits/rejected": -0.17845477163791656, "logps/chosen": -156.38031005859375, "logps/rejected": -280.5565185546875, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": -0.3357383906841278, "rewards/margins": 14.211069107055664, "rewards/rejected": -14.546808242797852, "step": 2889 }, { "epoch": 0.99, "learning_rate": 1.0706876318632796e-06, "logits/chosen": -0.2451431304216385, "logits/rejected": -0.2169020175933838, "logps/chosen": -224.08621215820312, "logps/rejected": -376.67645263671875, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": -0.38491976261138916, "rewards/margins": 15.157941818237305, "rewards/rejected": -15.542862892150879, "step": 2890 }, { "epoch": 0.99, "learning_rate": 1.0701362956260655e-06, "logits/chosen": -0.2550409138202667, "logits/rejected": -0.2443021535873413, "logps/chosen": -182.36886596679688, "logps/rejected": -361.0409851074219, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.6666509509086609, "rewards/margins": 16.34221839904785, "rewards/rejected": -17.00887107849121, "step": 2891 }, { "epoch": 0.99, "learning_rate": 1.0695849379631813e-06, "logits/chosen": -0.3492980897426605, "logits/rejected": -0.3272871673107147, "logps/chosen": -179.0654754638672, "logps/rejected": -299.3179016113281, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.797514796257019, "rewards/margins": 12.10796070098877, "rewards/rejected": -12.905475616455078, "step": 2892 }, { "epoch": 0.99, "learning_rate": 1.0690335590430588e-06, "logits/chosen": -0.2703239619731903, "logits/rejected": -0.24970531463623047, "logps/chosen": -225.51441955566406, "logps/rejected": -402.4959716796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 1.029252052307129, "rewards/margins": 18.21462631225586, "rewards/rejected": -17.185375213623047, "step": 2893 }, { "epoch": 0.99, "learning_rate": 1.0684821590341365e-06, "logits/chosen": -0.2482403814792633, "logits/rejected": -0.24086765944957733, "logps/chosen": -201.17376708984375, "logps/rejected": -369.0718994140625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.41830047965049744, "rewards/margins": 14.556906700134277, "rewards/rejected": -14.975207328796387, "step": 2894 }, { "epoch": 0.99, "learning_rate": 1.0679307381048594e-06, "logits/chosen": -0.2447490394115448, "logits/rejected": -0.21101200580596924, "logps/chosen": -236.0833282470703, "logps/rejected": -385.6838684082031, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.3848044872283936, "rewards/margins": 16.72595977783203, "rewards/rejected": -18.110763549804688, "step": 2895 }, { "epoch": 0.99, "learning_rate": 1.06737929642368e-06, "logits/chosen": -0.2646295428276062, "logits/rejected": -0.23460102081298828, "logps/chosen": -205.9564208984375, "logps/rejected": -287.0500793457031, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.562727689743042, "rewards/margins": 10.83013916015625, "rewards/rejected": -12.392867088317871, "step": 2896 }, { "epoch": 0.99, "learning_rate": 1.0668278341590548e-06, "logits/chosen": -0.2171938121318817, "logits/rejected": -0.22010105848312378, "logps/chosen": -210.02252197265625, "logps/rejected": -343.8802490234375, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4847383499145508, "rewards/margins": 13.066701889038086, "rewards/rejected": -13.551440238952637, "step": 2897 }, { "epoch": 0.99, "learning_rate": 1.0662763514794487e-06, "logits/chosen": -0.1896543800830841, "logits/rejected": -0.20080162584781647, "logps/chosen": -175.05035400390625, "logps/rejected": -339.88983154296875, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.654423475265503, "rewards/margins": 12.47966480255127, "rewards/rejected": -14.134088516235352, "step": 2898 }, { "epoch": 0.99, "learning_rate": 1.065724848553332e-06, "logits/chosen": -0.34597623348236084, "logits/rejected": -0.31371602416038513, "logps/chosen": -195.89553833007812, "logps/rejected": -319.88262939453125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.8102360963821411, "rewards/margins": 14.748797416687012, "rewards/rejected": -16.559032440185547, "step": 2899 }, { "epoch": 0.99, "learning_rate": 1.065173325549181e-06, "logits/chosen": -0.24839964509010315, "logits/rejected": -0.22236259281635284, "logps/chosen": -148.63119506835938, "logps/rejected": -279.1806640625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.41334277391433716, "rewards/margins": 13.533391952514648, "rewards/rejected": -13.946735382080078, "step": 2900 }, { "epoch": 0.99, "learning_rate": 1.0646217826354782e-06, "logits/chosen": -0.30587586760520935, "logits/rejected": -0.27265724539756775, "logps/chosen": -262.6862487792969, "logps/rejected": -372.36981201171875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.7865769267082214, "rewards/margins": 15.785262107849121, "rewards/rejected": -16.57183837890625, "step": 2901 }, { "epoch": 0.99, "learning_rate": 1.064070219980713e-06, "logits/chosen": -0.29041606187820435, "logits/rejected": -0.25416532158851624, "logps/chosen": -234.1678009033203, "logps/rejected": -405.21429443359375, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.817533254623413, "rewards/margins": 15.670472145080566, "rewards/rejected": -17.488006591796875, "step": 2902 }, { "epoch": 0.99, "learning_rate": 1.0635186377533795e-06, "logits/chosen": -0.2737240195274353, "logits/rejected": -0.22465577721595764, "logps/chosen": -237.11695861816406, "logps/rejected": -374.41259765625, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -1.2450569868087769, "rewards/margins": 17.04749298095703, "rewards/rejected": -18.29254913330078, "step": 2903 }, { "epoch": 0.99, "learning_rate": 1.062967036121979e-06, "logits/chosen": -0.3511107861995697, "logits/rejected": -0.33595022559165955, "logps/chosen": -266.1220703125, "logps/rejected": -412.6830749511719, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -2.370112657546997, "rewards/margins": 17.494285583496094, "rewards/rejected": -19.864397048950195, "step": 2904 }, { "epoch": 0.99, "learning_rate": 1.0624154152550178e-06, "logits/chosen": -0.2589268386363983, "logits/rejected": -0.23747852444648743, "logps/chosen": -192.8294219970703, "logps/rejected": -378.8187561035156, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.254187822341919, "rewards/margins": 17.90888786315918, "rewards/rejected": -19.16307830810547, "step": 2905 }, { "epoch": 0.99, "learning_rate": 1.0618637753210085e-06, "logits/chosen": -0.34808486700057983, "logits/rejected": -0.32031139731407166, "logps/chosen": -208.59469604492188, "logps/rejected": -326.22216796875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.703052043914795, "rewards/margins": 10.976423263549805, "rewards/rejected": -12.679476737976074, "step": 2906 }, { "epoch": 0.99, "learning_rate": 1.0613121164884697e-06, "logits/chosen": -0.32786068320274353, "logits/rejected": -0.26096028089523315, "logps/chosen": -234.5669708251953, "logps/rejected": -314.96929931640625, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -0.6815385222434998, "rewards/margins": 14.073211669921875, "rewards/rejected": -14.754749298095703, "step": 2907 }, { "epoch": 0.99, "learning_rate": 1.0607604389259255e-06, "logits/chosen": -0.23317666351795197, "logits/rejected": -0.21141526103019714, "logps/chosen": -178.79104614257812, "logps/rejected": -321.66778564453125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.7111602425575256, "rewards/margins": 14.481775283813477, "rewards/rejected": -15.192935943603516, "step": 2908 }, { "epoch": 0.99, "learning_rate": 1.0602087428019056e-06, "logits/chosen": -0.30408912897109985, "logits/rejected": -0.2618972957134247, "logps/chosen": -236.69406127929688, "logps/rejected": -356.9201354980469, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5435243844985962, "rewards/margins": 16.794601440429688, "rewards/rejected": -18.33812713623047, "step": 2909 }, { "epoch": 0.99, "learning_rate": 1.0596570282849452e-06, "logits/chosen": -0.23705439269542694, "logits/rejected": -0.20767909288406372, "logps/chosen": -146.4224395751953, "logps/rejected": -273.6277160644531, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.1243990659713745, "rewards/margins": 13.748678207397461, "rewards/rejected": -14.873076438903809, "step": 2910 }, { "epoch": 0.99, "learning_rate": 1.0591052955435866e-06, "logits/chosen": -0.25363990664482117, "logits/rejected": -0.22592632472515106, "logps/chosen": -216.11419677734375, "logps/rejected": -320.3292236328125, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -0.6378570199012756, "rewards/margins": 13.312132835388184, "rewards/rejected": -13.949987411499023, "step": 2911 }, { "epoch": 0.99, "learning_rate": 1.058553544746376e-06, "logits/chosen": -0.23149473965168, "logits/rejected": -0.20378173887729645, "logps/chosen": -144.89736938476562, "logps/rejected": -220.9456787109375, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.11185850203037262, "rewards/margins": 12.503525733947754, "rewards/rejected": -12.615386009216309, "step": 2912 }, { "epoch": 0.99, "learning_rate": 1.0580017760618651e-06, "logits/chosen": -0.353240966796875, "logits/rejected": -0.32253459095954895, "logps/chosen": -238.66571044921875, "logps/rejected": -422.6841735839844, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -0.8630616664886475, "rewards/margins": 19.215425491333008, "rewards/rejected": -20.078487396240234, "step": 2913 }, { "epoch": 0.99, "learning_rate": 1.0574499896586122e-06, "logits/chosen": -0.27369123697280884, "logits/rejected": -0.26332542300224304, "logps/chosen": -228.89419555664062, "logps/rejected": -379.6557922363281, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.9185850620269775, "rewards/margins": 14.117166519165039, "rewards/rejected": -16.035751342773438, "step": 2914 }, { "epoch": 0.99, "learning_rate": 1.0568981857051802e-06, "logits/chosen": -0.25981757044792175, "logits/rejected": -0.23753786087036133, "logps/chosen": -219.32366943359375, "logps/rejected": -320.93756103515625, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.4436389207839966, "rewards/margins": 12.397475242614746, "rewards/rejected": -13.841114044189453, "step": 2915 }, { "epoch": 1.0, "learning_rate": 1.056346364370138e-06, "logits/chosen": -0.203555628657341, "logits/rejected": -0.17484651505947113, "logps/chosen": -227.45223999023438, "logps/rejected": -393.12445068359375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.024466782808303833, "rewards/margins": 16.358991622924805, "rewards/rejected": -16.383460998535156, "step": 2916 }, { "epoch": 1.0, "learning_rate": 1.0557945258220587e-06, "logits/chosen": -0.2079630345106125, "logits/rejected": -0.17273764312267303, "logps/chosen": -208.64004516601562, "logps/rejected": -339.3841247558594, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.8653498291969299, "rewards/margins": 15.211617469787598, "rewards/rejected": -16.076967239379883, "step": 2917 }, { "epoch": 1.0, "learning_rate": 1.0552426702295219e-06, "logits/chosen": -0.2068067342042923, "logits/rejected": -0.189309760928154, "logps/chosen": -216.61923217773438, "logps/rejected": -371.5692138671875, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.6617165207862854, "rewards/margins": 16.272598266601562, "rewards/rejected": -16.934314727783203, "step": 2918 }, { "epoch": 1.0, "learning_rate": 1.0546907977611116e-06, "logits/chosen": -0.27981045842170715, "logits/rejected": -0.25827234983444214, "logps/chosen": -250.86676025390625, "logps/rejected": -406.7056579589844, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -1.0292125940322876, "rewards/margins": 15.141532897949219, "rewards/rejected": -16.170745849609375, "step": 2919 }, { "epoch": 1.0, "learning_rate": 1.0541389085854176e-06, "logits/chosen": -0.29328903555870056, "logits/rejected": -0.25278279185295105, "logps/chosen": -160.96629333496094, "logps/rejected": -316.76397705078125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.1301701068878174, "rewards/margins": 15.861289024353027, "rewards/rejected": -16.9914608001709, "step": 2920 }, { "epoch": 1.0, "learning_rate": 1.053587002871034e-06, "logits/chosen": -0.23193314671516418, "logits/rejected": -0.2246112823486328, "logps/chosen": -133.35360717773438, "logps/rejected": -271.06658935546875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.9365994334220886, "rewards/margins": 12.885215759277344, "rewards/rejected": -13.82181453704834, "step": 2921 }, { "epoch": 1.0, "learning_rate": 1.0530350807865604e-06, "logits/chosen": -0.2290770262479782, "logits/rejected": -0.21376515924930573, "logps/chosen": -173.95558166503906, "logps/rejected": -286.8503723144531, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.2915247678756714, "rewards/margins": 13.59039306640625, "rewards/rejected": -13.298868179321289, "step": 2922 }, { "epoch": 1.0, "learning_rate": 1.0524831425006017e-06, "logits/chosen": -0.19733627140522003, "logits/rejected": -0.15388314425945282, "logps/chosen": -219.74273681640625, "logps/rejected": -338.09027099609375, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": -1.051804780960083, "rewards/margins": 13.786627769470215, "rewards/rejected": -14.838431358337402, "step": 2923 }, { "epoch": 1.0, "learning_rate": 1.0519311881817672e-06, "logits/chosen": -0.23919399082660675, "logits/rejected": -0.2428116351366043, "logps/chosen": -214.23562622070312, "logps/rejected": -420.5027160644531, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.9167755246162415, "rewards/margins": 14.79420280456543, "rewards/rejected": -15.710979461669922, "step": 2924 }, { "epoch": 1.0, "learning_rate": 1.0513792179986714e-06, "logits/chosen": -0.3135545253753662, "logits/rejected": -0.28470727801322937, "logps/chosen": -192.93832397460938, "logps/rejected": -330.9713134765625, "loss": 0.0317, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7988965511322021, "rewards/margins": 15.311212539672852, "rewards/rejected": -17.110107421875, "step": 2925 }, { "epoch": 1.0, "learning_rate": 1.0508272321199334e-06, "logits/chosen": -0.18738077580928802, "logits/rejected": -0.18404363095760345, "logps/chosen": -141.66639709472656, "logps/rejected": -294.3580322265625, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -1.5114082098007202, "rewards/margins": 12.816605567932129, "rewards/rejected": -14.328014373779297, "step": 2926 }, { "epoch": 1.0, "learning_rate": 1.0502752307141778e-06, "logits/chosen": -0.30629321932792664, "logits/rejected": -0.2848072946071625, "logps/chosen": -160.23495483398438, "logps/rejected": -297.6669006347656, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.1135549545288086, "rewards/margins": 12.938697814941406, "rewards/rejected": -14.052253723144531, "step": 2927 }, { "epoch": 1.0, "learning_rate": 1.0497232139500327e-06, "logits/chosen": -0.16023419797420502, "logits/rejected": -0.10557740181684494, "logps/chosen": -183.21676635742188, "logps/rejected": -270.1053466796875, "loss": 0.0605, "rewards/accuracies": 1.0, "rewards/chosen": 0.12675288319587708, "rewards/margins": 14.2373046875, "rewards/rejected": -14.110551834106445, "step": 2928 }, { "epoch": 1.0, "learning_rate": 1.049171181996132e-06, "logits/chosen": -0.17631898820400238, "logits/rejected": -0.13366585969924927, "logps/chosen": -186.74951171875, "logps/rejected": -319.2210998535156, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": -1.2495218515396118, "rewards/margins": 15.015512466430664, "rewards/rejected": -16.265033721923828, "step": 2929 }, { "epoch": 1.0, "learning_rate": 1.0486191350211138e-06, "logits/chosen": -0.22795329988002777, "logits/rejected": -0.21066123247146606, "logps/chosen": -176.8048858642578, "logps/rejected": -292.3580322265625, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -0.8435388803482056, "rewards/margins": 11.726253509521484, "rewards/rejected": -12.569792747497559, "step": 2930 }, { "epoch": 1.0, "learning_rate": 1.0480670731936208e-06, "logits/chosen": -0.23619116842746735, "logits/rejected": -0.2262181043624878, "logps/chosen": -213.9687042236328, "logps/rejected": -346.93536376953125, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -3.7534890174865723, "rewards/margins": 11.793718338012695, "rewards/rejected": -15.54720687866211, "step": 2931 }, { "epoch": 1.0, "learning_rate": 1.0475149966823002e-06, "logits/chosen": -0.18025587499141693, "logits/rejected": -0.15109553933143616, "logps/chosen": -160.087890625, "logps/rejected": -295.687255859375, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -1.4795247316360474, "rewards/margins": 13.600210189819336, "rewards/rejected": -15.07973575592041, "step": 2932 }, { "epoch": 1.0, "learning_rate": 1.046962905655804e-06, "logits/chosen": -0.196902334690094, "logits/rejected": -0.1799551546573639, "logps/chosen": -183.59481811523438, "logps/rejected": -285.96246337890625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.2194554805755615, "rewards/margins": 11.945034980773926, "rewards/rejected": -14.16448974609375, "step": 2933 }, { "epoch": 1.0, "learning_rate": 1.0464108002827881e-06, "logits/chosen": -0.2707444429397583, "logits/rejected": -0.20088240504264832, "logps/chosen": -274.7162780761719, "logps/rejected": -473.002197265625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.2371684312820435, "rewards/margins": 20.334802627563477, "rewards/rejected": -21.571969985961914, "step": 2934 }, { "epoch": 1.0, "learning_rate": 1.0458586807319132e-06, "logits/chosen": -0.1784413754940033, "logits/rejected": -0.15718694031238556, "logps/chosen": -244.7021484375, "logps/rejected": -355.6612854003906, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": -1.8550450801849365, "rewards/margins": 13.770795822143555, "rewards/rejected": -15.62584114074707, "step": 2935 }, { "epoch": 1.0, "learning_rate": 1.0453065471718442e-06, "logits/chosen": -0.23894141614437103, "logits/rejected": -0.2356121987104416, "logps/chosen": -205.64358520507812, "logps/rejected": -383.5531921386719, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.5770903825759888, "rewards/margins": 16.333572387695312, "rewards/rejected": -16.910663604736328, "step": 2936 }, { "epoch": 1.0, "learning_rate": 1.0447543997712504e-06, "logits/chosen": -0.1782289296388626, "logits/rejected": -0.12486584484577179, "logps/chosen": -219.66708374023438, "logps/rejected": -280.7065124511719, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 0.03703676164150238, "rewards/margins": 13.153406143188477, "rewards/rejected": -13.11637020111084, "step": 2937 }, { "epoch": 1.0, "learning_rate": 1.044202238698805e-06, "logits/chosen": -0.1619909256696701, "logits/rejected": -0.13069742918014526, "logps/chosen": -150.5349578857422, "logps/rejected": -243.50437927246094, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.6021369695663452, "rewards/margins": 12.439177513122559, "rewards/rejected": -13.041313171386719, "step": 2938 }, { "epoch": 1.0, "learning_rate": 1.0436500641231859e-06, "logits/chosen": -0.21341556310653687, "logits/rejected": -0.17811255156993866, "logps/chosen": -175.02398681640625, "logps/rejected": -265.8061828613281, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.20253542065620422, "rewards/margins": 15.262198448181152, "rewards/rejected": -15.464734077453613, "step": 2939 }, { "epoch": 1.0, "learning_rate": 1.0430978762130742e-06, "logits/chosen": -0.34425869584083557, "logits/rejected": -0.32385602593421936, "logps/chosen": -212.0517578125, "logps/rejected": -355.32501220703125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.0194956064224243, "rewards/margins": 15.433022499084473, "rewards/rejected": -16.452518463134766, "step": 2940 }, { "epoch": 1.0, "learning_rate": 1.0425456751371562e-06, "logits/chosen": -0.2304270714521408, "logits/rejected": -0.172722727060318, "logps/chosen": -218.08438110351562, "logps/rejected": -299.569580078125, "loss": 0.0642, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5084922313690186, "rewards/margins": 14.017081260681152, "rewards/rejected": -14.52557373046875, "step": 2941 }, { "epoch": 1.0, "learning_rate": 1.041993461064122e-06, "logits/chosen": -0.23331229388713837, "logits/rejected": -0.21962809562683105, "logps/chosen": -146.83274841308594, "logps/rejected": -272.15216064453125, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.2743042707443237, "rewards/margins": 12.300907135009766, "rewards/rejected": -13.575210571289062, "step": 2942 }, { "epoch": 1.0, "learning_rate": 1.0414412341626644e-06, "logits/chosen": -0.31822991371154785, "logits/rejected": -0.2581747770309448, "logps/chosen": -218.1893310546875, "logps/rejected": -358.31292724609375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.07512505352497101, "rewards/margins": 15.91893482208252, "rewards/rejected": -15.994060516357422, "step": 2943 }, { "epoch": 1.0, "learning_rate": 1.0408889946014819e-06, "logits/chosen": -0.14330649375915527, "logits/rejected": -0.1256636083126068, "logps/chosen": -135.27920532226562, "logps/rejected": -323.81072998046875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8979501724243164, "rewards/margins": 14.899755477905273, "rewards/rejected": -15.79770565032959, "step": 2944 }, { "epoch": 1.01, "learning_rate": 1.040336742549276e-06, "logits/chosen": -0.1535165160894394, "logits/rejected": -0.1181967630982399, "logps/chosen": -219.09835815429688, "logps/rejected": -360.7191467285156, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.8560723066329956, "rewards/margins": 17.6066951751709, "rewards/rejected": -18.4627685546875, "step": 2945 }, { "epoch": 1.01, "learning_rate": 1.0397844781747516e-06, "logits/chosen": -0.22122342884540558, "logits/rejected": -0.19197005033493042, "logps/chosen": -189.09378051757812, "logps/rejected": -332.59588623046875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.5861740112304688, "rewards/margins": 13.80721664428711, "rewards/rejected": -15.393390655517578, "step": 2946 }, { "epoch": 1.01, "learning_rate": 1.0392322016466181e-06, "logits/chosen": -0.22880218923091888, "logits/rejected": -0.19668985903263092, "logps/chosen": -211.02047729492188, "logps/rejected": -298.21246337890625, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -1.256015658378601, "rewards/margins": 12.110992431640625, "rewards/rejected": -13.367009162902832, "step": 2947 }, { "epoch": 1.01, "learning_rate": 1.0386799131335887e-06, "logits/chosen": -0.2709715962409973, "logits/rejected": -0.25163403153419495, "logps/chosen": -163.3431396484375, "logps/rejected": -289.8318176269531, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": 0.34978365898132324, "rewards/margins": 14.370891571044922, "rewards/rejected": -14.02110767364502, "step": 2948 }, { "epoch": 1.01, "learning_rate": 1.0381276128043794e-06, "logits/chosen": -0.22054064273834229, "logits/rejected": -0.19719253480434418, "logps/chosen": -211.14695739746094, "logps/rejected": -393.91290283203125, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -1.708238124847412, "rewards/margins": 17.95718765258789, "rewards/rejected": -19.665424346923828, "step": 2949 }, { "epoch": 1.01, "learning_rate": 1.0375753008277109e-06, "logits/chosen": -0.22787843644618988, "logits/rejected": -0.1945713758468628, "logps/chosen": -266.3162841796875, "logps/rejected": -454.912353515625, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -1.6030339002609253, "rewards/margins": 19.032941818237305, "rewards/rejected": -20.635974884033203, "step": 2950 }, { "epoch": 1.01, "learning_rate": 1.0370229773723066e-06, "logits/chosen": -0.17089590430259705, "logits/rejected": -0.14967727661132812, "logps/chosen": -221.9912109375, "logps/rejected": -383.2213134765625, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.21317854523658752, "rewards/margins": 16.508373260498047, "rewards/rejected": -16.72154998779297, "step": 2951 }, { "epoch": 1.01, "learning_rate": 1.036470642606893e-06, "logits/chosen": -0.24792411923408508, "logits/rejected": -0.21257267892360687, "logps/chosen": -249.74972534179688, "logps/rejected": -335.8422546386719, "loss": 0.0432, "rewards/accuracies": 0.875, "rewards/chosen": -0.3474371135234833, "rewards/margins": 12.100226402282715, "rewards/rejected": -12.447664260864258, "step": 2952 }, { "epoch": 1.01, "learning_rate": 1.0359182967002019e-06, "logits/chosen": -0.24711236357688904, "logits/rejected": -0.23287758231163025, "logps/chosen": -219.64224243164062, "logps/rejected": -360.4409484863281, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.504019021987915, "rewards/margins": 14.763498306274414, "rewards/rejected": -15.2675199508667, "step": 2953 }, { "epoch": 1.01, "learning_rate": 1.0353659398209669e-06, "logits/chosen": -0.21042697131633759, "logits/rejected": -0.1751174032688141, "logps/chosen": -184.62876892089844, "logps/rejected": -268.6302490234375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.6625471115112305, "rewards/margins": 11.263787269592285, "rewards/rejected": -10.601239204406738, "step": 2954 }, { "epoch": 1.01, "learning_rate": 1.034813572137925e-06, "logits/chosen": -0.13378223776817322, "logits/rejected": -0.12338968366384506, "logps/chosen": -189.32904052734375, "logps/rejected": -328.4951171875, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.1801737546920776, "rewards/margins": 12.72571849822998, "rewards/rejected": -13.905891418457031, "step": 2955 }, { "epoch": 1.01, "learning_rate": 1.0342611938198173e-06, "logits/chosen": -0.2318604737520218, "logits/rejected": -0.2165517359972, "logps/chosen": -248.91500854492188, "logps/rejected": -412.294189453125, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -2.2939932346343994, "rewards/margins": 16.790626525878906, "rewards/rejected": -19.084619522094727, "step": 2956 }, { "epoch": 1.01, "learning_rate": 1.033708805035388e-06, "logits/chosen": -0.24529553949832916, "logits/rejected": -0.21060626208782196, "logps/chosen": -246.8936309814453, "logps/rejected": -378.4268493652344, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.6321618556976318, "rewards/margins": 17.568788528442383, "rewards/rejected": -18.200950622558594, "step": 2957 }, { "epoch": 1.01, "learning_rate": 1.0331564059533835e-06, "logits/chosen": -0.10069075971841812, "logits/rejected": -0.08431170880794525, "logps/chosen": -183.7626495361328, "logps/rejected": -342.92779541015625, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.740919828414917, "rewards/margins": 16.152700424194336, "rewards/rejected": -17.893619537353516, "step": 2958 }, { "epoch": 1.01, "learning_rate": 1.0326039967425548e-06, "logits/chosen": -0.14906930923461914, "logits/rejected": -0.13515689969062805, "logps/chosen": -152.0350799560547, "logps/rejected": -299.7398681640625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.7897822856903076, "rewards/margins": 13.960389137268066, "rewards/rejected": -15.75016975402832, "step": 2959 }, { "epoch": 1.01, "learning_rate": 1.0320515775716554e-06, "logits/chosen": -0.241064190864563, "logits/rejected": -0.19751890003681183, "logps/chosen": -161.58497619628906, "logps/rejected": -304.1661071777344, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -0.5151970982551575, "rewards/margins": 15.382568359375, "rewards/rejected": -15.897767066955566, "step": 2960 }, { "epoch": 1.01, "learning_rate": 1.031499148609441e-06, "logits/chosen": -0.139402374625206, "logits/rejected": -0.11695589125156403, "logps/chosen": -181.56475830078125, "logps/rejected": -300.2930603027344, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -0.6214593052864075, "rewards/margins": 14.145809173583984, "rewards/rejected": -14.767269134521484, "step": 2961 }, { "epoch": 1.01, "learning_rate": 1.030946710024671e-06, "logits/chosen": -0.335493803024292, "logits/rejected": -0.31421801447868347, "logps/chosen": -239.69874572753906, "logps/rejected": -431.0346984863281, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.9768543243408203, "rewards/margins": 20.245407104492188, "rewards/rejected": -21.222261428833008, "step": 2962 }, { "epoch": 1.01, "learning_rate": 1.0303942619861087e-06, "logits/chosen": -0.21793346107006073, "logits/rejected": -0.17739035189151764, "logps/chosen": -172.52841186523438, "logps/rejected": -311.5733947753906, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.4658548831939697, "rewards/margins": 15.747264862060547, "rewards/rejected": -17.213117599487305, "step": 2963 }, { "epoch": 1.01, "learning_rate": 1.0298418046625188e-06, "logits/chosen": -0.27242451906204224, "logits/rejected": -0.2419460415840149, "logps/chosen": -189.26414489746094, "logps/rejected": -300.9442443847656, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.029040157794952393, "rewards/margins": 15.96753978729248, "rewards/rejected": -15.996578216552734, "step": 2964 }, { "epoch": 1.01, "learning_rate": 1.0292893382226692e-06, "logits/chosen": -0.24660368263721466, "logits/rejected": -0.23122678697109222, "logps/chosen": -211.63555908203125, "logps/rejected": -401.0193176269531, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.6433537602424622, "rewards/margins": 18.36164093017578, "rewards/rejected": -19.004993438720703, "step": 2965 }, { "epoch": 1.01, "learning_rate": 1.0287368628353313e-06, "logits/chosen": -0.20950652658939362, "logits/rejected": -0.18393954634666443, "logps/chosen": -118.12578582763672, "logps/rejected": -246.56689453125, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.5080397129058838, "rewards/margins": 13.577592849731445, "rewards/rejected": -15.08563232421875, "step": 2966 }, { "epoch": 1.01, "learning_rate": 1.028184378669278e-06, "logits/chosen": -0.1977425217628479, "logits/rejected": -0.1778162121772766, "logps/chosen": -188.27894592285156, "logps/rejected": -288.811279296875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.26585352420806885, "rewards/margins": 11.244362831115723, "rewards/rejected": -11.510215759277344, "step": 2967 }, { "epoch": 1.01, "learning_rate": 1.0276318858932863e-06, "logits/chosen": -0.22231163084506989, "logits/rejected": -0.21817399561405182, "logps/chosen": -204.30262756347656, "logps/rejected": -404.10418701171875, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.141283392906189, "rewards/margins": 16.909425735473633, "rewards/rejected": -18.050710678100586, "step": 2968 }, { "epoch": 1.01, "learning_rate": 1.0270793846761346e-06, "logits/chosen": -0.24738232791423798, "logits/rejected": -0.20054830610752106, "logps/chosen": -193.94525146484375, "logps/rejected": -321.9447021484375, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -1.1507585048675537, "rewards/margins": 17.069080352783203, "rewards/rejected": -18.219837188720703, "step": 2969 }, { "epoch": 1.01, "learning_rate": 1.0265268751866045e-06, "logits/chosen": -0.24291068315505981, "logits/rejected": -0.19998256862163544, "logps/chosen": -203.14419555664062, "logps/rejected": -277.84063720703125, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -2.5090689659118652, "rewards/margins": 12.269634246826172, "rewards/rejected": -14.778701782226562, "step": 2970 }, { "epoch": 1.01, "learning_rate": 1.02597435759348e-06, "logits/chosen": -0.2551057040691376, "logits/rejected": -0.24764059484004974, "logps/chosen": -225.58734130859375, "logps/rejected": -393.0562744140625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.4097504615783691, "rewards/margins": 17.287479400634766, "rewards/rejected": -18.697229385375977, "step": 2971 }, { "epoch": 1.01, "learning_rate": 1.025421832065548e-06, "logits/chosen": -0.13410446047782898, "logits/rejected": -0.09235336631536484, "logps/chosen": -184.19476318359375, "logps/rejected": -330.7236633300781, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.9124454259872437, "rewards/margins": 16.412899017333984, "rewards/rejected": -18.32534408569336, "step": 2972 }, { "epoch": 1.01, "learning_rate": 1.0248692987715972e-06, "logits/chosen": -0.24552986025810242, "logits/rejected": -0.22854647040367126, "logps/chosen": -174.27122497558594, "logps/rejected": -314.693115234375, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -2.2856831550598145, "rewards/margins": 12.26260757446289, "rewards/rejected": -14.548291206359863, "step": 2973 }, { "epoch": 1.02, "learning_rate": 1.0243167578804185e-06, "logits/chosen": -0.2963590621948242, "logits/rejected": -0.27752527594566345, "logps/chosen": -243.92898559570312, "logps/rejected": -365.0929870605469, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -1.669535517692566, "rewards/margins": 14.088700294494629, "rewards/rejected": -15.758237838745117, "step": 2974 }, { "epoch": 1.02, "learning_rate": 1.0237642095608062e-06, "logits/chosen": -0.18505223095417023, "logits/rejected": -0.18648654222488403, "logps/chosen": -137.30230712890625, "logps/rejected": -308.795166015625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.8925997018814087, "rewards/margins": 13.941547393798828, "rewards/rejected": -14.834146499633789, "step": 2975 }, { "epoch": 1.02, "learning_rate": 1.0232116539815556e-06, "logits/chosen": -0.2404928058385849, "logits/rejected": -0.1866685450077057, "logps/chosen": -224.32965087890625, "logps/rejected": -315.54998779296875, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -2.794498920440674, "rewards/margins": 13.976344108581543, "rewards/rejected": -16.770843505859375, "step": 2976 }, { "epoch": 1.02, "learning_rate": 1.022659091311465e-06, "logits/chosen": -0.19211052358150482, "logits/rejected": -0.1857471615076065, "logps/chosen": -212.41943359375, "logps/rejected": -355.2340087890625, "loss": 0.0835, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6964956521987915, "rewards/margins": 15.545822143554688, "rewards/rejected": -16.24231719970703, "step": 2977 }, { "epoch": 1.02, "learning_rate": 1.0221065217193353e-06, "logits/chosen": -0.22719810903072357, "logits/rejected": -0.1884763091802597, "logps/chosen": -188.36734008789062, "logps/rejected": -354.6334533691406, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.944052815437317, "rewards/margins": 14.847033500671387, "rewards/rejected": -16.791086196899414, "step": 2978 }, { "epoch": 1.02, "learning_rate": 1.0215539453739674e-06, "logits/chosen": -0.337627112865448, "logits/rejected": -0.2898312211036682, "logps/chosen": -223.271484375, "logps/rejected": -314.60321044921875, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -2.2438368797302246, "rewards/margins": 12.750391006469727, "rewards/rejected": -14.994227409362793, "step": 2979 }, { "epoch": 1.02, "learning_rate": 1.0210013624441668e-06, "logits/chosen": -0.19047494232654572, "logits/rejected": -0.1570436954498291, "logps/chosen": -214.02178955078125, "logps/rejected": -392.0958251953125, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -1.432033658027649, "rewards/margins": 16.690141677856445, "rewards/rejected": -18.122177124023438, "step": 2980 }, { "epoch": 1.02, "learning_rate": 1.0204487730987403e-06, "logits/chosen": -0.1991867870092392, "logits/rejected": -0.17401842772960663, "logps/chosen": -174.68788146972656, "logps/rejected": -294.06463623046875, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/chosen": -2.0882492065429688, "rewards/margins": 13.122822761535645, "rewards/rejected": -15.211071968078613, "step": 2981 }, { "epoch": 1.02, "learning_rate": 1.0198961775064954e-06, "logits/chosen": -0.2445518672466278, "logits/rejected": -0.20213714241981506, "logps/chosen": -230.53219604492188, "logps/rejected": -360.5450744628906, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.030001163482666, "rewards/margins": 14.60770320892334, "rewards/rejected": -15.637704849243164, "step": 2982 }, { "epoch": 1.02, "learning_rate": 1.0193435758362431e-06, "logits/chosen": -0.1965075433254242, "logits/rejected": -0.161540225148201, "logps/chosen": -187.5902862548828, "logps/rejected": -307.35260009765625, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.6228904724121094, "rewards/margins": 14.715242385864258, "rewards/rejected": -15.338132858276367, "step": 2983 }, { "epoch": 1.02, "learning_rate": 1.0187909682567953e-06, "logits/chosen": -0.21383918821811676, "logits/rejected": -0.19009439647197723, "logps/chosen": -199.78428649902344, "logps/rejected": -293.45562744140625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3910911083221436, "rewards/margins": 13.802376747131348, "rewards/rejected": -15.193467140197754, "step": 2984 }, { "epoch": 1.02, "learning_rate": 1.0182383549369659e-06, "logits/chosen": -0.2894449234008789, "logits/rejected": -0.25433653593063354, "logps/chosen": -227.13380432128906, "logps/rejected": -358.6289367675781, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -1.14724600315094, "rewards/margins": 17.440696716308594, "rewards/rejected": -18.587942123413086, "step": 2985 }, { "epoch": 1.02, "learning_rate": 1.0176857360455705e-06, "logits/chosen": -0.24562807381153107, "logits/rejected": -0.23553860187530518, "logps/chosen": -201.1894989013672, "logps/rejected": -324.7881774902344, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.2001885175704956, "rewards/margins": 11.417723655700684, "rewards/rejected": -12.617912292480469, "step": 2986 }, { "epoch": 1.02, "learning_rate": 1.0171331117514274e-06, "logits/chosen": -0.1506452113389969, "logits/rejected": -0.13111764192581177, "logps/chosen": -174.89755249023438, "logps/rejected": -333.4686279296875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.8580925464630127, "rewards/margins": 15.356590270996094, "rewards/rejected": -16.21468162536621, "step": 2987 }, { "epoch": 1.02, "learning_rate": 1.0165804822233548e-06, "logits/chosen": -0.1876036375761032, "logits/rejected": -0.1693717986345291, "logps/chosen": -245.96524047851562, "logps/rejected": -361.3583984375, "loss": 0.0199, "rewards/accuracies": 0.9375, "rewards/chosen": -2.474879264831543, "rewards/margins": 12.187650680541992, "rewards/rejected": -14.662529945373535, "step": 2988 }, { "epoch": 1.02, "learning_rate": 1.0160278476301739e-06, "logits/chosen": -0.272842675447464, "logits/rejected": -0.2320920079946518, "logps/chosen": -205.41494750976562, "logps/rejected": -325.91290283203125, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -0.7999253869056702, "rewards/margins": 11.937618255615234, "rewards/rejected": -12.737543106079102, "step": 2989 }, { "epoch": 1.02, "learning_rate": 1.0154752081407065e-06, "logits/chosen": -0.25887414813041687, "logits/rejected": -0.24319970607757568, "logps/chosen": -186.0784149169922, "logps/rejected": -325.1087341308594, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.6199217438697815, "rewards/margins": 12.952156066894531, "rewards/rejected": -13.572078704833984, "step": 2990 }, { "epoch": 1.02, "learning_rate": 1.0149225639237765e-06, "logits/chosen": -0.2502923905849457, "logits/rejected": -0.24637708067893982, "logps/chosen": -199.4058380126953, "logps/rejected": -379.1583251953125, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -1.9511067867279053, "rewards/margins": 17.03325843811035, "rewards/rejected": -18.984365463256836, "step": 2991 }, { "epoch": 1.02, "learning_rate": 1.0143699151482092e-06, "logits/chosen": -0.30190345644950867, "logits/rejected": -0.26340943574905396, "logps/chosen": -249.51695251464844, "logps/rejected": -429.6383056640625, "loss": 0.0226, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9448726177215576, "rewards/margins": 18.12216567993164, "rewards/rejected": -20.06703758239746, "step": 2992 }, { "epoch": 1.02, "learning_rate": 1.0138172619828314e-06, "logits/chosen": -0.23704738914966583, "logits/rejected": -0.21510592103004456, "logps/chosen": -237.8246612548828, "logps/rejected": -326.1205749511719, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.388270616531372, "rewards/margins": 11.763739585876465, "rewards/rejected": -13.15201187133789, "step": 2993 }, { "epoch": 1.02, "learning_rate": 1.0132646045964705e-06, "logits/chosen": -0.3033776581287384, "logits/rejected": -0.28859743475914, "logps/chosen": -188.80026245117188, "logps/rejected": -331.13580322265625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -2.218562126159668, "rewards/margins": 12.22055721282959, "rewards/rejected": -14.439118385314941, "step": 2994 }, { "epoch": 1.02, "learning_rate": 1.0127119431579558e-06, "logits/chosen": -0.185896098613739, "logits/rejected": -0.17152994871139526, "logps/chosen": -157.62368774414062, "logps/rejected": -288.8443908691406, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6316299438476562, "rewards/margins": 13.781578063964844, "rewards/rejected": -14.413207054138184, "step": 2995 }, { "epoch": 1.02, "learning_rate": 1.0121592778361183e-06, "logits/chosen": -0.21360455453395844, "logits/rejected": -0.19481079280376434, "logps/chosen": -251.77699279785156, "logps/rejected": -448.082763671875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.6314401626586914, "rewards/margins": 17.91474723815918, "rewards/rejected": -18.546188354492188, "step": 2996 }, { "epoch": 1.02, "learning_rate": 1.0116066087997893e-06, "logits/chosen": -0.22776013612747192, "logits/rejected": -0.19537706673145294, "logps/chosen": -220.47125244140625, "logps/rejected": -391.9503479003906, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.6745373010635376, "rewards/margins": 18.340044021606445, "rewards/rejected": -20.01457977294922, "step": 2997 }, { "epoch": 1.02, "learning_rate": 1.0110539362178009e-06, "logits/chosen": -0.1821749061346054, "logits/rejected": -0.1421629786491394, "logps/chosen": -191.62075805664062, "logps/rejected": -348.4278564453125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.7859667539596558, "rewards/margins": 17.096050262451172, "rewards/rejected": -17.882017135620117, "step": 2998 }, { "epoch": 1.02, "learning_rate": 1.010501260258988e-06, "logits/chosen": -0.170306995511055, "logits/rejected": -0.14840051531791687, "logps/chosen": -204.95004272460938, "logps/rejected": -275.53472900390625, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.8544301986694336, "rewards/margins": 11.714125633239746, "rewards/rejected": -13.56855583190918, "step": 2999 }, { "epoch": 1.02, "learning_rate": 1.0099485810921847e-06, "logits/chosen": -0.25392210483551025, "logits/rejected": -0.22308260202407837, "logps/chosen": -193.23944091796875, "logps/rejected": -324.21307373046875, "loss": 0.0753, "rewards/accuracies": 0.875, "rewards/chosen": -1.7492419481277466, "rewards/margins": 13.247425079345703, "rewards/rejected": -14.996667861938477, "step": 3000 }, { "epoch": 1.02, "learning_rate": 1.0093958988862272e-06, "logits/chosen": -0.2336057424545288, "logits/rejected": -0.2097427248954773, "logps/chosen": -208.1882781982422, "logps/rejected": -335.4343566894531, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -1.754800796508789, "rewards/margins": 13.714698791503906, "rewards/rejected": -15.469501495361328, "step": 3001 }, { "epoch": 1.02, "learning_rate": 1.0088432138099527e-06, "logits/chosen": -0.2753112316131592, "logits/rejected": -0.23070213198661804, "logps/chosen": -197.2276153564453, "logps/rejected": -296.73590087890625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6295002102851868, "rewards/margins": 14.0778169631958, "rewards/rejected": -14.707317352294922, "step": 3002 }, { "epoch": 1.02, "learning_rate": 1.008290526032198e-06, "logits/chosen": -0.19916932284832, "logits/rejected": -0.1813872754573822, "logps/chosen": -141.70944213867188, "logps/rejected": -317.34893798828125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.2406824827194214, "rewards/margins": 15.747441291809082, "rewards/rejected": -16.98812484741211, "step": 3003 }, { "epoch": 1.03, "learning_rate": 1.0077378357218021e-06, "logits/chosen": -0.2676706612110138, "logits/rejected": -0.24019375443458557, "logps/chosen": -181.74392700195312, "logps/rejected": -341.0715637207031, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -2.175971746444702, "rewards/margins": 16.197540283203125, "rewards/rejected": -18.373512268066406, "step": 3004 }, { "epoch": 1.03, "learning_rate": 1.0071851430476042e-06, "logits/chosen": -0.23639655113220215, "logits/rejected": -0.19852431118488312, "logps/chosen": -250.98757934570312, "logps/rejected": -393.39617919921875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.2416149377822876, "rewards/margins": 17.12428855895996, "rewards/rejected": -18.365903854370117, "step": 3005 }, { "epoch": 1.03, "learning_rate": 1.006632448178444e-06, "logits/chosen": -0.21258744597434998, "logits/rejected": -0.1643957942724228, "logps/chosen": -188.33973693847656, "logps/rejected": -293.34527587890625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.5137964487075806, "rewards/margins": 14.363426208496094, "rewards/rejected": -15.877222061157227, "step": 3006 }, { "epoch": 1.03, "learning_rate": 1.0060797512831628e-06, "logits/chosen": -0.18301042914390564, "logits/rejected": -0.12512972950935364, "logps/chosen": -241.55535888671875, "logps/rejected": -321.29864501953125, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.20589189231395721, "rewards/margins": 16.175230026245117, "rewards/rejected": -16.381122589111328, "step": 3007 }, { "epoch": 1.03, "learning_rate": 1.0055270525306014e-06, "logits/chosen": -0.2473635971546173, "logits/rejected": -0.24246887862682343, "logps/chosen": -225.880615234375, "logps/rejected": -395.2393798828125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.8297151327133179, "rewards/margins": 16.620494842529297, "rewards/rejected": -17.450210571289062, "step": 3008 }, { "epoch": 1.03, "learning_rate": 1.0049743520896017e-06, "logits/chosen": -0.13724270462989807, "logits/rejected": -0.1315164715051651, "logps/chosen": -168.02871704101562, "logps/rejected": -323.2724609375, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.1262818574905396, "rewards/margins": 15.296798706054688, "rewards/rejected": -16.423080444335938, "step": 3009 }, { "epoch": 1.03, "learning_rate": 1.004421650129006e-06, "logits/chosen": -0.23868656158447266, "logits/rejected": -0.20786148309707642, "logps/chosen": -229.9573974609375, "logps/rejected": -382.8407897949219, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": 0.05844634771347046, "rewards/margins": 17.2104434967041, "rewards/rejected": -17.151996612548828, "step": 3010 }, { "epoch": 1.03, "learning_rate": 1.0038689468176573e-06, "logits/chosen": -0.20015476644039154, "logits/rejected": -0.19938991963863373, "logps/chosen": -177.14321899414062, "logps/rejected": -382.3431396484375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.2232537269592285, "rewards/margins": 14.04433822631836, "rewards/rejected": -16.26759147644043, "step": 3011 }, { "epoch": 1.03, "learning_rate": 1.0033162423243986e-06, "logits/chosen": -0.0805305615067482, "logits/rejected": -0.048077166080474854, "logps/chosen": -165.1287384033203, "logps/rejected": -335.46441650390625, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -0.40443503856658936, "rewards/margins": 17.602962493896484, "rewards/rejected": -18.00739860534668, "step": 3012 }, { "epoch": 1.03, "learning_rate": 1.0027635368180736e-06, "logits/chosen": -0.19727317988872528, "logits/rejected": -0.16654415428638458, "logps/chosen": -236.85977172851562, "logps/rejected": -406.23345947265625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8877452611923218, "rewards/margins": 18.08893585205078, "rewards/rejected": -18.976680755615234, "step": 3013 }, { "epoch": 1.03, "learning_rate": 1.0022108304675266e-06, "logits/chosen": -0.1698949784040451, "logits/rejected": -0.16033212840557098, "logps/chosen": -195.48680114746094, "logps/rejected": -363.81573486328125, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -2.023867607116699, "rewards/margins": 15.46967887878418, "rewards/rejected": -17.493545532226562, "step": 3014 }, { "epoch": 1.03, "learning_rate": 1.001658123441601e-06, "logits/chosen": -0.38594985008239746, "logits/rejected": -0.3561965227127075, "logps/chosen": -241.9239044189453, "logps/rejected": -417.7294616699219, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.0363988876342773, "rewards/margins": 18.12712860107422, "rewards/rejected": -20.163524627685547, "step": 3015 }, { "epoch": 1.03, "learning_rate": 1.001105415909142e-06, "logits/chosen": -0.14358751475811005, "logits/rejected": -0.1085931733250618, "logps/chosen": -162.2146453857422, "logps/rejected": -222.76263427734375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.0254874229431152, "rewards/margins": 11.982301712036133, "rewards/rejected": -13.007789611816406, "step": 3016 }, { "epoch": 1.03, "learning_rate": 1.0005527080389933e-06, "logits/chosen": -0.18526378273963928, "logits/rejected": -0.15397502481937408, "logps/chosen": -177.53286743164062, "logps/rejected": -345.724365234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.16713768243789673, "rewards/margins": 18.49395751953125, "rewards/rejected": -18.661094665527344, "step": 3017 }, { "epoch": 1.03, "learning_rate": 1e-06, "logits/chosen": -0.21994061768054962, "logits/rejected": -0.1706138700246811, "logps/chosen": -229.4658203125, "logps/rejected": -394.4468994140625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.6942748427391052, "rewards/margins": 17.2277889251709, "rewards/rejected": -17.92206573486328, "step": 3018 }, { "epoch": 1.03, "learning_rate": 9.994472919610068e-07, "logits/chosen": -0.28217804431915283, "logits/rejected": -0.22239543497562408, "logps/chosen": -239.54380798339844, "logps/rejected": -359.0157775878906, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -2.2820658683776855, "rewards/margins": 13.656662940979004, "rewards/rejected": -15.938728332519531, "step": 3019 }, { "epoch": 1.03, "learning_rate": 9.988945840908582e-07, "logits/chosen": -0.2770176827907562, "logits/rejected": -0.2581433355808258, "logps/chosen": -168.51634216308594, "logps/rejected": -245.2829132080078, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.6098456382751465, "rewards/margins": 10.823400497436523, "rewards/rejected": -12.433244705200195, "step": 3020 }, { "epoch": 1.03, "learning_rate": 9.983418765583992e-07, "logits/chosen": -0.232344850897789, "logits/rejected": -0.18334773182868958, "logps/chosen": -191.74700927734375, "logps/rejected": -341.2066650390625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.7020028829574585, "rewards/margins": 15.401741027832031, "rewards/rejected": -17.103744506835938, "step": 3021 }, { "epoch": 1.03, "learning_rate": 9.977891695324735e-07, "logits/chosen": -0.14756794273853302, "logits/rejected": -0.13484081625938416, "logps/chosen": -172.49066162109375, "logps/rejected": -333.4587097167969, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.7676005363464355, "rewards/margins": 14.736612319946289, "rewards/rejected": -16.504215240478516, "step": 3022 }, { "epoch": 1.03, "learning_rate": 9.972364631819264e-07, "logits/chosen": -0.11474282294511795, "logits/rejected": -0.10960159450769424, "logps/chosen": -195.35272216796875, "logps/rejected": -346.28350830078125, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -0.20718954503536224, "rewards/margins": 15.606529235839844, "rewards/rejected": -15.813718795776367, "step": 3023 }, { "epoch": 1.03, "learning_rate": 9.966837576756015e-07, "logits/chosen": -0.2314659059047699, "logits/rejected": -0.1883460134267807, "logps/chosen": -234.57171630859375, "logps/rejected": -331.6065368652344, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7275532484054565, "rewards/margins": 15.024949073791504, "rewards/rejected": -15.752501487731934, "step": 3024 }, { "epoch": 1.03, "learning_rate": 9.961310531823427e-07, "logits/chosen": -0.10064321756362915, "logits/rejected": -0.06571125984191895, "logps/chosen": -169.1300811767578, "logps/rejected": -252.2733612060547, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.5856857299804688, "rewards/margins": 12.287395477294922, "rewards/rejected": -13.873082160949707, "step": 3025 }, { "epoch": 1.03, "learning_rate": 9.955783498709942e-07, "logits/chosen": -0.16296149790287018, "logits/rejected": -0.16055811941623688, "logps/chosen": -171.21893310546875, "logps/rejected": -314.0205078125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.9653354287147522, "rewards/margins": 12.507248878479004, "rewards/rejected": -13.47258472442627, "step": 3026 }, { "epoch": 1.03, "learning_rate": 9.950256479103982e-07, "logits/chosen": -0.14134421944618225, "logits/rejected": -0.14066842198371887, "logps/chosen": -190.53367614746094, "logps/rejected": -344.8051452636719, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -1.8438583612442017, "rewards/margins": 12.858503341674805, "rewards/rejected": -14.702362060546875, "step": 3027 }, { "epoch": 1.03, "learning_rate": 9.944729474693985e-07, "logits/chosen": -0.19590318202972412, "logits/rejected": -0.1460990309715271, "logps/chosen": -208.70083618164062, "logps/rejected": -317.3310852050781, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.490671843290329, "rewards/margins": 14.159486770629883, "rewards/rejected": -14.650158882141113, "step": 3028 }, { "epoch": 1.03, "learning_rate": 9.939202487168373e-07, "logits/chosen": -0.21221080422401428, "logits/rejected": -0.15035566687583923, "logps/chosen": -306.401123046875, "logps/rejected": -381.44500732421875, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.6341140270233154, "rewards/margins": 15.970335006713867, "rewards/rejected": -16.604450225830078, "step": 3029 }, { "epoch": 1.03, "learning_rate": 9.933675518215557e-07, "logits/chosen": -0.11718416213989258, "logits/rejected": -0.10682469606399536, "logps/chosen": -248.19300842285156, "logps/rejected": -397.5902404785156, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.645635724067688, "rewards/margins": 16.704700469970703, "rewards/rejected": -17.3503360748291, "step": 3030 }, { "epoch": 1.03, "learning_rate": 9.92814856952396e-07, "logits/chosen": -0.2355033904314041, "logits/rejected": -0.19543403387069702, "logps/chosen": -205.0269775390625, "logps/rejected": -406.2313232421875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.0920017957687378, "rewards/margins": 18.282325744628906, "rewards/rejected": -19.374326705932617, "step": 3031 }, { "epoch": 1.03, "learning_rate": 9.922621642781978e-07, "logits/chosen": -0.17166440188884735, "logits/rejected": -0.13481080532073975, "logps/chosen": -194.46417236328125, "logps/rejected": -361.11376953125, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.4814263582229614, "rewards/margins": 16.521947860717773, "rewards/rejected": -18.003374099731445, "step": 3032 }, { "epoch": 1.04, "learning_rate": 9.91709473967802e-07, "logits/chosen": -0.2692485451698303, "logits/rejected": -0.2190982550382614, "logps/chosen": -255.48329162597656, "logps/rejected": -335.23272705078125, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -0.27146434783935547, "rewards/margins": 14.359881401062012, "rewards/rejected": -14.631345748901367, "step": 3033 }, { "epoch": 1.04, "learning_rate": 9.911567861900476e-07, "logits/chosen": -0.1875806301832199, "logits/rejected": -0.16196346282958984, "logps/chosen": -236.81295776367188, "logps/rejected": -344.0286560058594, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.06714783608913422, "rewards/margins": 15.088815689086914, "rewards/rejected": -15.155963897705078, "step": 3034 }, { "epoch": 1.04, "learning_rate": 9.906041011137727e-07, "logits/chosen": -0.08845730870962143, "logits/rejected": -0.05980618670582771, "logps/chosen": -163.23011779785156, "logps/rejected": -317.8496398925781, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.548851490020752, "rewards/margins": 16.40134620666504, "rewards/rejected": -16.950197219848633, "step": 3035 }, { "epoch": 1.04, "learning_rate": 9.900514189078154e-07, "logits/chosen": -0.2057369202375412, "logits/rejected": -0.15950161218643188, "logps/chosen": -208.6392059326172, "logps/rejected": -386.3481750488281, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.5615185499191284, "rewards/margins": 19.70361328125, "rewards/rejected": -20.2651309967041, "step": 3036 }, { "epoch": 1.04, "learning_rate": 9.894987397410122e-07, "logits/chosen": -0.22102519869804382, "logits/rejected": -0.20590047538280487, "logps/chosen": -197.26669311523438, "logps/rejected": -360.49462890625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.4852590560913086, "rewards/margins": 16.298429489135742, "rewards/rejected": -16.783689498901367, "step": 3037 }, { "epoch": 1.04, "learning_rate": 9.889460637821992e-07, "logits/chosen": -0.22989784181118011, "logits/rejected": -0.21249817311763763, "logps/chosen": -231.04739379882812, "logps/rejected": -355.9742431640625, "loss": 0.0226, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3818501234054565, "rewards/margins": 12.665021896362305, "rewards/rejected": -14.046873092651367, "step": 3038 }, { "epoch": 1.04, "learning_rate": 9.88393391200211e-07, "logits/chosen": -0.19620294868946075, "logits/rejected": -0.15861427783966064, "logps/chosen": -167.95753479003906, "logps/rejected": -278.49420166015625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.6870747804641724, "rewards/margins": 14.063803672790527, "rewards/rejected": -14.750879287719727, "step": 3039 }, { "epoch": 1.04, "learning_rate": 9.878407221638816e-07, "logits/chosen": -0.16114108264446259, "logits/rejected": -0.12942272424697876, "logps/chosen": -242.21156311035156, "logps/rejected": -388.7965087890625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.12795370817184448, "rewards/margins": 17.009246826171875, "rewards/rejected": -16.88129425048828, "step": 3040 }, { "epoch": 1.04, "learning_rate": 9.872880568420443e-07, "logits/chosen": -0.2566482126712799, "logits/rejected": -0.2466057538986206, "logps/chosen": -220.15194702148438, "logps/rejected": -371.79364013671875, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -1.0121463537216187, "rewards/margins": 14.746488571166992, "rewards/rejected": -15.758634567260742, "step": 3041 }, { "epoch": 1.04, "learning_rate": 9.867353954035299e-07, "logits/chosen": -0.2718604505062103, "logits/rejected": -0.24625663459300995, "logps/chosen": -260.8822326660156, "logps/rejected": -430.27655029296875, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -2.1695353984832764, "rewards/margins": 16.781173706054688, "rewards/rejected": -18.95071029663086, "step": 3042 }, { "epoch": 1.04, "learning_rate": 9.861827380171686e-07, "logits/chosen": -0.2395256906747818, "logits/rejected": -0.19664669036865234, "logps/chosen": -206.4909210205078, "logps/rejected": -312.3520812988281, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.185294508934021, "rewards/margins": 14.310131072998047, "rewards/rejected": -15.4954252243042, "step": 3043 }, { "epoch": 1.04, "learning_rate": 9.85630084851791e-07, "logits/chosen": -0.13319413363933563, "logits/rejected": -0.12531711161136627, "logps/chosen": -163.1503143310547, "logps/rejected": -317.77447509765625, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -2.3857603073120117, "rewards/margins": 13.623075485229492, "rewards/rejected": -16.00883674621582, "step": 3044 }, { "epoch": 1.04, "learning_rate": 9.850774360762234e-07, "logits/chosen": -0.15142419934272766, "logits/rejected": -0.1410755217075348, "logps/chosen": -148.66519165039062, "logps/rejected": -342.4107360839844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.0353593826293945, "rewards/margins": 15.321377754211426, "rewards/rejected": -18.35673713684082, "step": 3045 }, { "epoch": 1.04, "learning_rate": 9.845247918592936e-07, "logits/chosen": -0.18750621378421783, "logits/rejected": -0.17383363842964172, "logps/chosen": -192.8421630859375, "logps/rejected": -356.9888000488281, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.780309796333313, "rewards/margins": 15.007478713989258, "rewards/rejected": -16.78778839111328, "step": 3046 }, { "epoch": 1.04, "learning_rate": 9.839721523698262e-07, "logits/chosen": -0.09574353694915771, "logits/rejected": -0.07254011929035187, "logps/chosen": -197.70022583007812, "logps/rejected": -328.58160400390625, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -1.4238590002059937, "rewards/margins": 13.574031829833984, "rewards/rejected": -14.99789047241211, "step": 3047 }, { "epoch": 1.04, "learning_rate": 9.83419517776645e-07, "logits/chosen": -0.17012862861156464, "logits/rejected": -0.13414078950881958, "logps/chosen": -234.9182891845703, "logps/rejected": -387.2203063964844, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -2.5557150840759277, "rewards/margins": 17.24273109436035, "rewards/rejected": -19.798446655273438, "step": 3048 }, { "epoch": 1.04, "learning_rate": 9.828668882485727e-07, "logits/chosen": -0.248368039727211, "logits/rejected": -0.20072445273399353, "logps/chosen": -162.85829162597656, "logps/rejected": -272.8964538574219, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.3653759956359863, "rewards/margins": 12.852731704711914, "rewards/rejected": -14.218107223510742, "step": 3049 }, { "epoch": 1.04, "learning_rate": 9.823142639544292e-07, "logits/chosen": -0.12040778249502182, "logits/rejected": -0.09064395725727081, "logps/chosen": -220.39971923828125, "logps/rejected": -396.4784240722656, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.7539582252502441, "rewards/margins": 17.50575065612793, "rewards/rejected": -19.25971031188965, "step": 3050 }, { "epoch": 1.04, "learning_rate": 9.817616450630342e-07, "logits/chosen": -0.20895078778266907, "logits/rejected": -0.18236111104488373, "logps/chosen": -194.6458740234375, "logps/rejected": -334.63311767578125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.6137935519218445, "rewards/margins": 15.824753761291504, "rewards/rejected": -16.438549041748047, "step": 3051 }, { "epoch": 1.04, "learning_rate": 9.81209031743205e-07, "logits/chosen": -0.2612856924533844, "logits/rejected": -0.26426398754119873, "logps/chosen": -200.88034057617188, "logps/rejected": -304.38232421875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -1.9772984981536865, "rewards/margins": 10.067334175109863, "rewards/rejected": -12.044632911682129, "step": 3052 }, { "epoch": 1.04, "learning_rate": 9.80656424163757e-07, "logits/chosen": -0.15910448133945465, "logits/rejected": -0.14657965302467346, "logps/chosen": -244.20928955078125, "logps/rejected": -369.4093933105469, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.0452039241790771, "rewards/margins": 13.848020553588867, "rewards/rejected": -14.893224716186523, "step": 3053 }, { "epoch": 1.04, "learning_rate": 9.801038224935045e-07, "logits/chosen": -0.05460195988416672, "logits/rejected": -0.04202975332736969, "logps/chosen": -175.9880828857422, "logps/rejected": -332.53680419921875, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.8709592819213867, "rewards/margins": 13.59416389465332, "rewards/rejected": -15.465124130249023, "step": 3054 }, { "epoch": 1.04, "learning_rate": 9.795512269012594e-07, "logits/chosen": -0.11647769063711166, "logits/rejected": -0.09724069386720657, "logps/chosen": -113.41924285888672, "logps/rejected": -223.7730712890625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.4720330238342285, "rewards/margins": 9.935083389282227, "rewards/rejected": -11.407119750976562, "step": 3055 }, { "epoch": 1.04, "learning_rate": 9.78998637555833e-07, "logits/chosen": -0.1286982148885727, "logits/rejected": -0.09234919399023056, "logps/chosen": -196.68409729003906, "logps/rejected": -296.14520263671875, "loss": 0.0361, "rewards/accuracies": 1.0, "rewards/chosen": -2.065402030944824, "rewards/margins": 13.97827434539795, "rewards/rejected": -16.043676376342773, "step": 3056 }, { "epoch": 1.04, "learning_rate": 9.784460546260327e-07, "logits/chosen": -0.23264911770820618, "logits/rejected": -0.20549604296684265, "logps/chosen": -161.4665985107422, "logps/rejected": -325.24603271484375, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -1.3332064151763916, "rewards/margins": 13.778656005859375, "rewards/rejected": -15.111862182617188, "step": 3057 }, { "epoch": 1.04, "learning_rate": 9.778934782806648e-07, "logits/chosen": -0.13899648189544678, "logits/rejected": -0.14416338503360748, "logps/chosen": -165.6053924560547, "logps/rejected": -341.4591064453125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -2.981954574584961, "rewards/margins": 13.812379837036133, "rewards/rejected": -16.794334411621094, "step": 3058 }, { "epoch": 1.04, "learning_rate": 9.77340908688535e-07, "logits/chosen": -0.0742907002568245, "logits/rejected": -0.028028953820466995, "logps/chosen": -187.3181915283203, "logps/rejected": -360.94293212890625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.491581916809082, "rewards/margins": 17.37758445739746, "rewards/rejected": -18.869165420532227, "step": 3059 }, { "epoch": 1.04, "learning_rate": 9.767883460184441e-07, "logits/chosen": -0.25289028882980347, "logits/rejected": -0.20372658967971802, "logps/chosen": -282.4939270019531, "logps/rejected": -432.4317626953125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.1924707293510437, "rewards/margins": 21.442190170288086, "rewards/rejected": -21.634658813476562, "step": 3060 }, { "epoch": 1.04, "learning_rate": 9.762357904391939e-07, "logits/chosen": -0.08885563164949417, "logits/rejected": -0.050381969660520554, "logps/chosen": -207.45204162597656, "logps/rejected": -353.5301513671875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -2.8324007987976074, "rewards/margins": 15.587743759155273, "rewards/rejected": -18.42014503479004, "step": 3061 }, { "epoch": 1.05, "learning_rate": 9.756832421195814e-07, "logits/chosen": -0.19443397223949432, "logits/rejected": -0.14866474270820618, "logps/chosen": -205.54812622070312, "logps/rejected": -316.6396484375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.614097535610199, "rewards/margins": 15.549798965454102, "rewards/rejected": -16.163898468017578, "step": 3062 }, { "epoch": 1.05, "learning_rate": 9.751307012284027e-07, "logits/chosen": -0.1925150603055954, "logits/rejected": -0.14373672008514404, "logps/chosen": -254.61184692382812, "logps/rejected": -383.07501220703125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.2796077728271484, "rewards/margins": 17.311382293701172, "rewards/rejected": -18.590991973876953, "step": 3063 }, { "epoch": 1.05, "learning_rate": 9.745781679344519e-07, "logits/chosen": -0.12501364946365356, "logits/rejected": -0.09668019413948059, "logps/chosen": -177.71926879882812, "logps/rejected": -355.68096923828125, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -1.1727651357650757, "rewards/margins": 16.280227661132812, "rewards/rejected": -17.452993392944336, "step": 3064 }, { "epoch": 1.05, "learning_rate": 9.740256424065197e-07, "logits/chosen": -0.1884702891111374, "logits/rejected": -0.16980911791324615, "logps/chosen": -186.7607879638672, "logps/rejected": -369.418212890625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.0733485221862793, "rewards/margins": 17.723241806030273, "rewards/rejected": -18.79659080505371, "step": 3065 }, { "epoch": 1.05, "learning_rate": 9.734731248133956e-07, "logits/chosen": -0.3237001895904541, "logits/rejected": -0.28279680013656616, "logps/chosen": -197.4778289794922, "logps/rejected": -404.4801330566406, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.9762096405029297, "rewards/margins": 20.287128448486328, "rewards/rejected": -22.263338088989258, "step": 3066 }, { "epoch": 1.05, "learning_rate": 9.729206153238655e-07, "logits/chosen": -0.15228641033172607, "logits/rejected": -0.12311993539333344, "logps/chosen": -200.7812957763672, "logps/rejected": -288.5054626464844, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.1363455057144165, "rewards/margins": 11.916203498840332, "rewards/rejected": -13.052547454833984, "step": 3067 }, { "epoch": 1.05, "learning_rate": 9.723681141067138e-07, "logits/chosen": -0.21252180635929108, "logits/rejected": -0.21056967973709106, "logps/chosen": -224.33229064941406, "logps/rejected": -426.1135559082031, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1235971450805664, "rewards/margins": 17.126523971557617, "rewards/rejected": -18.2501220703125, "step": 3068 }, { "epoch": 1.05, "learning_rate": 9.71815621330722e-07, "logits/chosen": -0.15943937003612518, "logits/rejected": -0.11830492317676544, "logps/chosen": -158.50863647460938, "logps/rejected": -280.5957946777344, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.4099795818328857, "rewards/margins": 13.590295791625977, "rewards/rejected": -15.000275611877441, "step": 3069 }, { "epoch": 1.05, "learning_rate": 9.712631371646686e-07, "logits/chosen": -0.26055416464805603, "logits/rejected": -0.2224002331495285, "logps/chosen": -209.3285675048828, "logps/rejected": -368.6134338378906, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.673558235168457, "rewards/margins": 16.14044189453125, "rewards/rejected": -17.81399917602539, "step": 3070 }, { "epoch": 1.05, "learning_rate": 9.707106617773307e-07, "logits/chosen": -0.13894402980804443, "logits/rejected": -0.10778018087148666, "logps/chosen": -237.23255920410156, "logps/rejected": -393.08282470703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.185496211051941, "rewards/margins": 18.95660400390625, "rewards/rejected": -20.142099380493164, "step": 3071 }, { "epoch": 1.05, "learning_rate": 9.701581953374813e-07, "logits/chosen": -0.1601131409406662, "logits/rejected": -0.1252242922782898, "logps/chosen": -191.59210205078125, "logps/rejected": -301.4671630859375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.321488857269287, "rewards/margins": 13.209476470947266, "rewards/rejected": -15.530967712402344, "step": 3072 }, { "epoch": 1.05, "learning_rate": 9.69605738013891e-07, "logits/chosen": -0.20799647271633148, "logits/rejected": -0.16736873984336853, "logps/chosen": -231.14964294433594, "logps/rejected": -374.95379638671875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.2349892556667328, "rewards/margins": 17.37805938720703, "rewards/rejected": -17.613048553466797, "step": 3073 }, { "epoch": 1.05, "learning_rate": 9.69053289975329e-07, "logits/chosen": -0.09204217791557312, "logits/rejected": -0.04930312931537628, "logps/chosen": -243.030517578125, "logps/rejected": -397.8266906738281, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.20379969477653503, "rewards/margins": 19.664514541625977, "rewards/rejected": -19.868316650390625, "step": 3074 }, { "epoch": 1.05, "learning_rate": 9.68500851390559e-07, "logits/chosen": -0.048897936940193176, "logits/rejected": -0.06044916436076164, "logps/chosen": -120.53458404541016, "logps/rejected": -291.024658203125, "loss": 0.0723, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5711157321929932, "rewards/margins": 13.06421184539795, "rewards/rejected": -14.635326385498047, "step": 3075 }, { "epoch": 1.05, "learning_rate": 9.679484224283447e-07, "logits/chosen": -0.1478712260723114, "logits/rejected": -0.08748137950897217, "logps/chosen": -196.85499572753906, "logps/rejected": -348.28912353515625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.3549706935882568, "rewards/margins": 17.190576553344727, "rewards/rejected": -18.545547485351562, "step": 3076 }, { "epoch": 1.05, "learning_rate": 9.673960032574451e-07, "logits/chosen": -0.26388826966285706, "logits/rejected": -0.21874144673347473, "logps/chosen": -189.7751922607422, "logps/rejected": -337.8070983886719, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.3134512901306152, "rewards/margins": 16.39618682861328, "rewards/rejected": -17.709640502929688, "step": 3077 }, { "epoch": 1.05, "learning_rate": 9.668435940466162e-07, "logits/chosen": -0.17831739783287048, "logits/rejected": -0.15247386693954468, "logps/chosen": -153.23251342773438, "logps/rejected": -295.1980895996094, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.3364747762680054, "rewards/margins": 14.48250961303711, "rewards/rejected": -15.81898307800293, "step": 3078 }, { "epoch": 1.05, "learning_rate": 9.662911949646121e-07, "logits/chosen": -0.23751960694789886, "logits/rejected": -0.19723235070705414, "logps/chosen": -251.95468139648438, "logps/rejected": -400.760498046875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.3094804286956787, "rewards/margins": 15.768013000488281, "rewards/rejected": -18.07749366760254, "step": 3079 }, { "epoch": 1.05, "learning_rate": 9.657388061801828e-07, "logits/chosen": -0.2294648140668869, "logits/rejected": -0.213603675365448, "logps/chosen": -185.12442016601562, "logps/rejected": -334.6201477050781, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 0.3464497923851013, "rewards/margins": 15.122159957885742, "rewards/rejected": -14.775711059570312, "step": 3080 }, { "epoch": 1.05, "learning_rate": 9.65186427862075e-07, "logits/chosen": -0.2700219452381134, "logits/rejected": -0.18866212666034698, "logps/chosen": -263.7939453125, "logps/rejected": -294.04071044921875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.4779183864593506, "rewards/margins": 15.583553314208984, "rewards/rejected": -16.061473846435547, "step": 3081 }, { "epoch": 1.05, "learning_rate": 9.646340601790335e-07, "logits/chosen": -0.1604248732328415, "logits/rejected": -0.12100349366664886, "logps/chosen": -187.46734619140625, "logps/rejected": -342.0819091796875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.7881312370300293, "rewards/margins": 15.916560173034668, "rewards/rejected": -17.704692840576172, "step": 3082 }, { "epoch": 1.05, "learning_rate": 9.64081703299798e-07, "logits/chosen": -0.11642837524414062, "logits/rejected": -0.07398266345262527, "logps/chosen": -228.6009521484375, "logps/rejected": -330.68408203125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.381880283355713, "rewards/margins": 13.238824844360352, "rewards/rejected": -14.620704650878906, "step": 3083 }, { "epoch": 1.05, "learning_rate": 9.63529357393107e-07, "logits/chosen": -0.2017718404531479, "logits/rejected": -0.18158026039600372, "logps/chosen": -124.72740173339844, "logps/rejected": -291.6986083984375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0263420343399048, "rewards/margins": 14.71862506866455, "rewards/rejected": -15.744969367980957, "step": 3084 }, { "epoch": 1.05, "learning_rate": 9.629770226276938e-07, "logits/chosen": -0.12903724610805511, "logits/rejected": -0.12400664389133453, "logps/chosen": -165.08714294433594, "logps/rejected": -286.98931884765625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.8577648401260376, "rewards/margins": 12.803918838500977, "rewards/rejected": -13.661684036254883, "step": 3085 }, { "epoch": 1.05, "learning_rate": 9.624246991722893e-07, "logits/chosen": -0.22994278371334076, "logits/rejected": -0.19468960165977478, "logps/chosen": -196.28590393066406, "logps/rejected": -304.5653991699219, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.1162880659103394, "rewards/margins": 13.883623123168945, "rewards/rejected": -14.999911308288574, "step": 3086 }, { "epoch": 1.05, "learning_rate": 9.618723871956205e-07, "logits/chosen": -0.16852925717830658, "logits/rejected": -0.11550667136907578, "logps/chosen": -194.6932830810547, "logps/rejected": -350.0086364746094, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -2.114318609237671, "rewards/margins": 16.81112289428711, "rewards/rejected": -18.92544174194336, "step": 3087 }, { "epoch": 1.05, "learning_rate": 9.613200868664112e-07, "logits/chosen": -0.11903728544712067, "logits/rejected": -0.10216891765594482, "logps/chosen": -161.91600036621094, "logps/rejected": -303.84783935546875, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -1.4108338356018066, "rewards/margins": 13.171759605407715, "rewards/rejected": -14.58259391784668, "step": 3088 }, { "epoch": 1.05, "learning_rate": 9.60767798353382e-07, "logits/chosen": -0.1641489714384079, "logits/rejected": -0.14630287885665894, "logps/chosen": -232.9998779296875, "logps/rejected": -381.1523742675781, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/chosen": -1.008055329322815, "rewards/margins": 16.49602508544922, "rewards/rejected": -17.504077911376953, "step": 3089 }, { "epoch": 1.05, "learning_rate": 9.602155218252488e-07, "logits/chosen": -0.0011930298060178757, "logits/rejected": 0.01078356895595789, "logps/chosen": -190.94581604003906, "logps/rejected": -332.8023376464844, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.844545841217041, "rewards/margins": 14.077737808227539, "rewards/rejected": -15.922284126281738, "step": 3090 }, { "epoch": 1.05, "learning_rate": 9.596632574507241e-07, "logits/chosen": -0.19144335389137268, "logits/rejected": -0.17670980095863342, "logps/chosen": -198.24432373046875, "logps/rejected": -336.1546936035156, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.6223548054695129, "rewards/margins": 15.434797286987305, "rewards/rejected": -16.057151794433594, "step": 3091 }, { "epoch": 1.06, "learning_rate": 9.59111005398518e-07, "logits/chosen": -0.17355935275554657, "logits/rejected": -0.14577242732048035, "logps/chosen": -197.46429443359375, "logps/rejected": -353.2740478515625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -2.4523439407348633, "rewards/margins": 18.420955657958984, "rewards/rejected": -20.87329864501953, "step": 3092 }, { "epoch": 1.06, "learning_rate": 9.585587658373355e-07, "logits/chosen": -0.040814001113176346, "logits/rejected": -0.027455898001790047, "logps/chosen": -188.0370330810547, "logps/rejected": -394.6275329589844, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.24316143989563, "rewards/margins": 17.5721492767334, "rewards/rejected": -19.815311431884766, "step": 3093 }, { "epoch": 1.06, "learning_rate": 9.580065389358782e-07, "logits/chosen": -0.10888873040676117, "logits/rejected": -0.08307196944952011, "logps/chosen": -185.97250366210938, "logps/rejected": -289.77581787109375, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -2.262942314147949, "rewards/margins": 11.741897583007812, "rewards/rejected": -14.004838943481445, "step": 3094 }, { "epoch": 1.06, "learning_rate": 9.57454324862844e-07, "logits/chosen": -0.05047868937253952, "logits/rejected": -0.026840301230549812, "logps/chosen": -177.34710693359375, "logps/rejected": -339.8752746582031, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.45644497871398926, "rewards/margins": 16.301958084106445, "rewards/rejected": -16.75840187072754, "step": 3095 }, { "epoch": 1.06, "learning_rate": 9.569021237869257e-07, "logits/chosen": -0.05391566827893257, "logits/rejected": -0.035874143242836, "logps/chosen": -219.86111450195312, "logps/rejected": -339.6973571777344, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -0.49276304244995117, "rewards/margins": 14.198147773742676, "rewards/rejected": -14.690911293029785, "step": 3096 }, { "epoch": 1.06, "learning_rate": 9.563499358768142e-07, "logits/chosen": -0.08348876237869263, "logits/rejected": -0.046923719346523285, "logps/chosen": -231.15203857421875, "logps/rejected": -353.79931640625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.4280126094818115, "rewards/margins": 14.98350715637207, "rewards/rejected": -16.41152000427246, "step": 3097 }, { "epoch": 1.06, "learning_rate": 9.55797761301195e-07, "logits/chosen": -0.26284918189048767, "logits/rejected": -0.22836622595787048, "logps/chosen": -232.81494140625, "logps/rejected": -373.74835205078125, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.3127201199531555, "rewards/margins": 17.007810592651367, "rewards/rejected": -17.32052993774414, "step": 3098 }, { "epoch": 1.06, "learning_rate": 9.552456002287497e-07, "logits/chosen": -0.07995527982711792, "logits/rejected": -0.06119827181100845, "logps/chosen": -196.21917724609375, "logps/rejected": -329.0962219238281, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.5387368202209473, "rewards/margins": 12.943345069885254, "rewards/rejected": -14.482081413269043, "step": 3099 }, { "epoch": 1.06, "learning_rate": 9.546934528281559e-07, "logits/chosen": -0.11074541509151459, "logits/rejected": -0.09913934767246246, "logps/chosen": -150.63912963867188, "logps/rejected": -305.2176208496094, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.858336091041565, "rewards/margins": 14.613691329956055, "rewards/rejected": -16.472026824951172, "step": 3100 }, { "epoch": 1.06, "learning_rate": 9.541413192680867e-07, "logits/chosen": -0.11992418020963669, "logits/rejected": -0.10384819656610489, "logps/chosen": -128.81719970703125, "logps/rejected": -286.2259521484375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7113034129142761, "rewards/margins": 11.830072402954102, "rewards/rejected": -12.541375160217285, "step": 3101 }, { "epoch": 1.06, "learning_rate": 9.53589199717212e-07, "logits/chosen": -0.0948910042643547, "logits/rejected": -0.08148497343063354, "logps/chosen": -220.22695922851562, "logps/rejected": -353.46441650390625, "loss": 0.0377, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4341392517089844, "rewards/margins": 14.029969215393066, "rewards/rejected": -16.464107513427734, "step": 3102 }, { "epoch": 1.06, "learning_rate": 9.530370943441958e-07, "logits/chosen": -0.05721025913953781, "logits/rejected": -0.03714555874466896, "logps/chosen": -175.400390625, "logps/rejected": -341.365966796875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.639782428741455, "rewards/margins": 16.079864501953125, "rewards/rejected": -17.71964454650879, "step": 3103 }, { "epoch": 1.06, "learning_rate": 9.524850033176998e-07, "logits/chosen": -0.17543844878673553, "logits/rejected": -0.12415485829114914, "logps/chosen": -274.4568786621094, "logps/rejected": -371.2267761230469, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.262082576751709, "rewards/margins": 15.239253044128418, "rewards/rejected": -16.5013370513916, "step": 3104 }, { "epoch": 1.06, "learning_rate": 9.519329268063793e-07, "logits/chosen": -0.33027195930480957, "logits/rejected": -0.32615339756011963, "logps/chosen": -231.06430053710938, "logps/rejected": -426.9338073730469, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.9176706075668335, "rewards/margins": 19.744722366333008, "rewards/rejected": -20.66239356994629, "step": 3105 }, { "epoch": 1.06, "learning_rate": 9.513808649788862e-07, "logits/chosen": -0.15256577730178833, "logits/rejected": -0.09730701148509979, "logps/chosen": -221.92547607421875, "logps/rejected": -312.5195007324219, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -0.32323214411735535, "rewards/margins": 14.0926513671875, "rewards/rejected": -14.415882110595703, "step": 3106 }, { "epoch": 1.06, "learning_rate": 9.50828818003868e-07, "logits/chosen": -0.15915723145008087, "logits/rejected": -0.15489637851715088, "logps/chosen": -210.21969604492188, "logps/rejected": -373.4161071777344, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.013076066970825, "rewards/margins": 12.994994163513184, "rewards/rejected": -15.008069038391113, "step": 3107 }, { "epoch": 1.06, "learning_rate": 9.502767860499672e-07, "logits/chosen": -0.1823052614927292, "logits/rejected": -0.1395672857761383, "logps/chosen": -210.69154357910156, "logps/rejected": -268.0098876953125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.12173817306756973, "rewards/margins": 10.81232738494873, "rewards/rejected": -10.934065818786621, "step": 3108 }, { "epoch": 1.06, "learning_rate": 9.497247692858222e-07, "logits/chosen": -0.18115444481372833, "logits/rejected": -0.1297709345817566, "logps/chosen": -231.94615173339844, "logps/rejected": -296.7339782714844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7473520636558533, "rewards/margins": 12.574236869812012, "rewards/rejected": -13.321589469909668, "step": 3109 }, { "epoch": 1.06, "learning_rate": 9.491727678800665e-07, "logits/chosen": -0.32597723603248596, "logits/rejected": -0.2927711308002472, "logps/chosen": -236.66810607910156, "logps/rejected": -369.7451171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3519742488861084, "rewards/margins": 13.956836700439453, "rewards/rejected": -15.30881118774414, "step": 3110 }, { "epoch": 1.06, "learning_rate": 9.486207820013286e-07, "logits/chosen": -0.18603472411632538, "logits/rejected": -0.13728831708431244, "logps/chosen": -255.02255249023438, "logps/rejected": -373.520263671875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.9554758667945862, "rewards/margins": 14.964824676513672, "rewards/rejected": -15.92030143737793, "step": 3111 }, { "epoch": 1.06, "learning_rate": 9.480688118182328e-07, "logits/chosen": -0.15387673676013947, "logits/rejected": -0.13831265270709991, "logps/chosen": -220.09213256835938, "logps/rejected": -319.0982360839844, "loss": 0.0234, "rewards/accuracies": 0.9375, "rewards/chosen": -2.907099723815918, "rewards/margins": 9.816856384277344, "rewards/rejected": -12.723956108093262, "step": 3112 }, { "epoch": 1.06, "learning_rate": 9.475168574993982e-07, "logits/chosen": -0.1417388916015625, "logits/rejected": -0.12202239036560059, "logps/chosen": -223.63201904296875, "logps/rejected": -383.2921142578125, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.471491277217865, "rewards/margins": 16.0078125, "rewards/rejected": -16.47930335998535, "step": 3113 }, { "epoch": 1.06, "learning_rate": 9.469649192134396e-07, "logits/chosen": -0.13750891387462616, "logits/rejected": -0.10196415334939957, "logps/chosen": -228.68218994140625, "logps/rejected": -348.05712890625, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -2.078545093536377, "rewards/margins": 14.044554710388184, "rewards/rejected": -16.12310028076172, "step": 3114 }, { "epoch": 1.06, "learning_rate": 9.464129971289661e-07, "logits/chosen": -0.13773174583911896, "logits/rejected": -0.09269217401742935, "logps/chosen": -203.2914581298828, "logps/rejected": -272.1669006347656, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3304545879364014, "rewards/margins": 10.740683555603027, "rewards/rejected": -12.071137428283691, "step": 3115 }, { "epoch": 1.06, "learning_rate": 9.458610914145825e-07, "logits/chosen": -0.1435885727405548, "logits/rejected": -0.10887306928634644, "logps/chosen": -202.68785095214844, "logps/rejected": -329.1019287109375, "loss": 0.0149, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9728879928588867, "rewards/margins": 14.78364372253418, "rewards/rejected": -15.75653076171875, "step": 3116 }, { "epoch": 1.06, "learning_rate": 9.453092022388884e-07, "logits/chosen": -0.28044402599334717, "logits/rejected": -0.24201998114585876, "logps/chosen": -227.3961181640625, "logps/rejected": -326.17755126953125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.634082555770874, "rewards/margins": 14.141021728515625, "rewards/rejected": -15.775105476379395, "step": 3117 }, { "epoch": 1.06, "learning_rate": 9.447573297704783e-07, "logits/chosen": 0.002339519327506423, "logits/rejected": 0.0275015477091074, "logps/chosen": -173.11329650878906, "logps/rejected": -336.6950378417969, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -1.1441335678100586, "rewards/margins": 14.822344779968262, "rewards/rejected": -15.96647834777832, "step": 3118 }, { "epoch": 1.06, "learning_rate": 9.442054741779413e-07, "logits/chosen": -0.171188622713089, "logits/rejected": -0.1691436469554901, "logps/chosen": -217.07730102539062, "logps/rejected": -431.3880615234375, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": -1.274871826171875, "rewards/margins": 19.60761260986328, "rewards/rejected": -20.882482528686523, "step": 3119 }, { "epoch": 1.06, "learning_rate": 9.436536356298623e-07, "logits/chosen": -0.0497220940887928, "logits/rejected": -0.019684290513396263, "logps/chosen": -145.94338989257812, "logps/rejected": -229.8575897216797, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.9843168258666992, "rewards/margins": 13.354945182800293, "rewards/rejected": -14.339261054992676, "step": 3120 }, { "epoch": 1.07, "learning_rate": 9.431018142948196e-07, "logits/chosen": -0.17453445494174957, "logits/rejected": -0.16748939454555511, "logps/chosen": -172.14138793945312, "logps/rejected": -369.46148681640625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.2260906994342804, "rewards/margins": 17.8660831451416, "rewards/rejected": -18.092174530029297, "step": 3121 }, { "epoch": 1.07, "learning_rate": 9.425500103413878e-07, "logits/chosen": -0.1839819848537445, "logits/rejected": -0.14368495345115662, "logps/chosen": -189.24359130859375, "logps/rejected": -264.19940185546875, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.462893545627594, "rewards/margins": 13.339380264282227, "rewards/rejected": -13.802273750305176, "step": 3122 }, { "epoch": 1.07, "learning_rate": 9.419982239381352e-07, "logits/chosen": -0.11848246306180954, "logits/rejected": -0.08698293566703796, "logps/chosen": -205.745361328125, "logps/rejected": -370.5910339355469, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.4885084331035614, "rewards/margins": 17.1804256439209, "rewards/rejected": -17.668933868408203, "step": 3123 }, { "epoch": 1.07, "learning_rate": 9.414464552536241e-07, "logits/chosen": -0.14741568267345428, "logits/rejected": -0.11054179072380066, "logps/chosen": -134.25808715820312, "logps/rejected": -225.53807067871094, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.2089946269989014, "rewards/margins": 12.190378189086914, "rewards/rejected": -13.399372100830078, "step": 3124 }, { "epoch": 1.07, "learning_rate": 9.408947044564132e-07, "logits/chosen": -0.031395260244607925, "logits/rejected": -0.03837614506483078, "logps/chosen": -143.61415100097656, "logps/rejected": -286.0751953125, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -2.115553855895996, "rewards/margins": 10.926023483276367, "rewards/rejected": -13.041577339172363, "step": 3125 }, { "epoch": 1.07, "learning_rate": 9.403429717150544e-07, "logits/chosen": -0.19200019538402557, "logits/rejected": -0.14382821321487427, "logps/chosen": -183.86932373046875, "logps/rejected": -266.03875732421875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.1779080331325531, "rewards/margins": 12.495407104492188, "rewards/rejected": -12.673314094543457, "step": 3126 }, { "epoch": 1.07, "learning_rate": 9.397912571980946e-07, "logits/chosen": -0.12272932380437851, "logits/rejected": -0.08072531223297119, "logps/chosen": -168.9464111328125, "logps/rejected": -310.25958251953125, "loss": 0.0228, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6023757457733154, "rewards/margins": 15.32267951965332, "rewards/rejected": -17.9250545501709, "step": 3127 }, { "epoch": 1.07, "learning_rate": 9.392395610740749e-07, "logits/chosen": -0.15875473618507385, "logits/rejected": -0.13402201235294342, "logps/chosen": -178.09194946289062, "logps/rejected": -306.5220947265625, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.093529462814331, "rewards/margins": 11.97436809539795, "rewards/rejected": -14.06789779663086, "step": 3128 }, { "epoch": 1.07, "learning_rate": 9.386878835115304e-07, "logits/chosen": -0.14266261458396912, "logits/rejected": -0.1386861652135849, "logps/chosen": -187.27932739257812, "logps/rejected": -293.8365478515625, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -2.8453783988952637, "rewards/margins": 10.806838989257812, "rewards/rejected": -13.652217864990234, "step": 3129 }, { "epoch": 1.07, "learning_rate": 9.381362246789916e-07, "logits/chosen": -0.1374216228723526, "logits/rejected": -0.10829384624958038, "logps/chosen": -209.4052276611328, "logps/rejected": -309.2179260253906, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.0885392427444458, "rewards/margins": 14.51266860961914, "rewards/rejected": -14.601208686828613, "step": 3130 }, { "epoch": 1.07, "learning_rate": 9.375845847449822e-07, "logits/chosen": -0.07167013734579086, "logits/rejected": -0.05995259806513786, "logps/chosen": -165.51441955566406, "logps/rejected": -339.0998840332031, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -1.2449684143066406, "rewards/margins": 16.450794219970703, "rewards/rejected": -17.69576072692871, "step": 3131 }, { "epoch": 1.07, "learning_rate": 9.370329638780212e-07, "logits/chosen": -0.0760907232761383, "logits/rejected": -0.05685917288064957, "logps/chosen": -161.40635681152344, "logps/rejected": -322.7475891113281, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": -2.1716983318328857, "rewards/margins": 14.88776969909668, "rewards/rejected": -17.059467315673828, "step": 3132 }, { "epoch": 1.07, "learning_rate": 9.364813622466206e-07, "logits/chosen": -0.12733685970306396, "logits/rejected": -0.10416876524686813, "logps/chosen": -161.47776794433594, "logps/rejected": -336.7828063964844, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -2.2670276165008545, "rewards/margins": 14.684723854064941, "rewards/rejected": -16.951751708984375, "step": 3133 }, { "epoch": 1.07, "learning_rate": 9.359297800192871e-07, "logits/chosen": -0.14083915948867798, "logits/rejected": -0.11469366401433945, "logps/chosen": -221.66232299804688, "logps/rejected": -373.1584167480469, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -1.4423887729644775, "rewards/margins": 13.647828102111816, "rewards/rejected": -15.090217590332031, "step": 3134 }, { "epoch": 1.07, "learning_rate": 9.353782173645218e-07, "logits/chosen": -0.1906549334526062, "logits/rejected": -0.13535603880882263, "logps/chosen": -230.671875, "logps/rejected": -262.5301208496094, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.10719597339630127, "rewards/margins": 13.40369987487793, "rewards/rejected": -13.296504020690918, "step": 3135 }, { "epoch": 1.07, "learning_rate": 9.34826674450819e-07, "logits/chosen": -0.18586884438991547, "logits/rejected": -0.1536596268415451, "logps/chosen": -183.00418090820312, "logps/rejected": -317.3358154296875, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -0.8790905475616455, "rewards/margins": 13.589679718017578, "rewards/rejected": -14.468770027160645, "step": 3136 }, { "epoch": 1.07, "learning_rate": 9.342751514466681e-07, "logits/chosen": -0.2344483584165573, "logits/rejected": -0.19150038063526154, "logps/chosen": -227.71519470214844, "logps/rejected": -365.2087097167969, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.2330315113067627, "rewards/margins": 15.55988883972168, "rewards/rejected": -17.79292106628418, "step": 3137 }, { "epoch": 1.07, "learning_rate": 9.337236485205515e-07, "logits/chosen": -0.19019217789173126, "logits/rejected": -0.15178297460079193, "logps/chosen": -227.33090209960938, "logps/rejected": -372.5757751464844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.6271567344665527, "rewards/margins": 16.326622009277344, "rewards/rejected": -17.953779220581055, "step": 3138 }, { "epoch": 1.07, "learning_rate": 9.331721658409451e-07, "logits/chosen": -0.17657825350761414, "logits/rejected": -0.14690183103084564, "logps/chosen": -215.2215576171875, "logps/rejected": -382.2554626464844, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.5052939653396606, "rewards/margins": 18.453655242919922, "rewards/rejected": -19.95895004272461, "step": 3139 }, { "epoch": 1.07, "learning_rate": 9.326207035763202e-07, "logits/chosen": -0.11719764024019241, "logits/rejected": -0.08120378106832504, "logps/chosen": -215.22775268554688, "logps/rejected": -317.876708984375, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.8296600580215454, "rewards/margins": 12.821205139160156, "rewards/rejected": -13.65086555480957, "step": 3140 }, { "epoch": 1.07, "learning_rate": 9.320692618951402e-07, "logits/chosen": -0.11112800985574722, "logits/rejected": -0.08524336665868759, "logps/chosen": -157.73873901367188, "logps/rejected": -347.8079833984375, "loss": 0.1026, "rewards/accuracies": 0.9375, "rewards/chosen": -3.0455145835876465, "rewards/margins": 16.645221710205078, "rewards/rejected": -19.69073486328125, "step": 3141 }, { "epoch": 1.07, "learning_rate": 9.315178409658636e-07, "logits/chosen": -0.08433158695697784, "logits/rejected": -0.06742396950721741, "logps/chosen": -197.55990600585938, "logps/rejected": -317.12176513671875, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -2.0233538150787354, "rewards/margins": 13.013938903808594, "rewards/rejected": -15.03729248046875, "step": 3142 }, { "epoch": 1.07, "learning_rate": 9.309664409569416e-07, "logits/chosen": -0.2385646253824234, "logits/rejected": -0.2206839919090271, "logps/chosen": -215.42723083496094, "logps/rejected": -336.83087158203125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.7585945129394531, "rewards/margins": 14.059768676757812, "rewards/rejected": -15.818363189697266, "step": 3143 }, { "epoch": 1.07, "learning_rate": 9.304150620368187e-07, "logits/chosen": -0.28870633244514465, "logits/rejected": -0.2501069903373718, "logps/chosen": -273.1022644042969, "logps/rejected": -428.02557373046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3919256925582886, "rewards/margins": 17.4804744720459, "rewards/rejected": -18.872400283813477, "step": 3144 }, { "epoch": 1.07, "learning_rate": 9.298637043739345e-07, "logits/chosen": -0.10741961747407913, "logits/rejected": -0.09030979871749878, "logps/chosen": -219.82278442382812, "logps/rejected": -386.6274108886719, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.13539698719978333, "rewards/margins": 18.69716453552246, "rewards/rejected": -18.832561492919922, "step": 3145 }, { "epoch": 1.07, "learning_rate": 9.293123681367203e-07, "logits/chosen": -0.1804114133119583, "logits/rejected": -0.16467103362083435, "logps/chosen": -297.6932373046875, "logps/rejected": -437.518798828125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.491784691810608, "rewards/margins": 14.224949836730957, "rewards/rejected": -15.7167329788208, "step": 3146 }, { "epoch": 1.07, "learning_rate": 9.287610534936025e-07, "logits/chosen": -0.13191315531730652, "logits/rejected": -0.10501109063625336, "logps/chosen": -163.3719482421875, "logps/rejected": -239.47891235351562, "loss": 0.0598, "rewards/accuracies": 0.9375, "rewards/chosen": -3.1269562244415283, "rewards/margins": 8.97001838684082, "rewards/rejected": -12.096973419189453, "step": 3147 }, { "epoch": 1.07, "learning_rate": 9.282097606129998e-07, "logits/chosen": -0.13076451420783997, "logits/rejected": -0.10370424389839172, "logps/chosen": -176.2381591796875, "logps/rejected": -379.3201904296875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.89617919921875, "rewards/margins": 17.3612060546875, "rewards/rejected": -19.25738525390625, "step": 3148 }, { "epoch": 1.07, "learning_rate": 9.27658489663324e-07, "logits/chosen": -0.12958964705467224, "logits/rejected": -0.1126028448343277, "logps/chosen": -204.82664489746094, "logps/rejected": -329.5298767089844, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.873464584350586, "rewards/margins": 11.426189422607422, "rewards/rejected": -13.299654006958008, "step": 3149 }, { "epoch": 1.08, "learning_rate": 9.271072408129818e-07, "logits/chosen": -0.2325386106967926, "logits/rejected": -0.19847843050956726, "logps/chosen": -213.7554473876953, "logps/rejected": -421.0218200683594, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2641919255256653, "rewards/margins": 18.88920783996582, "rewards/rejected": -18.625015258789062, "step": 3150 }, { "epoch": 1.08, "learning_rate": 9.265560142303709e-07, "logits/chosen": -0.050254352390766144, "logits/rejected": -0.016909508034586906, "logps/chosen": -168.71575927734375, "logps/rejected": -241.9451904296875, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.593208372592926, "rewards/margins": 11.042861938476562, "rewards/rejected": -11.636070251464844, "step": 3151 }, { "epoch": 1.08, "learning_rate": 9.260048100838846e-07, "logits/chosen": -0.15805649757385254, "logits/rejected": -0.10610216110944748, "logps/chosen": -252.65817260742188, "logps/rejected": -315.9561462402344, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.16001325845718384, "rewards/margins": 13.892685890197754, "rewards/rejected": -14.052700996398926, "step": 3152 }, { "epoch": 1.08, "learning_rate": 9.25453628541908e-07, "logits/chosen": -0.17205765843391418, "logits/rejected": -0.1591949760913849, "logps/chosen": -210.69224548339844, "logps/rejected": -376.1203918457031, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.8243327140808105, "rewards/margins": 14.27888011932373, "rewards/rejected": -16.103212356567383, "step": 3153 }, { "epoch": 1.08, "learning_rate": 9.249024697728183e-07, "logits/chosen": -0.2475374937057495, "logits/rejected": -0.22765113413333893, "logps/chosen": -220.7393798828125, "logps/rejected": -387.6424560546875, "loss": 0.0254, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9882895946502686, "rewards/margins": 14.91664981842041, "rewards/rejected": -15.904940605163574, "step": 3154 }, { "epoch": 1.08, "learning_rate": 9.243513339449883e-07, "logits/chosen": -0.21391867101192474, "logits/rejected": -0.15352413058280945, "logps/chosen": -206.87930297851562, "logps/rejected": -317.98577880859375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.5925308465957642, "rewards/margins": 15.105274200439453, "rewards/rejected": -16.697805404663086, "step": 3155 }, { "epoch": 1.08, "learning_rate": 9.238002212267821e-07, "logits/chosen": -0.1475757509469986, "logits/rejected": -0.13275067508220673, "logps/chosen": -221.76995849609375, "logps/rejected": -410.89483642578125, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": -3.333624839782715, "rewards/margins": 18.12242317199707, "rewards/rejected": -21.4560489654541, "step": 3156 }, { "epoch": 1.08, "learning_rate": 9.23249131786556e-07, "logits/chosen": -0.20600160956382751, "logits/rejected": -0.20213542878627777, "logps/chosen": -214.7272491455078, "logps/rejected": -386.0377197265625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.8844496607780457, "rewards/margins": 16.39840316772461, "rewards/rejected": -17.282854080200195, "step": 3157 }, { "epoch": 1.08, "learning_rate": 9.226980657926613e-07, "logits/chosen": -0.17646020650863647, "logits/rejected": -0.12146538496017456, "logps/chosen": -222.54019165039062, "logps/rejected": -290.8224182128906, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.556800365447998, "rewards/margins": 12.693432807922363, "rewards/rejected": -15.250232696533203, "step": 3158 }, { "epoch": 1.08, "learning_rate": 9.221470234134405e-07, "logits/chosen": -0.1724245697259903, "logits/rejected": -0.1487186849117279, "logps/chosen": -244.03970336914062, "logps/rejected": -386.2981262207031, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.5888899564743042, "rewards/margins": 16.8172550201416, "rewards/rejected": -17.406145095825195, "step": 3159 }, { "epoch": 1.08, "learning_rate": 9.215960048172299e-07, "logits/chosen": -0.15922363102436066, "logits/rejected": -0.14464063942432404, "logps/chosen": -179.21751403808594, "logps/rejected": -333.27716064453125, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.1065977811813354, "rewards/margins": 16.263931274414062, "rewards/rejected": -17.370529174804688, "step": 3160 }, { "epoch": 1.08, "learning_rate": 9.210450101723578e-07, "logits/chosen": -0.19618795812129974, "logits/rejected": -0.17150220274925232, "logps/chosen": -228.4084014892578, "logps/rejected": -410.90753173828125, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1298682689666748, "rewards/margins": 18.17929458618164, "rewards/rejected": -19.30916404724121, "step": 3161 }, { "epoch": 1.08, "learning_rate": 9.204940396471453e-07, "logits/chosen": -0.2652772068977356, "logits/rejected": -0.2522541582584381, "logps/chosen": -194.7626495361328, "logps/rejected": -352.1079406738281, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -1.3116689920425415, "rewards/margins": 14.659494400024414, "rewards/rejected": -15.971162796020508, "step": 3162 }, { "epoch": 1.08, "learning_rate": 9.199430934099067e-07, "logits/chosen": -0.2049717754125595, "logits/rejected": -0.1935584545135498, "logps/chosen": -241.4491424560547, "logps/rejected": -357.6413879394531, "loss": 0.0171, "rewards/accuracies": 0.9375, "rewards/chosen": -1.836303949356079, "rewards/margins": 12.719104766845703, "rewards/rejected": -14.55540943145752, "step": 3163 }, { "epoch": 1.08, "learning_rate": 9.193921716289481e-07, "logits/chosen": -0.2179216891527176, "logits/rejected": -0.18659670650959015, "logps/chosen": -202.93128967285156, "logps/rejected": -297.69805908203125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.8651618957519531, "rewards/margins": 12.614021301269531, "rewards/rejected": -14.479183197021484, "step": 3164 }, { "epoch": 1.08, "learning_rate": 9.188412744725689e-07, "logits/chosen": -0.17791374027729034, "logits/rejected": -0.15678603947162628, "logps/chosen": -244.0684356689453, "logps/rejected": -370.3956298828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9699376821517944, "rewards/margins": 15.565176010131836, "rewards/rejected": -16.535114288330078, "step": 3165 }, { "epoch": 1.08, "learning_rate": 9.182904021090601e-07, "logits/chosen": -0.16333366930484772, "logits/rejected": -0.11718254536390305, "logps/chosen": -150.78797912597656, "logps/rejected": -244.79351806640625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.8231079578399658, "rewards/margins": 12.5376615524292, "rewards/rejected": -14.360771179199219, "step": 3166 }, { "epoch": 1.08, "learning_rate": 9.177395547067056e-07, "logits/chosen": -0.17055822908878326, "logits/rejected": -0.13862383365631104, "logps/chosen": -202.54238891601562, "logps/rejected": -318.71575927734375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7361718416213989, "rewards/margins": 13.998242378234863, "rewards/rejected": -14.734414100646973, "step": 3167 }, { "epoch": 1.08, "learning_rate": 9.171887324337826e-07, "logits/chosen": -0.2186027467250824, "logits/rejected": -0.1772167980670929, "logps/chosen": -222.00230407714844, "logps/rejected": -332.6708068847656, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.3868137001991272, "rewards/margins": 15.91678237915039, "rewards/rejected": -16.30359649658203, "step": 3168 }, { "epoch": 1.08, "learning_rate": 9.166379354585582e-07, "logits/chosen": -0.16011708974838257, "logits/rejected": -0.10555334389209747, "logps/chosen": -227.8992462158203, "logps/rejected": -297.9986267089844, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 0.32685619592666626, "rewards/margins": 16.29505157470703, "rewards/rejected": -15.968194007873535, "step": 3169 }, { "epoch": 1.08, "learning_rate": 9.160871639492946e-07, "logits/chosen": -0.1430855542421341, "logits/rejected": -0.10869771987199783, "logps/chosen": -219.98721313476562, "logps/rejected": -376.12713623046875, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": -1.9049466848373413, "rewards/margins": 17.838916778564453, "rewards/rejected": -19.743865966796875, "step": 3170 }, { "epoch": 1.08, "learning_rate": 9.155364180742448e-07, "logits/chosen": -0.2098914086818695, "logits/rejected": -0.1376195102930069, "logps/chosen": -253.05905151367188, "logps/rejected": -353.6317138671875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.7797076106071472, "rewards/margins": 15.034440040588379, "rewards/rejected": -15.814148902893066, "step": 3171 }, { "epoch": 1.08, "learning_rate": 9.149856980016528e-07, "logits/chosen": -0.16125179827213287, "logits/rejected": -0.1407567262649536, "logps/chosen": -212.01963806152344, "logps/rejected": -366.03582763671875, "loss": 0.0221, "rewards/accuracies": 0.9375, "rewards/chosen": -1.294379711151123, "rewards/margins": 14.880181312561035, "rewards/rejected": -16.174560546875, "step": 3172 }, { "epoch": 1.08, "learning_rate": 9.144350038997573e-07, "logits/chosen": -0.1747862696647644, "logits/rejected": -0.12733684480190277, "logps/chosen": -243.77249145507812, "logps/rejected": -423.3822021484375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.2308673858642578, "rewards/margins": 18.978586196899414, "rewards/rejected": -20.20945167541504, "step": 3173 }, { "epoch": 1.08, "learning_rate": 9.138843359367871e-07, "logits/chosen": -0.13919517397880554, "logits/rejected": -0.12060278654098511, "logps/chosen": -164.9200897216797, "logps/rejected": -340.4136047363281, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -2.081733465194702, "rewards/margins": 16.705495834350586, "rewards/rejected": -18.787229537963867, "step": 3174 }, { "epoch": 1.08, "learning_rate": 9.133336942809639e-07, "logits/chosen": -0.1765202432870865, "logits/rejected": -0.15287405252456665, "logps/chosen": -265.8238525390625, "logps/rejected": -416.344482421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.31640174984931946, "rewards/margins": 16.23166847229004, "rewards/rejected": -16.548070907592773, "step": 3175 }, { "epoch": 1.08, "learning_rate": 9.127830791005013e-07, "logits/chosen": -0.18482442200183868, "logits/rejected": -0.16298189759254456, "logps/chosen": -221.62557983398438, "logps/rejected": -451.5989990234375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.7632625102996826, "rewards/margins": 20.279888153076172, "rewards/rejected": -23.04315185546875, "step": 3176 }, { "epoch": 1.08, "learning_rate": 9.122324905636042e-07, "logits/chosen": -0.3052852153778076, "logits/rejected": -0.2943037748336792, "logps/chosen": -163.858154296875, "logps/rejected": -296.5411682128906, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.902228832244873, "rewards/margins": 13.445837020874023, "rewards/rejected": -15.348065376281738, "step": 3177 }, { "epoch": 1.08, "learning_rate": 9.116819288384701e-07, "logits/chosen": -0.2015761137008667, "logits/rejected": -0.18729834258556366, "logps/chosen": -253.2954559326172, "logps/rejected": -403.111328125, "loss": 0.033, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2678075432777405, "rewards/margins": 14.75464916229248, "rewards/rejected": -15.022455215454102, "step": 3178 }, { "epoch": 1.08, "learning_rate": 9.111313940932878e-07, "logits/chosen": -0.23579256236553192, "logits/rejected": -0.20441479980945587, "logps/chosen": -178.68545532226562, "logps/rejected": -354.3190612792969, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.0207736492156982, "rewards/margins": 17.73248863220215, "rewards/rejected": -19.753265380859375, "step": 3179 }, { "epoch": 1.09, "learning_rate": 9.105808864962383e-07, "logits/chosen": -0.26137450337409973, "logits/rejected": -0.2116876244544983, "logps/chosen": -239.32363891601562, "logps/rejected": -416.37579345703125, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -2.3246684074401855, "rewards/margins": 18.164371490478516, "rewards/rejected": -20.48904037475586, "step": 3180 }, { "epoch": 1.09, "learning_rate": 9.100304062154941e-07, "logits/chosen": -0.16499653458595276, "logits/rejected": -0.1618432104587555, "logps/chosen": -203.18344116210938, "logps/rejected": -378.7737731933594, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": -1.7921299934387207, "rewards/margins": 13.404664039611816, "rewards/rejected": -15.196794509887695, "step": 3181 }, { "epoch": 1.09, "learning_rate": 9.094799534192187e-07, "logits/chosen": -0.07860301434993744, "logits/rejected": -0.040160924196243286, "logps/chosen": -143.64337158203125, "logps/rejected": -312.6624450683594, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.333983898162842, "rewards/margins": 17.13430404663086, "rewards/rejected": -19.46828842163086, "step": 3182 }, { "epoch": 1.09, "learning_rate": 9.089295282755689e-07, "logits/chosen": -0.2532152831554413, "logits/rejected": -0.22379420697689056, "logps/chosen": -226.5990447998047, "logps/rejected": -376.4544372558594, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 0.17762872576713562, "rewards/margins": 16.31984519958496, "rewards/rejected": -16.1422176361084, "step": 3183 }, { "epoch": 1.09, "learning_rate": 9.083791309526908e-07, "logits/chosen": -0.1881043165922165, "logits/rejected": -0.16820329427719116, "logps/chosen": -162.92620849609375, "logps/rejected": -240.75466918945312, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8140417337417603, "rewards/margins": 10.35843276977539, "rewards/rejected": -11.172473907470703, "step": 3184 }, { "epoch": 1.09, "learning_rate": 9.078287616187244e-07, "logits/chosen": -0.23432984948158264, "logits/rejected": -0.2222115397453308, "logps/chosen": -169.0701446533203, "logps/rejected": -332.88885498046875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.0542380809783936, "rewards/margins": 14.396989822387695, "rewards/rejected": -16.45122718811035, "step": 3185 }, { "epoch": 1.09, "learning_rate": 9.072784204417994e-07, "logits/chosen": -0.18600475788116455, "logits/rejected": -0.16929171979427338, "logps/chosen": -201.1953582763672, "logps/rejected": -342.6588439941406, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.8522247076034546, "rewards/margins": 15.601632118225098, "rewards/rejected": -17.453857421875, "step": 3186 }, { "epoch": 1.09, "learning_rate": 9.067281075900366e-07, "logits/chosen": -0.31734350323677063, "logits/rejected": -0.2906774878501892, "logps/chosen": -195.7786407470703, "logps/rejected": -339.6678466796875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.2937067151069641, "rewards/margins": 14.078607559204102, "rewards/rejected": -14.372315406799316, "step": 3187 }, { "epoch": 1.09, "learning_rate": 9.061778232315503e-07, "logits/chosen": -0.24208524823188782, "logits/rejected": -0.2186145782470703, "logps/chosen": -183.77694702148438, "logps/rejected": -341.48309326171875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.419710874557495, "rewards/margins": 15.184589385986328, "rewards/rejected": -17.60430145263672, "step": 3188 }, { "epoch": 1.09, "learning_rate": 9.05627567534444e-07, "logits/chosen": -0.21642549335956573, "logits/rejected": -0.1778450757265091, "logps/chosen": -245.0028533935547, "logps/rejected": -347.8437805175781, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.12638524174690247, "rewards/margins": 15.945601463317871, "rewards/rejected": -16.07198715209961, "step": 3189 }, { "epoch": 1.09, "learning_rate": 9.050773406668138e-07, "logits/chosen": -0.2319636344909668, "logits/rejected": -0.20805363357067108, "logps/chosen": -198.93653869628906, "logps/rejected": -316.17431640625, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -1.5821542739868164, "rewards/margins": 13.122278213500977, "rewards/rejected": -14.70443058013916, "step": 3190 }, { "epoch": 1.09, "learning_rate": 9.04527142796746e-07, "logits/chosen": -0.22314375638961792, "logits/rejected": -0.22815346717834473, "logps/chosen": -214.5354461669922, "logps/rejected": -415.359619140625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.158928871154785, "rewards/margins": 16.625930786132812, "rewards/rejected": -18.784860610961914, "step": 3191 }, { "epoch": 1.09, "learning_rate": 9.039769740923182e-07, "logits/chosen": -0.15842227637767792, "logits/rejected": -0.1563740223646164, "logps/chosen": -215.76007080078125, "logps/rejected": -396.4833984375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.872190535068512, "rewards/margins": 16.12883186340332, "rewards/rejected": -17.001022338867188, "step": 3192 }, { "epoch": 1.09, "learning_rate": 9.034268347216e-07, "logits/chosen": -0.17560546100139618, "logits/rejected": -0.16083209216594696, "logps/chosen": -197.55978393554688, "logps/rejected": -391.32049560546875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.7516661882400513, "rewards/margins": 17.644594192504883, "rewards/rejected": -18.396259307861328, "step": 3193 }, { "epoch": 1.09, "learning_rate": 9.02876724852651e-07, "logits/chosen": -0.1392374187707901, "logits/rejected": -0.10109042376279831, "logps/chosen": -196.06199645996094, "logps/rejected": -329.61859130859375, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -0.3784034848213196, "rewards/margins": 14.829035758972168, "rewards/rejected": -15.207440376281738, "step": 3194 }, { "epoch": 1.09, "learning_rate": 9.023266446535219e-07, "logits/chosen": -0.277675598859787, "logits/rejected": -0.27547264099121094, "logps/chosen": -194.48638916015625, "logps/rejected": -396.2115783691406, "loss": 0.0237, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3354358673095703, "rewards/margins": 16.98889923095703, "rewards/rejected": -18.324337005615234, "step": 3195 }, { "epoch": 1.09, "learning_rate": 9.017765942922553e-07, "logits/chosen": -0.18926160037517548, "logits/rejected": -0.17393629252910614, "logps/chosen": -213.9849853515625, "logps/rejected": -363.8076171875, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": -1.3773767948150635, "rewards/margins": 15.93521499633789, "rewards/rejected": -17.312593460083008, "step": 3196 }, { "epoch": 1.09, "learning_rate": 9.012265739368832e-07, "logits/chosen": -0.21261152625083923, "logits/rejected": -0.17305423319339752, "logps/chosen": -200.15274047851562, "logps/rejected": -343.55865478515625, "loss": 0.0194, "rewards/accuracies": 0.9375, "rewards/chosen": 0.13543295860290527, "rewards/margins": 15.290011405944824, "rewards/rejected": -15.15457820892334, "step": 3197 }, { "epoch": 1.09, "learning_rate": 9.006765837554302e-07, "logits/chosen": -0.13173893094062805, "logits/rejected": -0.10409677773714066, "logps/chosen": -150.3448486328125, "logps/rejected": -261.6292419433594, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.3409372568130493, "rewards/margins": 11.891478538513184, "rewards/rejected": -13.232416152954102, "step": 3198 }, { "epoch": 1.09, "learning_rate": 9.001266239159102e-07, "logits/chosen": -0.2838703393936157, "logits/rejected": -0.2201271504163742, "logps/chosen": -282.1664123535156, "logps/rejected": -390.01617431640625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4997931718826294, "rewards/margins": 18.685388565063477, "rewards/rejected": -19.1851806640625, "step": 3199 }, { "epoch": 1.09, "learning_rate": 8.995766945863275e-07, "logits/chosen": -0.23942458629608154, "logits/rejected": -0.21171748638153076, "logps/chosen": -217.0388946533203, "logps/rejected": -326.59393310546875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.3781672418117523, "rewards/margins": 14.718335151672363, "rewards/rejected": -15.096503257751465, "step": 3200 }, { "epoch": 1.09, "learning_rate": 8.990267959346797e-07, "logits/chosen": -0.23418888449668884, "logits/rejected": -0.21989141404628754, "logps/chosen": -190.42352294921875, "logps/rejected": -348.0929870605469, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -2.0835089683532715, "rewards/margins": 14.233513832092285, "rewards/rejected": -16.3170223236084, "step": 3201 }, { "epoch": 1.09, "learning_rate": 8.984769281289515e-07, "logits/chosen": -0.16826172173023224, "logits/rejected": -0.15198230743408203, "logps/chosen": -161.07965087890625, "logps/rejected": -275.38262939453125, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.7178518772125244, "rewards/margins": 12.522305488586426, "rewards/rejected": -13.240157127380371, "step": 3202 }, { "epoch": 1.09, "learning_rate": 8.979270913371212e-07, "logits/chosen": -0.21409568190574646, "logits/rejected": -0.1602664738893509, "logps/chosen": -175.57321166992188, "logps/rejected": -294.1061706542969, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.9983948469161987, "rewards/margins": 14.025714874267578, "rewards/rejected": -16.024110794067383, "step": 3203 }, { "epoch": 1.09, "learning_rate": 8.973772857271557e-07, "logits/chosen": -0.2808338403701782, "logits/rejected": -0.26682302355766296, "logps/chosen": -237.10247802734375, "logps/rejected": -398.5320129394531, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.0487959384918213, "rewards/margins": 14.724455833435059, "rewards/rejected": -16.773250579833984, "step": 3204 }, { "epoch": 1.09, "learning_rate": 8.968275114670132e-07, "logits/chosen": -0.22969776391983032, "logits/rejected": -0.18927115201950073, "logps/chosen": -202.39199829101562, "logps/rejected": -293.15673828125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6374362111091614, "rewards/margins": 14.495439529418945, "rewards/rejected": -15.132874488830566, "step": 3205 }, { "epoch": 1.09, "learning_rate": 8.962777687246422e-07, "logits/chosen": -0.08578572422266006, "logits/rejected": -0.09880663454532623, "logps/chosen": -173.99465942382812, "logps/rejected": -377.5868225097656, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -2.715623140335083, "rewards/margins": 15.512032508850098, "rewards/rejected": -18.227657318115234, "step": 3206 }, { "epoch": 1.09, "learning_rate": 8.957280576679813e-07, "logits/chosen": -0.18434245884418488, "logits/rejected": -0.1461976021528244, "logps/chosen": -211.5600128173828, "logps/rejected": -320.099853515625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.38070711493492126, "rewards/margins": 13.735140800476074, "rewards/rejected": -14.115848541259766, "step": 3207 }, { "epoch": 1.09, "learning_rate": 8.9517837846496e-07, "logits/chosen": -0.19314132630825043, "logits/rejected": -0.18667174875736237, "logps/chosen": -224.31597900390625, "logps/rejected": -413.1455383300781, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9179038405418396, "rewards/margins": 18.266115188598633, "rewards/rejected": -19.184019088745117, "step": 3208 }, { "epoch": 1.1, "learning_rate": 8.946287312834975e-07, "logits/chosen": -0.20245447754859924, "logits/rejected": -0.1524745672941208, "logps/chosen": -221.804443359375, "logps/rejected": -323.8363342285156, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.1947736740112305, "rewards/margins": 15.985246658325195, "rewards/rejected": -17.180021286010742, "step": 3209 }, { "epoch": 1.1, "learning_rate": 8.940791162915032e-07, "logits/chosen": -0.26448264718055725, "logits/rejected": -0.23609495162963867, "logps/chosen": -170.63580322265625, "logps/rejected": -293.9029541015625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.1414272785186768, "rewards/margins": 12.292903900146484, "rewards/rejected": -14.434332847595215, "step": 3210 }, { "epoch": 1.1, "learning_rate": 8.935295336568773e-07, "logits/chosen": -0.19775277376174927, "logits/rejected": -0.20014329254627228, "logps/chosen": -189.96617126464844, "logps/rejected": -350.5424499511719, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.3124080002307892, "rewards/margins": 13.761236190795898, "rewards/rejected": -14.073643684387207, "step": 3211 }, { "epoch": 1.1, "learning_rate": 8.929799835475093e-07, "logits/chosen": -0.10186981409788132, "logits/rejected": -0.07316962629556656, "logps/chosen": -111.24292755126953, "logps/rejected": -231.39010620117188, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 0.08701738715171814, "rewards/margins": 11.372812271118164, "rewards/rejected": -11.285795211791992, "step": 3212 }, { "epoch": 1.1, "learning_rate": 8.924304661312799e-07, "logits/chosen": -0.21896761655807495, "logits/rejected": -0.1959458887577057, "logps/chosen": -223.6258087158203, "logps/rejected": -357.6283874511719, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.22710692882537842, "rewards/margins": 17.598451614379883, "rewards/rejected": -17.825557708740234, "step": 3213 }, { "epoch": 1.1, "learning_rate": 8.918809815760583e-07, "logits/chosen": -0.2525615394115448, "logits/rejected": -0.2293950617313385, "logps/chosen": -189.56573486328125, "logps/rejected": -301.77239990234375, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.7100234627723694, "rewards/margins": 13.668991088867188, "rewards/rejected": -14.37901496887207, "step": 3214 }, { "epoch": 1.1, "learning_rate": 8.913315300497046e-07, "logits/chosen": -0.267031729221344, "logits/rejected": -0.2447529435157776, "logps/chosen": -258.3818359375, "logps/rejected": -348.2962951660156, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": 0.11833085119724274, "rewards/margins": 12.98213005065918, "rewards/rejected": -12.863800048828125, "step": 3215 }, { "epoch": 1.1, "learning_rate": 8.907821117200693e-07, "logits/chosen": -0.17549625039100647, "logits/rejected": -0.12776020169258118, "logps/chosen": -165.1666259765625, "logps/rejected": -238.82965087890625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.8205612897872925, "rewards/margins": 12.304187774658203, "rewards/rejected": -13.124751091003418, "step": 3216 }, { "epoch": 1.1, "learning_rate": 8.902327267549909e-07, "logits/chosen": -0.22203657031059265, "logits/rejected": -0.17187538743019104, "logps/chosen": -220.17605590820312, "logps/rejected": -348.9235534667969, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.9430493116378784, "rewards/margins": 15.903741836547852, "rewards/rejected": -17.846790313720703, "step": 3217 }, { "epoch": 1.1, "learning_rate": 8.896833753223003e-07, "logits/chosen": -0.10437380522489548, "logits/rejected": -0.07125493884086609, "logps/chosen": -160.84490966796875, "logps/rejected": -260.71038818359375, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.7038085460662842, "rewards/margins": 12.638018608093262, "rewards/rejected": -14.341827392578125, "step": 3218 }, { "epoch": 1.1, "learning_rate": 8.891340575898161e-07, "logits/chosen": -0.18185561895370483, "logits/rejected": -0.1536509394645691, "logps/chosen": -159.67994689941406, "logps/rejected": -287.0245361328125, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.3169752359390259, "rewards/margins": 13.188577651977539, "rewards/rejected": -14.505553245544434, "step": 3219 }, { "epoch": 1.1, "learning_rate": 8.885847737253469e-07, "logits/chosen": -0.23098845779895782, "logits/rejected": -0.1818903088569641, "logps/chosen": -277.9606018066406, "logps/rejected": -348.47625732421875, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -3.102482795715332, "rewards/margins": 11.846683502197266, "rewards/rejected": -14.949165344238281, "step": 3220 }, { "epoch": 1.1, "learning_rate": 8.880355238966921e-07, "logits/chosen": -0.20251135528087616, "logits/rejected": -0.1936735212802887, "logps/chosen": -230.9705352783203, "logps/rejected": -453.2198486328125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.045135974884033, "rewards/margins": 18.966516494750977, "rewards/rejected": -21.011653900146484, "step": 3221 }, { "epoch": 1.1, "learning_rate": 8.874863082716395e-07, "logits/chosen": -0.20266388356685638, "logits/rejected": -0.1968439668416977, "logps/chosen": -201.89474487304688, "logps/rejected": -314.2649230957031, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.6992776393890381, "rewards/margins": 12.056113243103027, "rewards/rejected": -12.755391120910645, "step": 3222 }, { "epoch": 1.1, "learning_rate": 8.869371270179671e-07, "logits/chosen": -0.23023274540901184, "logits/rejected": -0.18809837102890015, "logps/chosen": -252.15928649902344, "logps/rejected": -361.76165771484375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.056028366088867, "rewards/margins": 14.931629180908203, "rewards/rejected": -16.98765754699707, "step": 3223 }, { "epoch": 1.1, "learning_rate": 8.86387980303442e-07, "logits/chosen": -0.127487450838089, "logits/rejected": -0.11480863392353058, "logps/chosen": -209.71926879882812, "logps/rejected": -376.4233703613281, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 0.5371301174163818, "rewards/margins": 16.119953155517578, "rewards/rejected": -15.582823753356934, "step": 3224 }, { "epoch": 1.1, "learning_rate": 8.85838868295821e-07, "logits/chosen": -0.11985882371664047, "logits/rejected": -0.08858757466077805, "logps/chosen": -160.134033203125, "logps/rejected": -306.11083984375, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -2.2011852264404297, "rewards/margins": 14.647353172302246, "rewards/rejected": -16.84853744506836, "step": 3225 }, { "epoch": 1.1, "learning_rate": 8.852897911628504e-07, "logits/chosen": -0.2032998502254486, "logits/rejected": -0.17490439116954803, "logps/chosen": -203.70094299316406, "logps/rejected": -374.2113342285156, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -2.449100971221924, "rewards/margins": 16.625986099243164, "rewards/rejected": -19.07508659362793, "step": 3226 }, { "epoch": 1.1, "learning_rate": 8.847407490722651e-07, "logits/chosen": -0.15361827611923218, "logits/rejected": -0.1217799261212349, "logps/chosen": -187.52395629882812, "logps/rejected": -365.8902282714844, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.194035053253174, "rewards/margins": 17.167835235595703, "rewards/rejected": -19.36186981201172, "step": 3227 }, { "epoch": 1.1, "learning_rate": 8.841917421917912e-07, "logits/chosen": -0.1944057196378708, "logits/rejected": -0.1712852418422699, "logps/chosen": -191.24221801757812, "logps/rejected": -344.0500793457031, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 0.2925493121147156, "rewards/margins": 15.726905822753906, "rewards/rejected": -15.434356689453125, "step": 3228 }, { "epoch": 1.1, "learning_rate": 8.836427706891415e-07, "logits/chosen": -0.1415533572435379, "logits/rejected": -0.09998004883527756, "logps/chosen": -202.56407165527344, "logps/rejected": -332.2625427246094, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.517392635345459, "rewards/margins": 15.406325340270996, "rewards/rejected": -16.92371940612793, "step": 3229 }, { "epoch": 1.1, "learning_rate": 8.830938347320191e-07, "logits/chosen": -0.13045665621757507, "logits/rejected": -0.11313492059707642, "logps/chosen": -195.60597229003906, "logps/rejected": -323.5195617675781, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.6376584768295288, "rewards/margins": 11.59592056274414, "rewards/rejected": -13.233579635620117, "step": 3230 }, { "epoch": 1.1, "learning_rate": 8.825449344881176e-07, "logits/chosen": -0.1394316852092743, "logits/rejected": -0.0968780443072319, "logps/chosen": -249.04319763183594, "logps/rejected": -358.57574462890625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.42882004380226135, "rewards/margins": 17.76966094970703, "rewards/rejected": -18.198482513427734, "step": 3231 }, { "epoch": 1.1, "learning_rate": 8.819960701251175e-07, "logits/chosen": -0.090401791036129, "logits/rejected": -0.08807362616062164, "logps/chosen": -152.9521942138672, "logps/rejected": -246.42239379882812, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.5102450847625732, "rewards/margins": 10.090290069580078, "rewards/rejected": -11.60053539276123, "step": 3232 }, { "epoch": 1.1, "learning_rate": 8.81447241810689e-07, "logits/chosen": -0.23845066130161285, "logits/rejected": -0.2332703322172165, "logps/chosen": -170.32908630371094, "logps/rejected": -337.2148742675781, "loss": 0.0164, "rewards/accuracies": 0.9375, "rewards/chosen": -2.461169958114624, "rewards/margins": 14.215356826782227, "rewards/rejected": -16.676525115966797, "step": 3233 }, { "epoch": 1.1, "learning_rate": 8.808984497124923e-07, "logits/chosen": -0.17302124202251434, "logits/rejected": -0.13034304976463318, "logps/chosen": -225.0258331298828, "logps/rejected": -286.6680908203125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5015615224838257, "rewards/margins": 12.35038948059082, "rewards/rejected": -12.851951599121094, "step": 3234 }, { "epoch": 1.1, "learning_rate": 8.803496939981757e-07, "logits/chosen": -0.2656996548175812, "logits/rejected": -0.25458234548568726, "logps/chosen": -258.477783203125, "logps/rejected": -471.115478515625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.7551389932632446, "rewards/margins": 21.664934158325195, "rewards/rejected": -23.420072555541992, "step": 3235 }, { "epoch": 1.1, "learning_rate": 8.798009748353764e-07, "logits/chosen": -0.11380933225154877, "logits/rejected": -0.0935986191034317, "logps/chosen": -132.55142211914062, "logps/rejected": -249.52288818359375, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.0381735563278198, "rewards/margins": 13.242408752441406, "rewards/rejected": -14.280582427978516, "step": 3236 }, { "epoch": 1.1, "learning_rate": 8.792522923917206e-07, "logits/chosen": -0.27805569767951965, "logits/rejected": -0.19906584918498993, "logps/chosen": -193.6251220703125, "logps/rejected": -339.41278076171875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.141122579574585, "rewards/margins": 17.30759048461914, "rewards/rejected": -18.448715209960938, "step": 3237 }, { "epoch": 1.11, "learning_rate": 8.787036468348228e-07, "logits/chosen": -0.11625147610902786, "logits/rejected": -0.08203297108411789, "logps/chosen": -210.47647094726562, "logps/rejected": -326.05743408203125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.8469932079315186, "rewards/margins": 15.455192565917969, "rewards/rejected": -16.30218505859375, "step": 3238 }, { "epoch": 1.11, "learning_rate": 8.781550383322872e-07, "logits/chosen": -0.23271532356739044, "logits/rejected": -0.19167117774486542, "logps/chosen": -138.17422485351562, "logps/rejected": -270.7464294433594, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.291704773902893, "rewards/margins": 14.700518608093262, "rewards/rejected": -15.992223739624023, "step": 3239 }, { "epoch": 1.11, "learning_rate": 8.776064670517058e-07, "logits/chosen": -0.15552885830402374, "logits/rejected": -0.11506665498018265, "logps/chosen": -219.73410034179688, "logps/rejected": -299.860107421875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5245228409767151, "rewards/margins": 14.192190170288086, "rewards/rejected": -14.716712951660156, "step": 3240 }, { "epoch": 1.11, "learning_rate": 8.770579331606598e-07, "logits/chosen": -0.17787563800811768, "logits/rejected": -0.17391280829906464, "logps/chosen": -121.88987731933594, "logps/rejected": -329.520751953125, "loss": 0.022, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7113298177719116, "rewards/margins": 16.80550765991211, "rewards/rejected": -17.51683807373047, "step": 3241 }, { "epoch": 1.11, "learning_rate": 8.765094368267185e-07, "logits/chosen": -0.15215806663036346, "logits/rejected": -0.11550995707511902, "logps/chosen": -206.71852111816406, "logps/rejected": -348.0515441894531, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.818434476852417, "rewards/margins": 17.172733306884766, "rewards/rejected": -19.991168975830078, "step": 3242 }, { "epoch": 1.11, "learning_rate": 8.759609782174399e-07, "logits/chosen": -0.12472698092460632, "logits/rejected": -0.1025652289390564, "logps/chosen": -229.30731201171875, "logps/rejected": -332.7777404785156, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.5065743923187256, "rewards/margins": 14.744213104248047, "rewards/rejected": -16.250789642333984, "step": 3243 }, { "epoch": 1.11, "learning_rate": 8.754125575003708e-07, "logits/chosen": -0.15342289209365845, "logits/rejected": -0.09945950657129288, "logps/chosen": -260.9218444824219, "logps/rejected": -299.56304931640625, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.428232192993164, "rewards/margins": 11.056756973266602, "rewards/rejected": -12.484989166259766, "step": 3244 }, { "epoch": 1.11, "learning_rate": 8.748641748430458e-07, "logits/chosen": -0.2571480870246887, "logits/rejected": -0.22770124673843384, "logps/chosen": -204.7858428955078, "logps/rejected": -376.0509338378906, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -2.9245331287384033, "rewards/margins": 16.941038131713867, "rewards/rejected": -19.865571975708008, "step": 3245 }, { "epoch": 1.11, "learning_rate": 8.74315830412989e-07, "logits/chosen": -0.16268806159496307, "logits/rejected": -0.13690458238124847, "logps/chosen": -244.8837890625, "logps/rejected": -387.4224548339844, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -2.174245834350586, "rewards/margins": 15.864341735839844, "rewards/rejected": -18.03858757019043, "step": 3246 }, { "epoch": 1.11, "learning_rate": 8.737675243777113e-07, "logits/chosen": -0.2388429045677185, "logits/rejected": -0.22942006587982178, "logps/chosen": -256.6896667480469, "logps/rejected": -401.6895446777344, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.4233226776123047, "rewards/margins": 14.88551139831543, "rewards/rejected": -17.308834075927734, "step": 3247 }, { "epoch": 1.11, "learning_rate": 8.732192569047124e-07, "logits/chosen": -0.20522885024547577, "logits/rejected": -0.18875043094158173, "logps/chosen": -201.42245483398438, "logps/rejected": -344.6607666015625, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7962102890014648, "rewards/margins": 15.029815673828125, "rewards/rejected": -16.826025009155273, "step": 3248 }, { "epoch": 1.11, "learning_rate": 8.726710281614814e-07, "logits/chosen": -0.01894804835319519, "logits/rejected": -0.012425664812326431, "logps/chosen": -164.02072143554688, "logps/rejected": -348.54315185546875, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -2.5087993144989014, "rewards/margins": 14.980823516845703, "rewards/rejected": -17.4896240234375, "step": 3249 }, { "epoch": 1.11, "learning_rate": 8.721228383154939e-07, "logits/chosen": -0.11146202683448792, "logits/rejected": -0.06942754238843918, "logps/chosen": -239.84304809570312, "logps/rejected": -395.1781311035156, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.6176247596740723, "rewards/margins": 16.36086654663086, "rewards/rejected": -17.978492736816406, "step": 3250 }, { "epoch": 1.11, "learning_rate": 8.715746875342147e-07, "logits/chosen": -0.124075748026371, "logits/rejected": -0.07170552015304565, "logps/chosen": -213.4837646484375, "logps/rejected": -325.0825500488281, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.4178087711334229, "rewards/margins": 17.683591842651367, "rewards/rejected": -19.101402282714844, "step": 3251 }, { "epoch": 1.11, "learning_rate": 8.710265759850962e-07, "logits/chosen": -0.09316597878932953, "logits/rejected": -0.08962829411029816, "logps/chosen": -174.52987670898438, "logps/rejected": -331.45166015625, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.6842676997184753, "rewards/margins": 12.795235633850098, "rewards/rejected": -13.47950267791748, "step": 3252 }, { "epoch": 1.11, "learning_rate": 8.704785038355786e-07, "logits/chosen": -0.15680135786533356, "logits/rejected": -0.12322688847780228, "logps/chosen": -204.2479705810547, "logps/rejected": -282.6111755371094, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -2.0142037868499756, "rewards/margins": 11.246554374694824, "rewards/rejected": -13.260758399963379, "step": 3253 }, { "epoch": 1.11, "learning_rate": 8.699304712530909e-07, "logits/chosen": -0.06703942269086838, "logits/rejected": -0.0616329088807106, "logps/chosen": -164.77513122558594, "logps/rejected": -368.7225036621094, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.1763083934783936, "rewards/margins": 16.65732765197754, "rewards/rejected": -18.833635330200195, "step": 3254 }, { "epoch": 1.11, "learning_rate": 8.693824784050491e-07, "logits/chosen": -0.1198500543832779, "logits/rejected": -0.12592405080795288, "logps/chosen": -209.00184631347656, "logps/rejected": -450.0238952636719, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -3.3038711547851562, "rewards/margins": 18.61328887939453, "rewards/rejected": -21.917160034179688, "step": 3255 }, { "epoch": 1.11, "learning_rate": 8.688345254588577e-07, "logits/chosen": -0.13738685846328735, "logits/rejected": -0.14997506141662598, "logps/chosen": -177.1079864501953, "logps/rejected": -377.23785400390625, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -1.4565578699111938, "rewards/margins": 15.244234085083008, "rewards/rejected": -16.700794219970703, "step": 3256 }, { "epoch": 1.11, "learning_rate": 8.682866125819087e-07, "logits/chosen": -0.15767203271389008, "logits/rejected": -0.1214369386434555, "logps/chosen": -260.8951721191406, "logps/rejected": -392.69097900390625, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.7622244358062744, "rewards/margins": 15.973299026489258, "rewards/rejected": -17.735523223876953, "step": 3257 }, { "epoch": 1.11, "learning_rate": 8.677387399415816e-07, "logits/chosen": -0.1978449523448944, "logits/rejected": -0.1268032193183899, "logps/chosen": -196.8340606689453, "logps/rejected": -319.927001953125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.5854570865631104, "rewards/margins": 15.435173034667969, "rewards/rejected": -18.0206298828125, "step": 3258 }, { "epoch": 1.11, "learning_rate": 8.671909077052444e-07, "logits/chosen": -0.046363137662410736, "logits/rejected": -0.027055583894252777, "logps/chosen": -181.67965698242188, "logps/rejected": -323.91741943359375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.1700702905654907, "rewards/margins": 16.521751403808594, "rewards/rejected": -17.691822052001953, "step": 3259 }, { "epoch": 1.11, "learning_rate": 8.666431160402518e-07, "logits/chosen": -0.0810728594660759, "logits/rejected": -0.048348426818847656, "logps/chosen": -191.85702514648438, "logps/rejected": -358.805419921875, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.3187628984451294, "rewards/margins": 18.866422653198242, "rewards/rejected": -20.1851863861084, "step": 3260 }, { "epoch": 1.11, "learning_rate": 8.660953651139474e-07, "logits/chosen": -0.0759986937046051, "logits/rejected": -0.05751063674688339, "logps/chosen": -234.4054718017578, "logps/rejected": -460.3473205566406, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -1.2208468914031982, "rewards/margins": 19.979887008666992, "rewards/rejected": -21.200733184814453, "step": 3261 }, { "epoch": 1.11, "learning_rate": 8.655476550936609e-07, "logits/chosen": -0.12748128175735474, "logits/rejected": -0.10348626971244812, "logps/chosen": -225.7503662109375, "logps/rejected": -327.66156005859375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.024500846862793, "rewards/margins": 14.753527641296387, "rewards/rejected": -16.77802848815918, "step": 3262 }, { "epoch": 1.11, "learning_rate": 8.649999861467097e-07, "logits/chosen": -0.03610333800315857, "logits/rejected": -0.03109157457947731, "logps/chosen": -130.64126586914062, "logps/rejected": -355.8831787109375, "loss": 0.0963, "rewards/accuracies": 1.0, "rewards/chosen": -1.6655462980270386, "rewards/margins": 18.24297332763672, "rewards/rejected": -19.908517837524414, "step": 3263 }, { "epoch": 1.11, "learning_rate": 8.644523584404003e-07, "logits/chosen": -0.1475169062614441, "logits/rejected": -0.10872246325016022, "logps/chosen": -196.84878540039062, "logps/rejected": -287.4153137207031, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.4130053520202637, "rewards/margins": 13.111310958862305, "rewards/rejected": -14.524314880371094, "step": 3264 }, { "epoch": 1.11, "learning_rate": 8.639047721420245e-07, "logits/chosen": -0.12957052886486053, "logits/rejected": -0.10856936126947403, "logps/chosen": -162.7900848388672, "logps/rejected": -357.4135437011719, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -1.1036937236785889, "rewards/margins": 17.8854923248291, "rewards/rejected": -18.989185333251953, "step": 3265 }, { "epoch": 1.11, "learning_rate": 8.633572274188629e-07, "logits/chosen": -0.09327880293130875, "logits/rejected": -0.05791265517473221, "logps/chosen": -185.54551696777344, "logps/rejected": -277.90185546875, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -0.9657357931137085, "rewards/margins": 12.757694244384766, "rewards/rejected": -13.723430633544922, "step": 3266 }, { "epoch": 1.12, "learning_rate": 8.628097244381826e-07, "logits/chosen": -0.17776399850845337, "logits/rejected": -0.15137144923210144, "logps/chosen": -274.2877197265625, "logps/rejected": -389.5100402832031, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.7108566761016846, "rewards/margins": 15.86290168762207, "rewards/rejected": -17.57375717163086, "step": 3267 }, { "epoch": 1.12, "learning_rate": 8.622622633672379e-07, "logits/chosen": -0.18263131380081177, "logits/rejected": -0.16035453975200653, "logps/chosen": -208.0564727783203, "logps/rejected": -317.77728271484375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.5103601217269897, "rewards/margins": 12.01812744140625, "rewards/rejected": -13.528488159179688, "step": 3268 }, { "epoch": 1.12, "learning_rate": 8.617148443732713e-07, "logits/chosen": -0.1627987176179886, "logits/rejected": -0.17038963735103607, "logps/chosen": -219.2198486328125, "logps/rejected": -408.3839416503906, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -1.3204678297042847, "rewards/margins": 15.699104309082031, "rewards/rejected": -17.01957130432129, "step": 3269 }, { "epoch": 1.12, "learning_rate": 8.611674676235114e-07, "logits/chosen": -0.10809314996004105, "logits/rejected": -0.08977660536766052, "logps/chosen": -196.5556640625, "logps/rejected": -393.9436950683594, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.7115710973739624, "rewards/margins": 18.85403823852539, "rewards/rejected": -20.565608978271484, "step": 3270 }, { "epoch": 1.12, "learning_rate": 8.606201332851738e-07, "logits/chosen": -0.02977341040968895, "logits/rejected": 0.00669922074303031, "logps/chosen": -179.1855010986328, "logps/rejected": -323.9041442871094, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.5549674034118652, "rewards/margins": 16.168859481811523, "rewards/rejected": -17.723827362060547, "step": 3271 }, { "epoch": 1.12, "learning_rate": 8.600728415254624e-07, "logits/chosen": -0.07140408456325531, "logits/rejected": -0.05052808299660683, "logps/chosen": -203.17120361328125, "logps/rejected": -325.0264587402344, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -0.7787623405456543, "rewards/margins": 13.021114349365234, "rewards/rejected": -13.799877166748047, "step": 3272 }, { "epoch": 1.12, "learning_rate": 8.595255925115666e-07, "logits/chosen": -0.14452165365219116, "logits/rejected": -0.13533338904380798, "logps/chosen": -201.62619018554688, "logps/rejected": -369.5436706542969, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -3.829641819000244, "rewards/margins": 14.497062683105469, "rewards/rejected": -18.326702117919922, "step": 3273 }, { "epoch": 1.12, "learning_rate": 8.589783864106638e-07, "logits/chosen": -0.0763854905962944, "logits/rejected": -0.0667402520775795, "logps/chosen": -117.3144302368164, "logps/rejected": -275.4015197753906, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.5386501550674438, "rewards/margins": 13.852228164672852, "rewards/rejected": -15.390878677368164, "step": 3274 }, { "epoch": 1.12, "learning_rate": 8.584312233899179e-07, "logits/chosen": -0.053403258323669434, "logits/rejected": -0.04138532653450966, "logps/chosen": -176.3282470703125, "logps/rejected": -309.8211975097656, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -0.27139514684677124, "rewards/margins": 14.728706359863281, "rewards/rejected": -15.000101089477539, "step": 3275 }, { "epoch": 1.12, "learning_rate": 8.578841036164794e-07, "logits/chosen": -0.030437856912612915, "logits/rejected": -0.027544809505343437, "logps/chosen": -194.30076599121094, "logps/rejected": -358.7881164550781, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.7487643957138062, "rewards/margins": 13.982346534729004, "rewards/rejected": -14.731111526489258, "step": 3276 }, { "epoch": 1.12, "learning_rate": 8.57337027257486e-07, "logits/chosen": -0.20317795872688293, "logits/rejected": -0.16516008973121643, "logps/chosen": -236.78358459472656, "logps/rejected": -311.49560546875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.7104765176773071, "rewards/margins": 11.454049110412598, "rewards/rejected": -13.164525032043457, "step": 3277 }, { "epoch": 1.12, "learning_rate": 8.567899944800619e-07, "logits/chosen": -0.07391254603862762, "logits/rejected": -0.06124087795615196, "logps/chosen": -253.30224609375, "logps/rejected": -452.05078125, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -1.3897053003311157, "rewards/margins": 19.958572387695312, "rewards/rejected": -21.348278045654297, "step": 3278 }, { "epoch": 1.12, "learning_rate": 8.562430054513183e-07, "logits/chosen": -0.16606898605823517, "logits/rejected": -0.16477477550506592, "logps/chosen": -230.10317993164062, "logps/rejected": -460.7624816894531, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.19069804251194, "rewards/margins": 21.30494499206543, "rewards/rejected": -21.495643615722656, "step": 3279 }, { "epoch": 1.12, "learning_rate": 8.556960603383529e-07, "logits/chosen": -0.15261660516262054, "logits/rejected": -0.13615913689136505, "logps/chosen": -175.9403839111328, "logps/rejected": -339.53009033203125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.7366481423377991, "rewards/margins": 15.68253231048584, "rewards/rejected": -16.41918182373047, "step": 3280 }, { "epoch": 1.12, "learning_rate": 8.55149159308249e-07, "logits/chosen": -0.1378290355205536, "logits/rejected": -0.11952446401119232, "logps/chosen": -186.65554809570312, "logps/rejected": -332.630126953125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.0501115322113037, "rewards/margins": 14.758068084716797, "rewards/rejected": -15.808177947998047, "step": 3281 }, { "epoch": 1.12, "learning_rate": 8.546023025280784e-07, "logits/chosen": -0.10277839750051498, "logits/rejected": -0.045729830861091614, "logps/chosen": -195.03265380859375, "logps/rejected": -276.8619384765625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.4602080583572388, "rewards/margins": 12.843544006347656, "rewards/rejected": -14.303751945495605, "step": 3282 }, { "epoch": 1.12, "learning_rate": 8.540554901648977e-07, "logits/chosen": -0.06238364428281784, "logits/rejected": -0.03807065263390541, "logps/chosen": -173.03024291992188, "logps/rejected": -286.289306640625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.5532550811767578, "rewards/margins": 13.822181701660156, "rewards/rejected": -15.375436782836914, "step": 3283 }, { "epoch": 1.12, "learning_rate": 8.535087223857508e-07, "logits/chosen": -0.0597030408680439, "logits/rejected": -0.026387633755803108, "logps/chosen": -196.07118225097656, "logps/rejected": -272.05517578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.6352096796035767, "rewards/margins": 11.243054389953613, "rewards/rejected": -12.878263473510742, "step": 3284 }, { "epoch": 1.12, "learning_rate": 8.529619993576676e-07, "logits/chosen": -0.1020655706524849, "logits/rejected": -0.07460198551416397, "logps/chosen": -199.9539031982422, "logps/rejected": -347.4017028808594, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.7295217514038086, "rewards/margins": 15.834142684936523, "rewards/rejected": -18.56366539001465, "step": 3285 }, { "epoch": 1.12, "learning_rate": 8.524153212476643e-07, "logits/chosen": -0.11616247147321701, "logits/rejected": -0.08100268244743347, "logps/chosen": -207.9994659423828, "logps/rejected": -337.8747253417969, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.905272960662842, "rewards/margins": 14.740690231323242, "rewards/rejected": -17.645963668823242, "step": 3286 }, { "epoch": 1.12, "learning_rate": 8.518686882227437e-07, "logits/chosen": -0.04726487025618553, "logits/rejected": -0.049351535737514496, "logps/chosen": -166.0015106201172, "logps/rejected": -332.9669494628906, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -2.6942226886749268, "rewards/margins": 14.731880187988281, "rewards/rejected": -17.426103591918945, "step": 3287 }, { "epoch": 1.12, "learning_rate": 8.513221004498945e-07, "logits/chosen": -0.05611800402402878, "logits/rejected": -0.03997190296649933, "logps/chosen": -203.54974365234375, "logps/rejected": -361.2753601074219, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.472470998764038, "rewards/margins": 15.682572364807129, "rewards/rejected": -18.155044555664062, "step": 3288 }, { "epoch": 1.12, "learning_rate": 8.50775558096092e-07, "logits/chosen": -0.09692935645580292, "logits/rejected": -0.06298572570085526, "logps/chosen": -212.08377075195312, "logps/rejected": -374.658203125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.6226016283035278, "rewards/margins": 17.192947387695312, "rewards/rejected": -18.815547943115234, "step": 3289 }, { "epoch": 1.12, "learning_rate": 8.502290613282972e-07, "logits/chosen": -0.03252580389380455, "logits/rejected": -0.001783237443305552, "logps/chosen": -201.75567626953125, "logps/rejected": -322.67340087890625, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -1.3534605503082275, "rewards/margins": 16.757009506225586, "rewards/rejected": -18.110471725463867, "step": 3290 }, { "epoch": 1.12, "learning_rate": 8.49682610313457e-07, "logits/chosen": -0.07372435927391052, "logits/rejected": -0.03633182868361473, "logps/chosen": -191.4383087158203, "logps/rejected": -298.7107238769531, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.6456143856048584, "rewards/margins": 13.165895462036133, "rewards/rejected": -15.811509132385254, "step": 3291 }, { "epoch": 1.12, "learning_rate": 8.491362052185052e-07, "logits/chosen": -0.2309846431016922, "logits/rejected": -0.20813190937042236, "logps/chosen": -189.90975952148438, "logps/rejected": -353.3541259765625, "loss": 0.0273, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7373878955841064, "rewards/margins": 15.468718528747559, "rewards/rejected": -17.20610809326172, "step": 3292 }, { "epoch": 1.12, "learning_rate": 8.485898462103602e-07, "logits/chosen": -0.24176834523677826, "logits/rejected": -0.21706564724445343, "logps/chosen": -251.85308837890625, "logps/rejected": -424.9985656738281, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.615809917449951, "rewards/margins": 17.951766967773438, "rewards/rejected": -20.567577362060547, "step": 3293 }, { "epoch": 1.12, "learning_rate": 8.480435334559282e-07, "logits/chosen": -0.19940169155597687, "logits/rejected": -0.1684565544128418, "logps/chosen": -238.63255310058594, "logps/rejected": -390.2751770019531, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.467200756072998, "rewards/margins": 14.608194351196289, "rewards/rejected": -16.075395584106445, "step": 3294 }, { "epoch": 1.12, "learning_rate": 8.474972671220998e-07, "logits/chosen": -0.05760970711708069, "logits/rejected": -0.03974920138716698, "logps/chosen": -217.67893981933594, "logps/rejected": -362.9439697265625, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.9119928479194641, "rewards/margins": 17.32217788696289, "rewards/rejected": -18.23417091369629, "step": 3295 }, { "epoch": 1.12, "learning_rate": 8.469510473757512e-07, "logits/chosen": -0.027380989864468575, "logits/rejected": 0.00041718437569215894, "logps/chosen": -185.714111328125, "logps/rejected": -360.6358337402344, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -3.046481132507324, "rewards/margins": 18.135000228881836, "rewards/rejected": -21.181482315063477, "step": 3296 }, { "epoch": 1.13, "learning_rate": 8.464048743837459e-07, "logits/chosen": -0.08316227048635483, "logits/rejected": -0.06316496431827545, "logps/chosen": -150.6172637939453, "logps/rejected": -302.9540710449219, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.3558924198150635, "rewards/margins": 14.858184814453125, "rewards/rejected": -16.21407699584961, "step": 3297 }, { "epoch": 1.13, "learning_rate": 8.458587483129314e-07, "logits/chosen": -0.08658497035503387, "logits/rejected": -0.04660622775554657, "logps/chosen": -230.60528564453125, "logps/rejected": -391.6935119628906, "loss": 0.065, "rewards/accuracies": 1.0, "rewards/chosen": -1.4720697402954102, "rewards/margins": 18.572906494140625, "rewards/rejected": -20.04497528076172, "step": 3298 }, { "epoch": 1.13, "learning_rate": 8.453126693301425e-07, "logits/chosen": -0.009823601692914963, "logits/rejected": -0.013809354044497013, "logps/chosen": -198.68284606933594, "logps/rejected": -393.6335754394531, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.6125026941299438, "rewards/margins": 17.006622314453125, "rewards/rejected": -18.619125366210938, "step": 3299 }, { "epoch": 1.13, "learning_rate": 8.447666376021984e-07, "logits/chosen": -0.11573424935340881, "logits/rejected": -0.10252624750137329, "logps/chosen": -172.58314514160156, "logps/rejected": -376.96771240234375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.683506965637207, "rewards/margins": 17.62674331665039, "rewards/rejected": -19.310253143310547, "step": 3300 }, { "epoch": 1.13, "learning_rate": 8.44220653295904e-07, "logits/chosen": -0.17124967277050018, "logits/rejected": -0.1084841936826706, "logps/chosen": -259.07733154296875, "logps/rejected": -328.29046630859375, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.0098469257354736, "rewards/margins": 13.702447891235352, "rewards/rejected": -14.712294578552246, "step": 3301 }, { "epoch": 1.13, "learning_rate": 8.436747165780505e-07, "logits/chosen": -0.2548144459724426, "logits/rejected": -0.23524019122123718, "logps/chosen": -188.12950134277344, "logps/rejected": -312.7713623046875, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -1.4676687717437744, "rewards/margins": 13.31518840789795, "rewards/rejected": -14.782857894897461, "step": 3302 }, { "epoch": 1.13, "learning_rate": 8.431288276154135e-07, "logits/chosen": -0.11612601578235626, "logits/rejected": -0.09760667383670807, "logps/chosen": -284.6216125488281, "logps/rejected": -439.3323669433594, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.7773513793945312, "rewards/margins": 18.496627807617188, "rewards/rejected": -20.27397918701172, "step": 3303 }, { "epoch": 1.13, "learning_rate": 8.425829865747549e-07, "logits/chosen": -0.10422994196414948, "logits/rejected": -0.10065603256225586, "logps/chosen": -191.9299774169922, "logps/rejected": -424.99072265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9907053709030151, "rewards/margins": 21.74458122253418, "rewards/rejected": -22.73528480529785, "step": 3304 }, { "epoch": 1.13, "learning_rate": 8.420371936228217e-07, "logits/chosen": -0.07025137543678284, "logits/rejected": -0.03275084123015404, "logps/chosen": -185.39137268066406, "logps/rejected": -347.3377380371094, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -3.100914478302002, "rewards/margins": 15.961560249328613, "rewards/rejected": -19.062475204467773, "step": 3305 }, { "epoch": 1.13, "learning_rate": 8.414914489263455e-07, "logits/chosen": -0.051203228533267975, "logits/rejected": -0.0403200164437294, "logps/chosen": -162.84291076660156, "logps/rejected": -356.6452331542969, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -2.6402406692504883, "rewards/margins": 16.39090347290039, "rewards/rejected": -19.031145095825195, "step": 3306 }, { "epoch": 1.13, "learning_rate": 8.409457526520445e-07, "logits/chosen": -0.10999894142150879, "logits/rejected": -0.09922187030315399, "logps/chosen": -213.45545959472656, "logps/rejected": -396.7947692871094, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -2.3834118843078613, "rewards/margins": 16.72492790222168, "rewards/rejected": -19.108339309692383, "step": 3307 }, { "epoch": 1.13, "learning_rate": 8.40400104966621e-07, "logits/chosen": -0.01431950367987156, "logits/rejected": 0.016231508925557137, "logps/chosen": -206.40570068359375, "logps/rejected": -419.75946044921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.7525273561477661, "rewards/margins": 18.971452713012695, "rewards/rejected": -20.723979949951172, "step": 3308 }, { "epoch": 1.13, "learning_rate": 8.398545060367627e-07, "logits/chosen": -0.06705783307552338, "logits/rejected": -0.008911436423659325, "logps/chosen": -219.937744140625, "logps/rejected": -327.0373229980469, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.9890390634536743, "rewards/margins": 16.169536590576172, "rewards/rejected": -18.15857696533203, "step": 3309 }, { "epoch": 1.13, "learning_rate": 8.393089560291431e-07, "logits/chosen": -0.0693344995379448, "logits/rejected": -0.029411928728222847, "logps/chosen": -152.9727020263672, "logps/rejected": -312.5487365722656, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -2.3105952739715576, "rewards/margins": 16.31800079345703, "rewards/rejected": -18.628597259521484, "step": 3310 }, { "epoch": 1.13, "learning_rate": 8.387634551104192e-07, "logits/chosen": -0.11203281581401825, "logits/rejected": -0.08979938924312592, "logps/chosen": -209.43284606933594, "logps/rejected": -295.9445495605469, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.989845871925354, "rewards/margins": 10.741744995117188, "rewards/rejected": -12.731590270996094, "step": 3311 }, { "epoch": 1.13, "learning_rate": 8.382180034472353e-07, "logits/chosen": -0.1328369528055191, "logits/rejected": -0.12746702134609222, "logps/chosen": -228.1156005859375, "logps/rejected": -416.0087890625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.7750660181045532, "rewards/margins": 17.9821720123291, "rewards/rejected": -19.757238388061523, "step": 3312 }, { "epoch": 1.13, "learning_rate": 8.376726012062187e-07, "logits/chosen": -0.061891745775938034, "logits/rejected": -0.04908354580402374, "logps/chosen": -150.8157196044922, "logps/rejected": -293.9805908203125, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -1.6364686489105225, "rewards/margins": 13.064742088317871, "rewards/rejected": -14.701210975646973, "step": 3313 }, { "epoch": 1.13, "learning_rate": 8.371272485539817e-07, "logits/chosen": -0.05612599477171898, "logits/rejected": -0.007379133254289627, "logps/chosen": -163.831298828125, "logps/rejected": -242.39198303222656, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.260570764541626, "rewards/margins": 12.649213790893555, "rewards/rejected": -13.909784317016602, "step": 3314 }, { "epoch": 1.13, "learning_rate": 8.365819456571232e-07, "logits/chosen": -0.12327352911233902, "logits/rejected": -0.07922480255365372, "logps/chosen": -262.50732421875, "logps/rejected": -394.3796691894531, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.189534530043602, "rewards/margins": 19.183359146118164, "rewards/rejected": -19.372892379760742, "step": 3315 }, { "epoch": 1.13, "learning_rate": 8.360366926822246e-07, "logits/chosen": -0.06263038516044617, "logits/rejected": -0.04463587701320648, "logps/chosen": -190.6092987060547, "logps/rejected": -301.6354675292969, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.4881060123443604, "rewards/margins": 12.668018341064453, "rewards/rejected": -14.156123161315918, "step": 3316 }, { "epoch": 1.13, "learning_rate": 8.354914897958541e-07, "logits/chosen": -0.14258916676044464, "logits/rejected": -0.09522956609725952, "logps/chosen": -190.45997619628906, "logps/rejected": -357.4286804199219, "loss": 0.0415, "rewards/accuracies": 1.0, "rewards/chosen": -0.8040826916694641, "rewards/margins": 19.6485595703125, "rewards/rejected": -20.45264434814453, "step": 3317 }, { "epoch": 1.13, "learning_rate": 8.349463371645629e-07, "logits/chosen": -0.06873084604740143, "logits/rejected": -0.043516069650650024, "logps/chosen": -220.9550018310547, "logps/rejected": -410.4327392578125, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.9841742515563965, "rewards/margins": 17.79498863220215, "rewards/rejected": -18.779165267944336, "step": 3318 }, { "epoch": 1.13, "learning_rate": 8.344012349548879e-07, "logits/chosen": -0.048957400023937225, "logits/rejected": -0.03996123746037483, "logps/chosen": -202.8452606201172, "logps/rejected": -442.49334716796875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.3942461013793945, "rewards/margins": 20.473209381103516, "rewards/rejected": -22.867456436157227, "step": 3319 }, { "epoch": 1.13, "learning_rate": 8.338561833333505e-07, "logits/chosen": -0.1722375750541687, "logits/rejected": -0.11512421816587448, "logps/chosen": -259.09600830078125, "logps/rejected": -383.88037109375, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -2.596219539642334, "rewards/margins": 15.970385551452637, "rewards/rejected": -18.566606521606445, "step": 3320 }, { "epoch": 1.13, "learning_rate": 8.33311182466456e-07, "logits/chosen": -0.10275240987539291, "logits/rejected": -0.06562040746212006, "logps/chosen": -217.56553649902344, "logps/rejected": -357.88037109375, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.2617292404174805, "rewards/margins": 17.03287124633789, "rewards/rejected": -19.294599533081055, "step": 3321 }, { "epoch": 1.13, "learning_rate": 8.327662325206952e-07, "logits/chosen": -0.15402978658676147, "logits/rejected": -0.11313623189926147, "logps/chosen": -216.0983428955078, "logps/rejected": -329.5981750488281, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.329526424407959, "rewards/margins": 15.70246410369873, "rewards/rejected": -18.03199005126953, "step": 3322 }, { "epoch": 1.13, "learning_rate": 8.322213336625424e-07, "logits/chosen": -0.06215081363916397, "logits/rejected": -0.03516995534300804, "logps/chosen": -169.64219665527344, "logps/rejected": -302.4112548828125, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.5097858905792236, "rewards/margins": 16.312480926513672, "rewards/rejected": -17.822265625, "step": 3323 }, { "epoch": 1.13, "learning_rate": 8.316764860584567e-07, "logits/chosen": -0.2285270243883133, "logits/rejected": -0.21500781178474426, "logps/chosen": -261.56695556640625, "logps/rejected": -399.029052734375, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -1.2470979690551758, "rewards/margins": 14.432059288024902, "rewards/rejected": -15.679157257080078, "step": 3324 }, { "epoch": 1.13, "learning_rate": 8.311316898748815e-07, "logits/chosen": -0.19826337695121765, "logits/rejected": -0.15068353712558746, "logps/chosen": -213.26065063476562, "logps/rejected": -350.77447509765625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.8032368421554565, "rewards/margins": 16.081195831298828, "rewards/rejected": -16.884431838989258, "step": 3325 }, { "epoch": 1.14, "learning_rate": 8.305869452782445e-07, "logits/chosen": -0.03429355099797249, "logits/rejected": 0.00035644148010760546, "logps/chosen": -197.5110626220703, "logps/rejected": -376.90118408203125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.6806625723838806, "rewards/margins": 17.6179256439209, "rewards/rejected": -18.2985897064209, "step": 3326 }, { "epoch": 1.14, "learning_rate": 8.300422524349579e-07, "logits/chosen": -0.1262277215719223, "logits/rejected": -0.11797818541526794, "logps/chosen": -180.45947265625, "logps/rejected": -358.8117370605469, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0356638431549072, "rewards/margins": 14.53170108795166, "rewards/rejected": -15.567364692687988, "step": 3327 }, { "epoch": 1.14, "learning_rate": 8.294976115114182e-07, "logits/chosen": -0.16743366420269012, "logits/rejected": -0.10917363315820694, "logps/chosen": -295.6249084472656, "logps/rejected": -371.2064514160156, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": 0.08219718933105469, "rewards/margins": 16.216779708862305, "rewards/rejected": -16.13458251953125, "step": 3328 }, { "epoch": 1.14, "learning_rate": 8.289530226740044e-07, "logits/chosen": -0.10490868240594864, "logits/rejected": -0.09062729775905609, "logps/chosen": -218.26193237304688, "logps/rejected": -467.9883728027344, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.1662851572036743, "rewards/margins": 21.042070388793945, "rewards/rejected": -22.208354949951172, "step": 3329 }, { "epoch": 1.14, "learning_rate": 8.284084860890822e-07, "logits/chosen": -0.08456502109766006, "logits/rejected": -0.0504770465195179, "logps/chosen": -214.07847595214844, "logps/rejected": -351.1385803222656, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.9739155769348145, "rewards/margins": 13.886489868164062, "rewards/rejected": -15.860405921936035, "step": 3330 }, { "epoch": 1.14, "learning_rate": 8.278640019229992e-07, "logits/chosen": -0.09205342829227448, "logits/rejected": -0.05425342172384262, "logps/chosen": -217.68711853027344, "logps/rejected": -391.6258544921875, "loss": 0.0338, "rewards/accuracies": 1.0, "rewards/chosen": -2.0921456813812256, "rewards/margins": 16.990015029907227, "rewards/rejected": -19.08216094970703, "step": 3331 }, { "epoch": 1.14, "learning_rate": 8.273195703420883e-07, "logits/chosen": -0.01000358909368515, "logits/rejected": 0.013494710437953472, "logps/chosen": -171.81271362304688, "logps/rejected": -330.3440246582031, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9052760004997253, "rewards/margins": 14.680706024169922, "rewards/rejected": -15.585981369018555, "step": 3332 }, { "epoch": 1.14, "learning_rate": 8.267751915126657e-07, "logits/chosen": -0.14035525918006897, "logits/rejected": -0.11415673792362213, "logps/chosen": -198.18850708007812, "logps/rejected": -364.7393493652344, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.5611042976379395, "rewards/margins": 17.505252838134766, "rewards/rejected": -18.06635856628418, "step": 3333 }, { "epoch": 1.14, "learning_rate": 8.262308656010313e-07, "logits/chosen": -0.12598079442977905, "logits/rejected": -0.07054878771305084, "logps/chosen": -258.6879577636719, "logps/rejected": -477.4959716796875, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -1.592259168624878, "rewards/margins": 23.131546020507812, "rewards/rejected": -24.723804473876953, "step": 3334 }, { "epoch": 1.14, "learning_rate": 8.256865927734696e-07, "logits/chosen": -0.1299578845500946, "logits/rejected": -0.08241444826126099, "logps/chosen": -231.9178466796875, "logps/rejected": -350.65667724609375, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.0249968767166138, "rewards/margins": 16.616470336914062, "rewards/rejected": -17.64146614074707, "step": 3335 }, { "epoch": 1.14, "learning_rate": 8.251423731962479e-07, "logits/chosen": -0.11452905088663101, "logits/rejected": -0.11780209094285965, "logps/chosen": -279.07568359375, "logps/rejected": -494.68646240234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 1.0546499490737915, "rewards/margins": 21.032886505126953, "rewards/rejected": -19.978235244750977, "step": 3336 }, { "epoch": 1.14, "learning_rate": 8.245982070356184e-07, "logits/chosen": -0.10231444984674454, "logits/rejected": -0.06240618973970413, "logps/chosen": -236.00311279296875, "logps/rejected": -328.88446044921875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.7417612075805664, "rewards/margins": 14.117868423461914, "rewards/rejected": -15.85962963104248, "step": 3337 }, { "epoch": 1.14, "learning_rate": 8.24054094457816e-07, "logits/chosen": -0.1761287897825241, "logits/rejected": -0.12312065809965134, "logps/chosen": -190.5673370361328, "logps/rejected": -357.78076171875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.49073293805122375, "rewards/margins": 18.334148406982422, "rewards/rejected": -18.824880599975586, "step": 3338 }, { "epoch": 1.14, "learning_rate": 8.235100356290595e-07, "logits/chosen": -0.04550662264227867, "logits/rejected": -0.032256439328193665, "logps/chosen": -189.00791931152344, "logps/rejected": -308.36602783203125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.8254587650299072, "rewards/margins": 13.220126152038574, "rewards/rejected": -15.045585632324219, "step": 3339 }, { "epoch": 1.14, "learning_rate": 8.229660307155517e-07, "logits/chosen": -0.008838390000164509, "logits/rejected": 0.012137415818870068, "logps/chosen": -215.50930786132812, "logps/rejected": -367.95263671875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.3793654441833496, "rewards/margins": 18.264850616455078, "rewards/rejected": -18.644214630126953, "step": 3340 }, { "epoch": 1.14, "learning_rate": 8.22422079883478e-07, "logits/chosen": -0.14199639856815338, "logits/rejected": -0.10546489804983139, "logps/chosen": -190.82090759277344, "logps/rejected": -342.69952392578125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.1629502773284912, "rewards/margins": 15.414645195007324, "rewards/rejected": -16.57759666442871, "step": 3341 }, { "epoch": 1.14, "learning_rate": 8.218781832990087e-07, "logits/chosen": -0.10026371479034424, "logits/rejected": -0.05496833845973015, "logps/chosen": -216.97213745117188, "logps/rejected": -340.5969543457031, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -2.074230432510376, "rewards/margins": 15.87161636352539, "rewards/rejected": -17.945846557617188, "step": 3342 }, { "epoch": 1.14, "learning_rate": 8.213343411282964e-07, "logits/chosen": -0.07911397516727448, "logits/rejected": -0.05803302302956581, "logps/chosen": -196.18768310546875, "logps/rejected": -345.09552001953125, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -1.7311445474624634, "rewards/margins": 14.147040367126465, "rewards/rejected": -15.878185272216797, "step": 3343 }, { "epoch": 1.14, "learning_rate": 8.207905535374766e-07, "logits/chosen": -0.030660199001431465, "logits/rejected": -0.011398233473300934, "logps/chosen": -217.69143676757812, "logps/rejected": -391.7916564941406, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -2.1923329830169678, "rewards/margins": 17.228836059570312, "rewards/rejected": -19.42116928100586, "step": 3344 }, { "epoch": 1.14, "learning_rate": 8.2024682069267e-07, "logits/chosen": -0.07729192078113556, "logits/rejected": -0.046828895807266235, "logps/chosen": -186.58843994140625, "logps/rejected": -314.40496826171875, "loss": 0.0621, "rewards/accuracies": 1.0, "rewards/chosen": -3.740192174911499, "rewards/margins": 12.576064109802246, "rewards/rejected": -16.316255569458008, "step": 3345 }, { "epoch": 1.14, "learning_rate": 8.197031427599793e-07, "logits/chosen": -0.0813470184803009, "logits/rejected": -0.06324956566095352, "logps/chosen": -238.39028930664062, "logps/rejected": -410.4065246582031, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.3121447563171387, "rewards/margins": 18.270034790039062, "rewards/rejected": -19.58218002319336, "step": 3346 }, { "epoch": 1.14, "learning_rate": 8.191595199054897e-07, "logits/chosen": -0.10940445959568024, "logits/rejected": -0.07689045369625092, "logps/chosen": -178.95895385742188, "logps/rejected": -337.16473388671875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.0490434169769287, "rewards/margins": 15.564208984375, "rewards/rejected": -16.613252639770508, "step": 3347 }, { "epoch": 1.14, "learning_rate": 8.186159522952715e-07, "logits/chosen": -0.061858922243118286, "logits/rejected": -0.03837389871478081, "logps/chosen": -151.10342407226562, "logps/rejected": -309.4349060058594, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.4850943088531494, "rewards/margins": 14.595039367675781, "rewards/rejected": -17.08013343811035, "step": 3348 }, { "epoch": 1.14, "learning_rate": 8.180724400953765e-07, "logits/chosen": -0.012489677406847477, "logits/rejected": -0.0023297746665775776, "logps/chosen": -145.6331024169922, "logps/rejected": -324.4629211425781, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.970736265182495, "rewards/margins": 14.561626434326172, "rewards/rejected": -17.532363891601562, "step": 3349 }, { "epoch": 1.14, "learning_rate": 8.175289834718406e-07, "logits/chosen": -0.049415480345487595, "logits/rejected": -0.009293513372540474, "logps/chosen": -196.0184326171875, "logps/rejected": -262.8193359375, "loss": 0.0141, "rewards/accuracies": 0.9375, "rewards/chosen": -0.27613595128059387, "rewards/margins": 11.917807579040527, "rewards/rejected": -12.193943977355957, "step": 3350 }, { "epoch": 1.14, "learning_rate": 8.169855825906821e-07, "logits/chosen": -0.11165660619735718, "logits/rejected": -0.07083878666162491, "logps/chosen": -243.06704711914062, "logps/rejected": -348.2738342285156, "loss": 0.0308, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5354939699172974, "rewards/margins": 14.343910217285156, "rewards/rejected": -15.879404067993164, "step": 3351 }, { "epoch": 1.14, "learning_rate": 8.164422376179022e-07, "logits/chosen": -0.030290106311440468, "logits/rejected": -0.005290213972330093, "logps/chosen": -193.81539916992188, "logps/rejected": -294.78021240234375, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -2.3552372455596924, "rewards/margins": 12.690325736999512, "rewards/rejected": -15.045564651489258, "step": 3352 }, { "epoch": 1.14, "learning_rate": 8.158989487194859e-07, "logits/chosen": -0.12619102001190186, "logits/rejected": -0.13081075251102448, "logps/chosen": -250.8682861328125, "logps/rejected": -393.5301208496094, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.1453475952148438, "rewards/margins": 13.398821830749512, "rewards/rejected": -15.544170379638672, "step": 3353 }, { "epoch": 1.14, "learning_rate": 8.153557160613998e-07, "logits/chosen": -0.13746598362922668, "logits/rejected": -0.11016860604286194, "logps/chosen": -179.94241333007812, "logps/rejected": -296.8062438964844, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.19616246223449707, "rewards/margins": 14.421076774597168, "rewards/rejected": -14.617239952087402, "step": 3354 }, { "epoch": 1.15, "learning_rate": 8.148125398095945e-07, "logits/chosen": -0.033281359821558, "logits/rejected": -0.020619094371795654, "logps/chosen": -230.3758544921875, "logps/rejected": -380.8870544433594, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -1.891802191734314, "rewards/margins": 15.76394271850586, "rewards/rejected": -17.655746459960938, "step": 3355 }, { "epoch": 1.15, "learning_rate": 8.142694201300026e-07, "logits/chosen": -0.13753065466880798, "logits/rejected": -0.11437830328941345, "logps/chosen": -209.72607421875, "logps/rejected": -382.3042297363281, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.9884731769561768, "rewards/margins": 16.74386978149414, "rewards/rejected": -18.732343673706055, "step": 3356 }, { "epoch": 1.15, "learning_rate": 8.137263571885394e-07, "logits/chosen": -0.02509959414601326, "logits/rejected": -0.002067788504064083, "logps/chosen": -176.8029327392578, "logps/rejected": -332.5080871582031, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.3418962955474854, "rewards/margins": 14.213889122009277, "rewards/rejected": -15.555785179138184, "step": 3357 }, { "epoch": 1.15, "learning_rate": 8.131833511511042e-07, "logits/chosen": 0.011656491085886955, "logits/rejected": 0.02428801916539669, "logps/chosen": -154.04579162597656, "logps/rejected": -276.07525634765625, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": -2.624131441116333, "rewards/margins": 11.858987808227539, "rewards/rejected": -14.48311996459961, "step": 3358 }, { "epoch": 1.15, "learning_rate": 8.126404021835763e-07, "logits/chosen": -0.042713019996881485, "logits/rejected": -0.014288846403360367, "logps/chosen": -192.26487731933594, "logps/rejected": -336.43475341796875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.9604768753051758, "rewards/margins": 15.81608772277832, "rewards/rejected": -17.776565551757812, "step": 3359 }, { "epoch": 1.15, "learning_rate": 8.120975104518203e-07, "logits/chosen": -0.10168351978063583, "logits/rejected": -0.06452107429504395, "logps/chosen": -204.1766357421875, "logps/rejected": -367.08453369140625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.7160897254943848, "rewards/margins": 15.756791114807129, "rewards/rejected": -17.472881317138672, "step": 3360 }, { "epoch": 1.15, "learning_rate": 8.11554676121682e-07, "logits/chosen": -0.031342703849077225, "logits/rejected": 0.01838197372853756, "logps/chosen": -206.22927856445312, "logps/rejected": -289.9124755859375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.0305979251861572, "rewards/margins": 12.515100479125977, "rewards/rejected": -14.545698165893555, "step": 3361 }, { "epoch": 1.15, "learning_rate": 8.11011899358989e-07, "logits/chosen": -0.1816413700580597, "logits/rejected": -0.1571897715330124, "logps/chosen": -224.72132873535156, "logps/rejected": -368.8369445800781, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -1.3600631952285767, "rewards/margins": 16.767608642578125, "rewards/rejected": -18.127670288085938, "step": 3362 }, { "epoch": 1.15, "learning_rate": 8.104691803295529e-07, "logits/chosen": -0.14230090379714966, "logits/rejected": -0.08062972128391266, "logps/chosen": -274.00189208984375, "logps/rejected": -386.8524169921875, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 0.2887760102748871, "rewards/margins": 19.40183448791504, "rewards/rejected": -19.113056182861328, "step": 3363 }, { "epoch": 1.15, "learning_rate": 8.099265191991665e-07, "logits/chosen": -0.008609278127551079, "logits/rejected": -0.026743093505501747, "logps/chosen": -147.63128662109375, "logps/rejected": -348.4458312988281, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.5707470178604126, "rewards/margins": 15.684659957885742, "rewards/rejected": -17.255407333374023, "step": 3364 }, { "epoch": 1.15, "learning_rate": 8.093839161336056e-07, "logits/chosen": -0.16480067372322083, "logits/rejected": -0.10034896433353424, "logps/chosen": -336.2220764160156, "logps/rejected": -411.07220458984375, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -0.3641813099384308, "rewards/margins": 18.316801071166992, "rewards/rejected": -18.680980682373047, "step": 3365 }, { "epoch": 1.15, "learning_rate": 8.088413712986279e-07, "logits/chosen": -0.11764358729124069, "logits/rejected": -0.09918861836194992, "logps/chosen": -231.68850708007812, "logps/rejected": -374.72216796875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.6346204280853271, "rewards/margins": 14.587113380432129, "rewards/rejected": -16.22173309326172, "step": 3366 }, { "epoch": 1.15, "learning_rate": 8.082988848599728e-07, "logits/chosen": 0.026920408010482788, "logits/rejected": 0.036027804017066956, "logps/chosen": -202.2091522216797, "logps/rejected": -351.3515625, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -0.9717147946357727, "rewards/margins": 15.521821975708008, "rewards/rejected": -16.4935359954834, "step": 3367 }, { "epoch": 1.15, "learning_rate": 8.077564569833631e-07, "logits/chosen": -0.04528431221842766, "logits/rejected": -0.043984100222587585, "logps/chosen": -172.64617919921875, "logps/rejected": -350.145263671875, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -1.2404801845550537, "rewards/margins": 14.69088363647461, "rewards/rejected": -15.931364059448242, "step": 3368 }, { "epoch": 1.15, "learning_rate": 8.072140878345027e-07, "logits/chosen": -0.07709133625030518, "logits/rejected": -0.06402567774057388, "logps/chosen": -197.094482421875, "logps/rejected": -390.910888671875, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -2.454726457595825, "rewards/margins": 17.40479278564453, "rewards/rejected": -19.859519958496094, "step": 3369 }, { "epoch": 1.15, "learning_rate": 8.066717775790781e-07, "logits/chosen": -0.07413624227046967, "logits/rejected": -0.061294183135032654, "logps/chosen": -185.5415802001953, "logps/rejected": -340.1534423828125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.089317798614502, "rewards/margins": 12.869735717773438, "rewards/rejected": -14.959053039550781, "step": 3370 }, { "epoch": 1.15, "learning_rate": 8.061295263827574e-07, "logits/chosen": -0.07047921419143677, "logits/rejected": -0.08218075335025787, "logps/chosen": -216.89498901367188, "logps/rejected": -427.9627990722656, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.9645540714263916, "rewards/margins": 16.914955139160156, "rewards/rejected": -17.87950897216797, "step": 3371 }, { "epoch": 1.15, "learning_rate": 8.055873344111905e-07, "logits/chosen": -0.06638315320014954, "logits/rejected": -0.03492584824562073, "logps/chosen": -189.42176818847656, "logps/rejected": -373.47735595703125, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -2.2243118286132812, "rewards/margins": 16.402732849121094, "rewards/rejected": -18.627042770385742, "step": 3372 }, { "epoch": 1.15, "learning_rate": 8.050452018300107e-07, "logits/chosen": -0.03295772895216942, "logits/rejected": 0.020446902140975, "logps/chosen": -170.40713500976562, "logps/rejected": -298.8690490722656, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.4497594833374023, "rewards/margins": 14.746356010437012, "rewards/rejected": -16.19611358642578, "step": 3373 }, { "epoch": 1.15, "learning_rate": 8.045031288048307e-07, "logits/chosen": -0.11053666472434998, "logits/rejected": -0.07117678225040436, "logps/chosen": -289.0061950683594, "logps/rejected": -393.8031005859375, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.7589358687400818, "rewards/margins": 16.941364288330078, "rewards/rejected": -17.700298309326172, "step": 3374 }, { "epoch": 1.15, "learning_rate": 8.039611155012473e-07, "logits/chosen": -0.11311031877994537, "logits/rejected": -0.09265629202127457, "logps/chosen": -236.67608642578125, "logps/rejected": -416.412353515625, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.9696485996246338, "rewards/margins": 18.765045166015625, "rewards/rejected": -20.734695434570312, "step": 3375 }, { "epoch": 1.15, "learning_rate": 8.034191620848379e-07, "logits/chosen": -0.0019238620297983289, "logits/rejected": 0.020013678818941116, "logps/chosen": -188.61708068847656, "logps/rejected": -319.3209533691406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1794779300689697, "rewards/margins": 15.7648286819458, "rewards/rejected": -16.944305419921875, "step": 3376 }, { "epoch": 1.15, "learning_rate": 8.02877268721161e-07, "logits/chosen": 0.011236350052058697, "logits/rejected": 0.02919122204184532, "logps/chosen": -195.11465454101562, "logps/rejected": -321.35015869140625, "loss": 0.0453, "rewards/accuracies": 0.9375, "rewards/chosen": -1.275717854499817, "rewards/margins": 13.48154067993164, "rewards/rejected": -14.757259368896484, "step": 3377 }, { "epoch": 1.15, "learning_rate": 8.023354355757586e-07, "logits/chosen": -0.04755334556102753, "logits/rejected": -0.019128020852804184, "logps/chosen": -225.23309326171875, "logps/rejected": -344.9446105957031, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.095691442489624, "rewards/margins": 13.375418663024902, "rewards/rejected": -14.471110343933105, "step": 3378 }, { "epoch": 1.15, "learning_rate": 8.017936628141526e-07, "logits/chosen": -0.14006905257701874, "logits/rejected": -0.1086696982383728, "logps/chosen": -210.7289276123047, "logps/rejected": -348.55474853515625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.3542425632476807, "rewards/margins": 16.447355270385742, "rewards/rejected": -17.801597595214844, "step": 3379 }, { "epoch": 1.15, "learning_rate": 8.012519506018474e-07, "logits/chosen": 0.0014434880577027798, "logits/rejected": 0.023755960166454315, "logps/chosen": -135.72035217285156, "logps/rejected": -295.32830810546875, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -3.5423121452331543, "rewards/margins": 14.597105026245117, "rewards/rejected": -18.13941764831543, "step": 3380 }, { "epoch": 1.15, "learning_rate": 8.007102991043286e-07, "logits/chosen": -0.07142814993858337, "logits/rejected": -0.07043721526861191, "logps/chosen": -230.39395141601562, "logps/rejected": -360.37744140625, "loss": 0.0308, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8451511263847351, "rewards/margins": 11.53144359588623, "rewards/rejected": -12.376594543457031, "step": 3381 }, { "epoch": 1.15, "learning_rate": 8.00168708487063e-07, "logits/chosen": -0.13498128950595856, "logits/rejected": -0.09075572341680527, "logps/chosen": -228.104736328125, "logps/rejected": -352.653076171875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.763833999633789, "rewards/margins": 14.077231407165527, "rewards/rejected": -15.841066360473633, "step": 3382 }, { "epoch": 1.15, "learning_rate": 7.996271789154993e-07, "logits/chosen": -0.07739850878715515, "logits/rejected": -0.03923741355538368, "logps/chosen": -198.3311767578125, "logps/rejected": -366.5804443359375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.0822267532348633, "rewards/margins": 16.055866241455078, "rewards/rejected": -18.138093948364258, "step": 3383 }, { "epoch": 1.15, "learning_rate": 7.990857105550675e-07, "logits/chosen": -0.10231047868728638, "logits/rejected": -0.09783180803060532, "logps/chosen": -199.46292114257812, "logps/rejected": -357.39874267578125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.5401556491851807, "rewards/margins": 15.52471923828125, "rewards/rejected": -18.064876556396484, "step": 3384 }, { "epoch": 1.16, "learning_rate": 7.98544303571178e-07, "logits/chosen": -0.01792326010763645, "logits/rejected": 0.01088477298617363, "logps/chosen": -210.30917358398438, "logps/rejected": -394.34490966796875, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.7976992130279541, "rewards/margins": 18.00405502319336, "rewards/rejected": -18.801753997802734, "step": 3385 }, { "epoch": 1.16, "learning_rate": 7.980029581292238e-07, "logits/chosen": -0.0533762127161026, "logits/rejected": -0.017627611756324768, "logps/chosen": -210.8109893798828, "logps/rejected": -380.7237854003906, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.5547971725463867, "rewards/margins": 17.014497756958008, "rewards/rejected": -19.569293975830078, "step": 3386 }, { "epoch": 1.16, "learning_rate": 7.974616743945779e-07, "logits/chosen": -0.055697157979011536, "logits/rejected": -0.03214334324002266, "logps/chosen": -166.40577697753906, "logps/rejected": -324.7575378417969, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0956697463989258, "rewards/margins": 14.986038208007812, "rewards/rejected": -16.081708908081055, "step": 3387 }, { "epoch": 1.16, "learning_rate": 7.969204525325961e-07, "logits/chosen": 0.014055736362934113, "logits/rejected": 0.03541703149676323, "logps/chosen": -285.8271789550781, "logps/rejected": -424.9552307128906, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -2.1480674743652344, "rewards/margins": 16.886043548583984, "rewards/rejected": -19.03411102294922, "step": 3388 }, { "epoch": 1.16, "learning_rate": 7.963792927086132e-07, "logits/chosen": -0.035746980458498, "logits/rejected": 0.006938069127500057, "logps/chosen": -231.10946655273438, "logps/rejected": -337.95880126953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6753185391426086, "rewards/margins": 15.325492858886719, "rewards/rejected": -16.00081443786621, "step": 3389 }, { "epoch": 1.16, "learning_rate": 7.958381950879459e-07, "logits/chosen": 0.009019313380122185, "logits/rejected": 0.021285029128193855, "logps/chosen": -175.30825805664062, "logps/rejected": -311.09417724609375, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.7726556062698364, "rewards/margins": 14.263976097106934, "rewards/rejected": -15.036632537841797, "step": 3390 }, { "epoch": 1.16, "learning_rate": 7.952971598358933e-07, "logits/chosen": -0.120168536901474, "logits/rejected": -0.10159661620855331, "logps/chosen": -179.45138549804688, "logps/rejected": -306.30029296875, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -1.352060317993164, "rewards/margins": 13.015936851501465, "rewards/rejected": -14.367996215820312, "step": 3391 }, { "epoch": 1.16, "learning_rate": 7.947561871177326e-07, "logits/chosen": -0.11636947095394135, "logits/rejected": -0.08305864781141281, "logps/chosen": -232.19052124023438, "logps/rejected": -342.1678466796875, "loss": 0.0241, "rewards/accuracies": 0.9375, "rewards/chosen": -0.045719683170318604, "rewards/margins": 16.90915870666504, "rewards/rejected": -16.954879760742188, "step": 3392 }, { "epoch": 1.16, "learning_rate": 7.942152770987248e-07, "logits/chosen": -0.07909847050905228, "logits/rejected": -0.038599323481321335, "logps/chosen": -201.52748107910156, "logps/rejected": -350.89703369140625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -4.2174763679504395, "rewards/margins": 16.317829132080078, "rewards/rejected": -20.53530502319336, "step": 3393 }, { "epoch": 1.16, "learning_rate": 7.936744299441099e-07, "logits/chosen": 0.023929418995976448, "logits/rejected": 0.059942182153463364, "logps/chosen": -177.15992736816406, "logps/rejected": -322.8209533691406, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.11847984790802, "rewards/margins": 14.113628387451172, "rewards/rejected": -15.232109069824219, "step": 3394 }, { "epoch": 1.16, "learning_rate": 7.931336458191092e-07, "logits/chosen": -0.18946857750415802, "logits/rejected": -0.1496112197637558, "logps/chosen": -236.41050720214844, "logps/rejected": -375.5697021484375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.7664533853530884, "rewards/margins": 18.514419555664062, "rewards/rejected": -19.28087043762207, "step": 3395 }, { "epoch": 1.16, "learning_rate": 7.92592924888925e-07, "logits/chosen": -0.0834575816988945, "logits/rejected": -0.04449623450636864, "logps/chosen": -195.7429962158203, "logps/rejected": -330.81036376953125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.1572632789611816, "rewards/margins": 15.760099411010742, "rewards/rejected": -17.917360305786133, "step": 3396 }, { "epoch": 1.16, "learning_rate": 7.920522673187397e-07, "logits/chosen": -0.07246008515357971, "logits/rejected": -0.05206546187400818, "logps/chosen": -250.1605682373047, "logps/rejected": -385.8573303222656, "loss": 0.1069, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8140308856964111, "rewards/margins": 12.64689826965332, "rewards/rejected": -13.460930824279785, "step": 3397 }, { "epoch": 1.16, "learning_rate": 7.915116732737174e-07, "logits/chosen": -0.19859027862548828, "logits/rejected": -0.1731635332107544, "logps/chosen": -266.5088806152344, "logps/rejected": -429.13604736328125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.6190385818481445, "rewards/margins": 16.254474639892578, "rewards/rejected": -17.87351417541504, "step": 3398 }, { "epoch": 1.16, "learning_rate": 7.909711429190016e-07, "logits/chosen": -0.07313761115074158, "logits/rejected": -0.03748973086476326, "logps/chosen": -207.25616455078125, "logps/rejected": -317.86578369140625, "loss": 0.1192, "rewards/accuracies": 1.0, "rewards/chosen": -0.5532494187355042, "rewards/margins": 13.853933334350586, "rewards/rejected": -14.407182693481445, "step": 3399 }, { "epoch": 1.16, "learning_rate": 7.904306764197168e-07, "logits/chosen": -0.15699319541454315, "logits/rejected": -0.11702536791563034, "logps/chosen": -243.69595336914062, "logps/rejected": -370.917236328125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.47286295890808105, "rewards/margins": 14.70770263671875, "rewards/rejected": -15.180564880371094, "step": 3400 }, { "epoch": 1.16, "learning_rate": 7.898902739409683e-07, "logits/chosen": -0.004939667880535126, "logits/rejected": 0.020712928846478462, "logps/chosen": -227.4593963623047, "logps/rejected": -338.09722900390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.1130930185317993, "rewards/margins": 14.882755279541016, "rewards/rejected": -15.995847702026367, "step": 3401 }, { "epoch": 1.16, "learning_rate": 7.893499356478413e-07, "logits/chosen": -0.02154899574816227, "logits/rejected": -0.018216924741864204, "logps/chosen": -135.70968627929688, "logps/rejected": -249.50881958007812, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.7905402183532715, "rewards/margins": 11.22525405883789, "rewards/rejected": -14.01579475402832, "step": 3402 }, { "epoch": 1.16, "learning_rate": 7.888096617054024e-07, "logits/chosen": -0.03910974785685539, "logits/rejected": -0.022907208651304245, "logps/chosen": -142.39947509765625, "logps/rejected": -325.762939453125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.8603100776672363, "rewards/margins": 16.954925537109375, "rewards/rejected": -19.815235137939453, "step": 3403 }, { "epoch": 1.16, "learning_rate": 7.882694522786972e-07, "logits/chosen": -0.04348677396774292, "logits/rejected": -0.0034448145888745785, "logps/chosen": -195.9589080810547, "logps/rejected": -380.8990783691406, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.13369019329547882, "rewards/margins": 16.619028091430664, "rewards/rejected": -16.752717971801758, "step": 3404 }, { "epoch": 1.16, "learning_rate": 7.877293075327521e-07, "logits/chosen": -0.04683070257306099, "logits/rejected": -0.017538810148835182, "logps/chosen": -200.9457550048828, "logps/rejected": -395.19085693359375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1198413372039795, "rewards/margins": 20.2283992767334, "rewards/rejected": -21.34823989868164, "step": 3405 }, { "epoch": 1.16, "learning_rate": 7.871892276325746e-07, "logits/chosen": -0.06679824739694595, "logits/rejected": -0.04648090526461601, "logps/chosen": -178.53970336914062, "logps/rejected": -312.6431884765625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.6298828125, "rewards/margins": 14.060110092163086, "rewards/rejected": -15.689992904663086, "step": 3406 }, { "epoch": 1.16, "learning_rate": 7.866492127431506e-07, "logits/chosen": -0.13289694488048553, "logits/rejected": -0.10020218789577484, "logps/chosen": -201.96640014648438, "logps/rejected": -337.1558532714844, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -2.593336582183838, "rewards/margins": 14.541095733642578, "rewards/rejected": -17.134431838989258, "step": 3407 }, { "epoch": 1.16, "learning_rate": 7.861092630294484e-07, "logits/chosen": -0.1668434739112854, "logits/rejected": -0.14948830008506775, "logps/chosen": -183.1343231201172, "logps/rejected": -315.7760314941406, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.1975603103637695, "rewards/margins": 14.714730262756348, "rewards/rejected": -16.912290573120117, "step": 3408 }, { "epoch": 1.16, "learning_rate": 7.855693786564144e-07, "logits/chosen": 0.0028200012166053057, "logits/rejected": 0.03764859586954117, "logps/chosen": -177.81292724609375, "logps/rejected": -264.40673828125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.9584300518035889, "rewards/margins": 12.26135540008545, "rewards/rejected": -13.2197847366333, "step": 3409 }, { "epoch": 1.16, "learning_rate": 7.850295597889759e-07, "logits/chosen": -0.15437595546245575, "logits/rejected": -0.12902864813804626, "logps/chosen": -178.1915283203125, "logps/rejected": -280.2602233886719, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.8984562158584595, "rewards/margins": 13.141345024108887, "rewards/rejected": -14.039799690246582, "step": 3410 }, { "epoch": 1.16, "learning_rate": 7.844898065920403e-07, "logits/chosen": -0.04498587176203728, "logits/rejected": -0.009862958453595638, "logps/chosen": -146.96343994140625, "logps/rejected": -296.65631103515625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.7118535041809082, "rewards/margins": 16.409595489501953, "rewards/rejected": -18.121448516845703, "step": 3411 }, { "epoch": 1.16, "learning_rate": 7.839501192304947e-07, "logits/chosen": -0.16748034954071045, "logits/rejected": -0.1526578813791275, "logps/chosen": -253.2611083984375, "logps/rejected": -419.4695129394531, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.513798713684082, "rewards/margins": 16.00635528564453, "rewards/rejected": -17.520153045654297, "step": 3412 }, { "epoch": 1.16, "learning_rate": 7.834104978692062e-07, "logits/chosen": -0.08487354218959808, "logits/rejected": -0.04206658527255058, "logps/chosen": -243.69325256347656, "logps/rejected": -352.2213439941406, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.7890794277191162, "rewards/margins": 14.28710651397705, "rewards/rejected": -16.076187133789062, "step": 3413 }, { "epoch": 1.17, "learning_rate": 7.828709426730216e-07, "logits/chosen": -0.10949817299842834, "logits/rejected": -0.09531478583812714, "logps/chosen": -205.18280029296875, "logps/rejected": -386.34466552734375, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -0.7894562482833862, "rewards/margins": 16.479219436645508, "rewards/rejected": -17.2686767578125, "step": 3414 }, { "epoch": 1.17, "learning_rate": 7.823314538067673e-07, "logits/chosen": -0.18063414096832275, "logits/rejected": -0.19033604860305786, "logps/chosen": -173.0842742919922, "logps/rejected": -332.0321044921875, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -1.8101365566253662, "rewards/margins": 10.49034309387207, "rewards/rejected": -12.300479888916016, "step": 3415 }, { "epoch": 1.17, "learning_rate": 7.817920314352502e-07, "logits/chosen": -0.15838447213172913, "logits/rejected": -0.13830283284187317, "logps/chosen": -191.8709259033203, "logps/rejected": -330.02154541015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.30661022663116455, "rewards/margins": 13.312817573547363, "rewards/rejected": -13.619428634643555, "step": 3416 }, { "epoch": 1.17, "learning_rate": 7.812526757232558e-07, "logits/chosen": -0.042756903916597366, "logits/rejected": 0.01500712800770998, "logps/chosen": -211.01748657226562, "logps/rejected": -412.65966796875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.1063100099563599, "rewards/margins": 19.732749938964844, "rewards/rejected": -20.839061737060547, "step": 3417 }, { "epoch": 1.17, "learning_rate": 7.807133868355507e-07, "logits/chosen": -0.1417800337076187, "logits/rejected": -0.1138424426317215, "logps/chosen": -219.1596221923828, "logps/rejected": -313.7765197753906, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.06391769647598267, "rewards/margins": 13.57725715637207, "rewards/rejected": -13.641175270080566, "step": 3418 }, { "epoch": 1.17, "learning_rate": 7.801741649368797e-07, "logits/chosen": -0.1208914965391159, "logits/rejected": -0.07814954966306686, "logps/chosen": -216.77947998046875, "logps/rejected": -344.2994689941406, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": 0.033980533480644226, "rewards/margins": 16.02713966369629, "rewards/rejected": -15.993158340454102, "step": 3419 }, { "epoch": 1.17, "learning_rate": 7.796350101919671e-07, "logits/chosen": -0.014007111079990864, "logits/rejected": -0.018500598147511482, "logps/chosen": -181.722900390625, "logps/rejected": -325.76617431640625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.7003087997436523, "rewards/margins": 12.130364418029785, "rewards/rejected": -13.830673217773438, "step": 3420 }, { "epoch": 1.17, "learning_rate": 7.790959227655185e-07, "logits/chosen": -0.03278622776269913, "logits/rejected": -0.021844258531928062, "logps/chosen": -192.31097412109375, "logps/rejected": -371.2643737792969, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.9246160984039307, "rewards/margins": 17.1844539642334, "rewards/rejected": -19.10906982421875, "step": 3421 }, { "epoch": 1.17, "learning_rate": 7.785569028222167e-07, "logits/chosen": -0.0695926621556282, "logits/rejected": -0.059104885905981064, "logps/chosen": -249.07080078125, "logps/rejected": -446.99981689453125, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -1.363464593887329, "rewards/margins": 17.828262329101562, "rewards/rejected": -19.191726684570312, "step": 3422 }, { "epoch": 1.17, "learning_rate": 7.780179505267246e-07, "logits/chosen": 0.012478196993470192, "logits/rejected": 0.024233503267169, "logps/chosen": -151.3903350830078, "logps/rejected": -302.9154357910156, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.016181707382202, "rewards/margins": 12.863519668579102, "rewards/rejected": -14.879701614379883, "step": 3423 }, { "epoch": 1.17, "learning_rate": 7.774790660436857e-07, "logits/chosen": -0.11741123348474503, "logits/rejected": -0.07866168022155762, "logps/chosen": -194.68283081054688, "logps/rejected": -303.5086669921875, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.587902307510376, "rewards/margins": 14.29826831817627, "rewards/rejected": -14.886171340942383, "step": 3424 }, { "epoch": 1.17, "learning_rate": 7.76940249537721e-07, "logits/chosen": -0.108610600233078, "logits/rejected": -0.07255998998880386, "logps/chosen": -189.922119140625, "logps/rejected": -344.87164306640625, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.9206616878509521, "rewards/margins": 16.456939697265625, "rewards/rejected": -18.377601623535156, "step": 3425 }, { "epoch": 1.17, "learning_rate": 7.764015011734319e-07, "logits/chosen": -0.10788119584321976, "logits/rejected": -0.08850976079702377, "logps/chosen": -178.96023559570312, "logps/rejected": -269.03985595703125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.4218621253967285, "rewards/margins": 11.28959846496582, "rewards/rejected": -12.71146011352539, "step": 3426 }, { "epoch": 1.17, "learning_rate": 7.758628211153986e-07, "logits/chosen": -0.11367643624544144, "logits/rejected": -0.07769924402236938, "logps/chosen": -235.68324279785156, "logps/rejected": -342.3856506347656, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.6832000017166138, "rewards/margins": 15.147007942199707, "rewards/rejected": -15.830206871032715, "step": 3427 }, { "epoch": 1.17, "learning_rate": 7.753242095281802e-07, "logits/chosen": -0.10463888943195343, "logits/rejected": -0.06192050874233246, "logps/chosen": -178.996826171875, "logps/rejected": -316.4006042480469, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.4555245637893677, "rewards/margins": 14.699682235717773, "rewards/rejected": -16.155208587646484, "step": 3428 }, { "epoch": 1.17, "learning_rate": 7.747856665763151e-07, "logits/chosen": -0.02777896076440811, "logits/rejected": -0.0023767489474266768, "logps/chosen": -176.94493103027344, "logps/rejected": -342.5531005859375, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.4030200242996216, "rewards/margins": 17.795223236083984, "rewards/rejected": -19.198244094848633, "step": 3429 }, { "epoch": 1.17, "learning_rate": 7.742471924243208e-07, "logits/chosen": -0.08044540882110596, "logits/rejected": -0.04980434849858284, "logps/chosen": -232.630615234375, "logps/rejected": -443.29888916015625, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -1.2063074111938477, "rewards/margins": 20.212581634521484, "rewards/rejected": -21.418888092041016, "step": 3430 }, { "epoch": 1.17, "learning_rate": 7.737087872366939e-07, "logits/chosen": -0.09640749543905258, "logits/rejected": -0.07967336475849152, "logps/chosen": -210.898681640625, "logps/rejected": -356.5310974121094, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.849368691444397, "rewards/margins": 12.997050285339355, "rewards/rejected": -13.846419334411621, "step": 3431 }, { "epoch": 1.17, "learning_rate": 7.731704511779098e-07, "logits/chosen": -0.054657626897096634, "logits/rejected": -0.011649584397673607, "logps/chosen": -243.69322204589844, "logps/rejected": -408.9027099609375, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": -2.8101577758789062, "rewards/margins": 19.209903717041016, "rewards/rejected": -22.020063400268555, "step": 3432 }, { "epoch": 1.17, "learning_rate": 7.726321844124221e-07, "logits/chosen": -0.013569386675953865, "logits/rejected": 0.008114590309560299, "logps/chosen": -206.3740234375, "logps/rejected": -351.94976806640625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.4640069603919983, "rewards/margins": 14.954059600830078, "rewards/rejected": -15.418066024780273, "step": 3433 }, { "epoch": 1.17, "learning_rate": 7.720939871046647e-07, "logits/chosen": -0.12854865193367004, "logits/rejected": -0.13105608522891998, "logps/chosen": -196.99188232421875, "logps/rejected": -415.17535400390625, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -2.1063990592956543, "rewards/margins": 17.916711807250977, "rewards/rejected": -20.02311134338379, "step": 3434 }, { "epoch": 1.17, "learning_rate": 7.715558594190485e-07, "logits/chosen": -0.09533988684415817, "logits/rejected": -0.059435438364744186, "logps/chosen": -197.5581512451172, "logps/rejected": -302.69171142578125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -2.2628350257873535, "rewards/margins": 12.651496887207031, "rewards/rejected": -14.914332389831543, "step": 3435 }, { "epoch": 1.17, "learning_rate": 7.710178015199655e-07, "logits/chosen": -0.03705224394798279, "logits/rejected": -0.019141120836138725, "logps/chosen": -178.77102661132812, "logps/rejected": -397.820068359375, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -2.0206894874572754, "rewards/margins": 17.8362979888916, "rewards/rejected": -19.85698890686035, "step": 3436 }, { "epoch": 1.17, "learning_rate": 7.704798135717837e-07, "logits/chosen": -0.008687235414981842, "logits/rejected": -0.003974980674684048, "logps/chosen": -210.3917236328125, "logps/rejected": -394.210205078125, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -1.550774097442627, "rewards/margins": 17.618349075317383, "rewards/rejected": -19.16912269592285, "step": 3437 }, { "epoch": 1.17, "learning_rate": 7.69941895738851e-07, "logits/chosen": -0.048496440052986145, "logits/rejected": -0.031569432467222214, "logps/chosen": -178.96334838867188, "logps/rejected": -275.2128601074219, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -1.1397380828857422, "rewards/margins": 12.87475872039795, "rewards/rejected": -14.014497756958008, "step": 3438 }, { "epoch": 1.17, "learning_rate": 7.694040481854949e-07, "logits/chosen": -0.11044309288263321, "logits/rejected": -0.06050700694322586, "logps/chosen": -186.36129760742188, "logps/rejected": -297.4175720214844, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -2.1986567974090576, "rewards/margins": 14.094675064086914, "rewards/rejected": -16.293331146240234, "step": 3439 }, { "epoch": 1.17, "learning_rate": 7.688662710760193e-07, "logits/chosen": -0.1963539868593216, "logits/rejected": -0.17475318908691406, "logps/chosen": -239.70257568359375, "logps/rejected": -351.20648193359375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.1926639080047607, "rewards/margins": 13.841886520385742, "rewards/rejected": -15.034549713134766, "step": 3440 }, { "epoch": 1.17, "learning_rate": 7.683285645747085e-07, "logits/chosen": -0.1020570918917656, "logits/rejected": -0.07665658742189407, "logps/chosen": -234.50454711914062, "logps/rejected": -374.0188903808594, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -1.6063108444213867, "rewards/margins": 17.534154891967773, "rewards/rejected": -19.14046859741211, "step": 3441 }, { "epoch": 1.17, "learning_rate": 7.677909288458242e-07, "logits/chosen": -0.1255744844675064, "logits/rejected": -0.09259041398763657, "logps/chosen": -239.85955810546875, "logps/rejected": -444.9625244140625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.446892261505127, "rewards/margins": 19.995718002319336, "rewards/rejected": -22.442611694335938, "step": 3442 }, { "epoch": 1.18, "learning_rate": 7.672533640536061e-07, "logits/chosen": -0.08249269425868988, "logits/rejected": -0.042485084384679794, "logps/chosen": -193.8581085205078, "logps/rejected": -305.376953125, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -2.110032796859741, "rewards/margins": 13.35961627960205, "rewards/rejected": -15.469648361206055, "step": 3443 }, { "epoch": 1.18, "learning_rate": 7.667158703622738e-07, "logits/chosen": -0.12021540850400925, "logits/rejected": -0.0974816158413887, "logps/chosen": -218.6141357421875, "logps/rejected": -436.5089416503906, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.9488294124603271, "rewards/margins": 19.93335723876953, "rewards/rejected": -21.882184982299805, "step": 3444 }, { "epoch": 1.18, "learning_rate": 7.661784479360234e-07, "logits/chosen": 0.0073068528436124325, "logits/rejected": 0.013663611374795437, "logps/chosen": -121.89850616455078, "logps/rejected": -296.19073486328125, "loss": 0.0162, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0789178609848022, "rewards/margins": 14.80007553100586, "rewards/rejected": -15.87899112701416, "step": 3445 }, { "epoch": 1.18, "learning_rate": 7.656410969390306e-07, "logits/chosen": -0.04487822577357292, "logits/rejected": -0.0005895415088161826, "logps/chosen": -222.06468200683594, "logps/rejected": -316.1610107421875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.7912204265594482, "rewards/margins": 12.955345153808594, "rewards/rejected": -15.746565818786621, "step": 3446 }, { "epoch": 1.18, "learning_rate": 7.651038175354482e-07, "logits/chosen": -0.0003594034933485091, "logits/rejected": 0.005864366423338652, "logps/chosen": -199.92205810546875, "logps/rejected": -377.29254150390625, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.5079100728034973, "rewards/margins": 16.066970825195312, "rewards/rejected": -16.57488250732422, "step": 3447 }, { "epoch": 1.18, "learning_rate": 7.64566609889408e-07, "logits/chosen": 0.01812031678855419, "logits/rejected": 0.06311871856451035, "logps/chosen": -215.4241180419922, "logps/rejected": -299.8575439453125, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -0.09718549251556396, "rewards/margins": 14.879323959350586, "rewards/rejected": -14.976509094238281, "step": 3448 }, { "epoch": 1.18, "learning_rate": 7.640294741650195e-07, "logits/chosen": -0.10899483412504196, "logits/rejected": -0.05678436905145645, "logps/chosen": -249.45510864257812, "logps/rejected": -368.263916015625, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -0.5783513784408569, "rewards/margins": 16.06740379333496, "rewards/rejected": -16.645755767822266, "step": 3449 }, { "epoch": 1.18, "learning_rate": 7.634924105263699e-07, "logits/chosen": -0.06804072111845016, "logits/rejected": -0.043751634657382965, "logps/chosen": -195.83302307128906, "logps/rejected": -336.8833312988281, "loss": 0.1444, "rewards/accuracies": 1.0, "rewards/chosen": -1.7217222452163696, "rewards/margins": 13.366538047790527, "rewards/rejected": -15.088258743286133, "step": 3450 }, { "epoch": 1.18, "learning_rate": 7.629554191375254e-07, "logits/chosen": -0.025592423975467682, "logits/rejected": -0.0026095642242580652, "logps/chosen": -176.3201141357422, "logps/rejected": -244.26992797851562, "loss": 0.0344, "rewards/accuracies": 0.9375, "rewards/chosen": -1.539088249206543, "rewards/margins": 10.59852123260498, "rewards/rejected": -12.137609481811523, "step": 3451 }, { "epoch": 1.18, "learning_rate": 7.624185001625291e-07, "logits/chosen": -0.08116535097360611, "logits/rejected": -0.056654512882232666, "logps/chosen": -151.89480590820312, "logps/rejected": -284.72833251953125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.6164292693138123, "rewards/margins": 15.274353981018066, "rewards/rejected": -14.65792465209961, "step": 3452 }, { "epoch": 1.18, "learning_rate": 7.618816537654017e-07, "logits/chosen": 0.024836789816617966, "logits/rejected": 0.05109155923128128, "logps/chosen": -149.7044677734375, "logps/rejected": -251.919189453125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.6929690837860107, "rewards/margins": 12.038208961486816, "rewards/rejected": -13.731178283691406, "step": 3453 }, { "epoch": 1.18, "learning_rate": 7.613448801101434e-07, "logits/chosen": -0.07201355695724487, "logits/rejected": -0.05644854158163071, "logps/chosen": -173.5558319091797, "logps/rejected": -278.9808349609375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.850180983543396, "rewards/margins": 11.921215057373047, "rewards/rejected": -13.77139663696289, "step": 3454 }, { "epoch": 1.18, "learning_rate": 7.608081793607306e-07, "logits/chosen": -0.16902698576450348, "logits/rejected": -0.13123664259910583, "logps/chosen": -220.5141143798828, "logps/rejected": -342.70599365234375, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -2.058105230331421, "rewards/margins": 15.87926197052002, "rewards/rejected": -17.937366485595703, "step": 3455 }, { "epoch": 1.18, "learning_rate": 7.602715516811183e-07, "logits/chosen": -0.11010587960481644, "logits/rejected": -0.04907354712486267, "logps/chosen": -180.99365234375, "logps/rejected": -345.9983215332031, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.48390793800354, "rewards/margins": 18.31858253479004, "rewards/rejected": -19.802490234375, "step": 3456 }, { "epoch": 1.18, "learning_rate": 7.597349972352386e-07, "logits/chosen": -0.024163875728845596, "logits/rejected": -0.013508063741028309, "logps/chosen": -198.40286254882812, "logps/rejected": -390.9637145996094, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.1661856174468994, "rewards/margins": 18.16206932067871, "rewards/rejected": -20.3282527923584, "step": 3457 }, { "epoch": 1.18, "learning_rate": 7.591985161870014e-07, "logits/chosen": -0.09122452139854431, "logits/rejected": -0.038113873451948166, "logps/chosen": -232.0076446533203, "logps/rejected": -376.7110595703125, "loss": 0.0407, "rewards/accuracies": 0.9375, "rewards/chosen": -1.390058159828186, "rewards/margins": 17.44976043701172, "rewards/rejected": -18.839818954467773, "step": 3458 }, { "epoch": 1.18, "learning_rate": 7.586621087002945e-07, "logits/chosen": 0.12280656397342682, "logits/rejected": 0.11602392047643661, "logps/chosen": -133.83306884765625, "logps/rejected": -315.7776794433594, "loss": 0.1393, "rewards/accuracies": 1.0, "rewards/chosen": -2.1310317516326904, "rewards/margins": 16.05194854736328, "rewards/rejected": -18.182981491088867, "step": 3459 }, { "epoch": 1.18, "learning_rate": 7.581257749389828e-07, "logits/chosen": -0.07608869671821594, "logits/rejected": -0.06358706206083298, "logps/chosen": -216.63510131835938, "logps/rejected": -368.79315185546875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.3444392681121826, "rewards/margins": 15.852324485778809, "rewards/rejected": -16.19676399230957, "step": 3460 }, { "epoch": 1.18, "learning_rate": 7.575895150669088e-07, "logits/chosen": -0.1718326061964035, "logits/rejected": -0.11071127653121948, "logps/chosen": -232.55596923828125, "logps/rejected": -290.8102722167969, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.10062141716480255, "rewards/margins": 15.892778396606445, "rewards/rejected": -15.792158126831055, "step": 3461 }, { "epoch": 1.18, "learning_rate": 7.570533292478927e-07, "logits/chosen": -0.1262134313583374, "logits/rejected": -0.09767036139965057, "logps/chosen": -208.6397705078125, "logps/rejected": -383.392333984375, "loss": 0.0665, "rewards/accuracies": 1.0, "rewards/chosen": -1.6171619892120361, "rewards/margins": 17.09222412109375, "rewards/rejected": -18.709386825561523, "step": 3462 }, { "epoch": 1.18, "learning_rate": 7.565172176457316e-07, "logits/chosen": -0.1219322457909584, "logits/rejected": -0.12665079534053802, "logps/chosen": -234.24989318847656, "logps/rejected": -452.6175842285156, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.2714892625808716, "rewards/margins": 20.65919303894043, "rewards/rejected": -21.930681228637695, "step": 3463 }, { "epoch": 1.18, "learning_rate": 7.559811804242006e-07, "logits/chosen": -0.04224817827343941, "logits/rejected": 0.009361603297293186, "logps/chosen": -231.69216918945312, "logps/rejected": -276.3431396484375, "loss": 0.0494, "rewards/accuracies": 0.9375, "rewards/chosen": -1.956345796585083, "rewards/margins": 10.448415756225586, "rewards/rejected": -12.404759407043457, "step": 3464 }, { "epoch": 1.18, "learning_rate": 7.554452177470515e-07, "logits/chosen": -0.10933267325162888, "logits/rejected": -0.08879020065069199, "logps/chosen": -214.38613891601562, "logps/rejected": -389.503173828125, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": 0.23483647406101227, "rewards/margins": 19.50337791442871, "rewards/rejected": -19.26854133605957, "step": 3465 }, { "epoch": 1.18, "learning_rate": 7.549093297780131e-07, "logits/chosen": -0.07587862014770508, "logits/rejected": -0.01723543368279934, "logps/chosen": -210.0130615234375, "logps/rejected": -293.5450744628906, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.6709580421447754, "rewards/margins": 12.637365341186523, "rewards/rejected": -13.308324813842773, "step": 3466 }, { "epoch": 1.18, "learning_rate": 7.543735166807926e-07, "logits/chosen": -0.080515056848526, "logits/rejected": -0.03525814414024353, "logps/chosen": -206.46371459960938, "logps/rejected": -343.8717346191406, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.5094988346099854, "rewards/margins": 17.656312942504883, "rewards/rejected": -19.16581153869629, "step": 3467 }, { "epoch": 1.18, "learning_rate": 7.538377786190724e-07, "logits/chosen": -0.10936042666435242, "logits/rejected": -0.0783778727054596, "logps/chosen": -293.9393615722656, "logps/rejected": -436.506591796875, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -0.5408486723899841, "rewards/margins": 18.464365005493164, "rewards/rejected": -19.00521469116211, "step": 3468 }, { "epoch": 1.18, "learning_rate": 7.533021157565141e-07, "logits/chosen": -0.07358827441930771, "logits/rejected": -0.03663705289363861, "logps/chosen": -156.35110473632812, "logps/rejected": -285.60662841796875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.27218759059906, "rewards/margins": 15.021678924560547, "rewards/rejected": -16.293867111206055, "step": 3469 }, { "epoch": 1.18, "learning_rate": 7.527665282567553e-07, "logits/chosen": 0.03167328983545303, "logits/rejected": 0.042841874063014984, "logps/chosen": -204.34747314453125, "logps/rejected": -391.90771484375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.9304518699645996, "rewards/margins": 17.193401336669922, "rewards/rejected": -18.12385368347168, "step": 3470 }, { "epoch": 1.18, "learning_rate": 7.522310162834095e-07, "logits/chosen": 0.014333960600197315, "logits/rejected": 0.02479986846446991, "logps/chosen": -180.2791748046875, "logps/rejected": -385.6188659667969, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.1438674926757812, "rewards/margins": 17.386972427368164, "rewards/rejected": -19.530841827392578, "step": 3471 }, { "epoch": 1.18, "learning_rate": 7.516955800000695e-07, "logits/chosen": -0.06671328842639923, "logits/rejected": -0.020463673397898674, "logps/chosen": -158.559326171875, "logps/rejected": -221.5818328857422, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.541101098060608, "rewards/margins": 11.345939636230469, "rewards/rejected": -12.887040138244629, "step": 3472 }, { "epoch": 1.19, "learning_rate": 7.511602195703027e-07, "logits/chosen": -0.12898407876491547, "logits/rejected": -0.1020977720618248, "logps/chosen": -222.33590698242188, "logps/rejected": -412.7494201660156, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.04511129856109619, "rewards/margins": 19.659021377563477, "rewards/rejected": -19.704133987426758, "step": 3473 }, { "epoch": 1.19, "learning_rate": 7.506249351576552e-07, "logits/chosen": -0.12835904955863953, "logits/rejected": -0.1239326223731041, "logps/chosen": -257.8398742675781, "logps/rejected": -436.4862060546875, "loss": 0.0161, "rewards/accuracies": 0.9375, "rewards/chosen": 0.054238736629486084, "rewards/margins": 16.97061538696289, "rewards/rejected": -16.91637420654297, "step": 3474 }, { "epoch": 1.19, "learning_rate": 7.500897269256483e-07, "logits/chosen": -0.06667084246873856, "logits/rejected": -0.051138848066329956, "logps/chosen": -215.06007385253906, "logps/rejected": -401.01458740234375, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -1.6877943277359009, "rewards/margins": 17.82607650756836, "rewards/rejected": -19.513872146606445, "step": 3475 }, { "epoch": 1.19, "learning_rate": 7.49554595037781e-07, "logits/chosen": -0.08532720059156418, "logits/rejected": -0.054047055542469025, "logps/chosen": -178.23333740234375, "logps/rejected": -320.03375244140625, "loss": 0.0971, "rewards/accuracies": 1.0, "rewards/chosen": -0.7157686948776245, "rewards/margins": 15.858409881591797, "rewards/rejected": -16.57417869567871, "step": 3476 }, { "epoch": 1.19, "learning_rate": 7.490195396575287e-07, "logits/chosen": -0.07013984024524689, "logits/rejected": -0.06465233862400055, "logps/chosen": -174.81320190429688, "logps/rejected": -305.29400634765625, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -1.2326791286468506, "rewards/margins": 12.680538177490234, "rewards/rejected": -13.913217544555664, "step": 3477 }, { "epoch": 1.19, "learning_rate": 7.484845609483434e-07, "logits/chosen": -0.11072526127099991, "logits/rejected": -0.05235530808568001, "logps/chosen": -193.03965759277344, "logps/rejected": -314.46990966796875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.2087433338165283, "rewards/margins": 16.196548461914062, "rewards/rejected": -17.405292510986328, "step": 3478 }, { "epoch": 1.19, "learning_rate": 7.47949659073654e-07, "logits/chosen": -0.08636350929737091, "logits/rejected": -0.07526848465204239, "logps/chosen": -163.53982543945312, "logps/rejected": -328.07025146484375, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.30007675290107727, "rewards/margins": 15.686370849609375, "rewards/rejected": -15.986446380615234, "step": 3479 }, { "epoch": 1.19, "learning_rate": 7.474148341968651e-07, "logits/chosen": -0.10508114844560623, "logits/rejected": -0.08227963000535965, "logps/chosen": -181.82778930664062, "logps/rejected": -334.00469970703125, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.0878219604492188, "rewards/margins": 16.06960678100586, "rewards/rejected": -17.157428741455078, "step": 3480 }, { "epoch": 1.19, "learning_rate": 7.468800864813587e-07, "logits/chosen": -0.117938332259655, "logits/rejected": -0.1091572493314743, "logps/chosen": -216.71217346191406, "logps/rejected": -398.8768005371094, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.16933105885982513, "rewards/margins": 16.405088424682617, "rewards/rejected": -16.574420928955078, "step": 3481 }, { "epoch": 1.19, "learning_rate": 7.463454160904927e-07, "logits/chosen": -0.0628182590007782, "logits/rejected": -0.0461483858525753, "logps/chosen": -118.79821014404297, "logps/rejected": -227.05218505859375, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.52451753616333, "rewards/margins": 10.82960319519043, "rewards/rejected": -12.354122161865234, "step": 3482 }, { "epoch": 1.19, "learning_rate": 7.458108231876014e-07, "logits/chosen": -0.23651078343391418, "logits/rejected": -0.18570946156978607, "logps/chosen": -231.94189453125, "logps/rejected": -440.18902587890625, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.7185170650482178, "rewards/margins": 20.607637405395508, "rewards/rejected": -22.32615852355957, "step": 3483 }, { "epoch": 1.19, "learning_rate": 7.45276307935996e-07, "logits/chosen": -0.13357794284820557, "logits/rejected": -0.10662835091352463, "logps/chosen": -230.57821655273438, "logps/rejected": -384.37628173828125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.150904655456543, "rewards/margins": 16.657140731811523, "rewards/rejected": -18.808042526245117, "step": 3484 }, { "epoch": 1.19, "learning_rate": 7.447418704989634e-07, "logits/chosen": -0.12374024093151093, "logits/rejected": -0.1023346558213234, "logps/chosen": -223.39602661132812, "logps/rejected": -397.45794677734375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.6232913732528687, "rewards/margins": 18.071510314941406, "rewards/rejected": -18.694801330566406, "step": 3485 }, { "epoch": 1.19, "learning_rate": 7.442075110397663e-07, "logits/chosen": -0.04787464812397957, "logits/rejected": -0.038280319422483444, "logps/chosen": -202.24856567382812, "logps/rejected": -322.3957824707031, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.2319509983062744, "rewards/margins": 14.886005401611328, "rewards/rejected": -16.117956161499023, "step": 3486 }, { "epoch": 1.19, "learning_rate": 7.436732297216448e-07, "logits/chosen": -0.059220049530267715, "logits/rejected": -0.026501331478357315, "logps/chosen": -224.59487915039062, "logps/rejected": -353.2420654296875, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -1.8201106786727905, "rewards/margins": 13.566923141479492, "rewards/rejected": -15.387033462524414, "step": 3487 }, { "epoch": 1.19, "learning_rate": 7.431390267078141e-07, "logits/chosen": -0.10406338423490524, "logits/rejected": -0.068405881524086, "logps/chosen": -251.38327026367188, "logps/rejected": -357.1888732910156, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.4691115617752075, "rewards/margins": 15.142098426818848, "rewards/rejected": -15.611210823059082, "step": 3488 }, { "epoch": 1.19, "learning_rate": 7.426049021614662e-07, "logits/chosen": -0.20655854046344757, "logits/rejected": -0.14027953147888184, "logps/chosen": -265.737548828125, "logps/rejected": -420.03369140625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.054743945598602295, "rewards/margins": 19.364572525024414, "rewards/rejected": -19.30982780456543, "step": 3489 }, { "epoch": 1.19, "learning_rate": 7.420708562457685e-07, "logits/chosen": -0.09517867863178253, "logits/rejected": -0.06546655297279358, "logps/chosen": -214.17236328125, "logps/rejected": -310.6482849121094, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.3309929370880127, "rewards/margins": 11.534353256225586, "rewards/rejected": -12.865346908569336, "step": 3490 }, { "epoch": 1.19, "learning_rate": 7.415368891238646e-07, "logits/chosen": -0.13193419575691223, "logits/rejected": -0.08720561861991882, "logps/chosen": -217.44647216796875, "logps/rejected": -320.8305358886719, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.9224931001663208, "rewards/margins": 16.235034942626953, "rewards/rejected": -17.157527923583984, "step": 3491 }, { "epoch": 1.19, "learning_rate": 7.410030009588744e-07, "logits/chosen": -0.00981324166059494, "logits/rejected": 0.001822875114157796, "logps/chosen": -206.53628540039062, "logps/rejected": -362.6136169433594, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.7392973899841309, "rewards/margins": 14.281574249267578, "rewards/rejected": -16.0208740234375, "step": 3492 }, { "epoch": 1.19, "learning_rate": 7.404691919138928e-07, "logits/chosen": -0.06946814060211182, "logits/rejected": -0.0689210444688797, "logps/chosen": -199.3618927001953, "logps/rejected": -386.8159484863281, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.0169589519500732, "rewards/margins": 16.885299682617188, "rewards/rejected": -18.902259826660156, "step": 3493 }, { "epoch": 1.19, "learning_rate": 7.39935462151992e-07, "logits/chosen": -0.1867586374282837, "logits/rejected": -0.17691589891910553, "logps/chosen": -242.80921936035156, "logps/rejected": -371.89093017578125, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -1.7662353515625, "rewards/margins": 13.218745231628418, "rewards/rejected": -14.984980583190918, "step": 3494 }, { "epoch": 1.19, "learning_rate": 7.394018118362182e-07, "logits/chosen": -0.08409292250871658, "logits/rejected": -0.0872148796916008, "logps/chosen": -178.90330505371094, "logps/rejected": -304.33685302734375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.2342307567596436, "rewards/margins": 12.1189546585083, "rewards/rejected": -13.353184700012207, "step": 3495 }, { "epoch": 1.19, "learning_rate": 7.388682411295946e-07, "logits/chosen": -0.033936697989702225, "logits/rejected": -0.022023318335413933, "logps/chosen": -113.2490234375, "logps/rejected": -246.6060791015625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.1603732109069824, "rewards/margins": 11.9962739944458, "rewards/rejected": -13.156648635864258, "step": 3496 }, { "epoch": 1.19, "learning_rate": 7.383347501951196e-07, "logits/chosen": -0.15436667203903198, "logits/rejected": -0.14744259417057037, "logps/chosen": -210.6865234375, "logps/rejected": -351.31964111328125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.0426944494247437, "rewards/margins": 13.069997787475586, "rewards/rejected": -14.112691879272461, "step": 3497 }, { "epoch": 1.19, "learning_rate": 7.378013391957671e-07, "logits/chosen": -0.03343837335705757, "logits/rejected": 0.010956167243421078, "logps/chosen": -184.4823760986328, "logps/rejected": -290.3751220703125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.7904882431030273, "rewards/margins": 14.683145523071289, "rewards/rejected": -15.473634719848633, "step": 3498 }, { "epoch": 1.19, "learning_rate": 7.372680082944875e-07, "logits/chosen": -0.01945669576525688, "logits/rejected": -0.004099218174815178, "logps/chosen": -132.45303344726562, "logps/rejected": -302.5845031738281, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.6270586252212524, "rewards/margins": 15.352165222167969, "rewards/rejected": -15.979225158691406, "step": 3499 }, { "epoch": 1.19, "learning_rate": 7.367347576542057e-07, "logits/chosen": -0.10971714556217194, "logits/rejected": -0.08017575740814209, "logps/chosen": -189.82470703125, "logps/rejected": -414.6354064941406, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.4720603227615356, "rewards/margins": 20.56928062438965, "rewards/rejected": -22.04134178161621, "step": 3500 }, { "epoch": 1.19, "learning_rate": 7.362015874378218e-07, "logits/chosen": 0.04170031473040581, "logits/rejected": 0.04935604706406593, "logps/chosen": -184.3903045654297, "logps/rejected": -388.18902587890625, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -1.899182677268982, "rewards/margins": 18.411884307861328, "rewards/rejected": -20.311067581176758, "step": 3501 }, { "epoch": 1.2, "learning_rate": 7.356684978082128e-07, "logits/chosen": -0.054199691861867905, "logits/rejected": -0.05238952860236168, "logps/chosen": -192.37579345703125, "logps/rejected": -369.55316162109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.576104164123535, "rewards/margins": 14.926858901977539, "rewards/rejected": -17.502962112426758, "step": 3502 }, { "epoch": 1.2, "learning_rate": 7.3513548892823e-07, "logits/chosen": -0.031507886946201324, "logits/rejected": 0.013419036753475666, "logps/chosen": -220.8915557861328, "logps/rejected": -298.8612976074219, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.23037010431289673, "rewards/margins": 13.222984313964844, "rewards/rejected": -13.453354835510254, "step": 3503 }, { "epoch": 1.2, "learning_rate": 7.346025609606995e-07, "logits/chosen": -0.0757678970694542, "logits/rejected": -0.01998109742999077, "logps/chosen": -240.27328491210938, "logps/rejected": -328.28350830078125, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": -1.050490379333496, "rewards/margins": 15.875185012817383, "rewards/rejected": -16.925676345825195, "step": 3504 }, { "epoch": 1.2, "learning_rate": 7.340697140684244e-07, "logits/chosen": -0.11471828818321228, "logits/rejected": -0.06748177856206894, "logps/chosen": -226.3709716796875, "logps/rejected": -356.93035888671875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8960698246955872, "rewards/margins": 16.867292404174805, "rewards/rejected": -17.763362884521484, "step": 3505 }, { "epoch": 1.2, "learning_rate": 7.335369484141818e-07, "logits/chosen": -0.07865241169929504, "logits/rejected": -0.05736416578292847, "logps/chosen": -236.35833740234375, "logps/rejected": -420.7804260253906, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -3.131485939025879, "rewards/margins": 18.032733917236328, "rewards/rejected": -21.16421890258789, "step": 3506 }, { "epoch": 1.2, "learning_rate": 7.330042641607241e-07, "logits/chosen": -0.05455814674496651, "logits/rejected": -0.01764151267707348, "logps/chosen": -144.76629638671875, "logps/rejected": -237.2648468017578, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -1.066650629043579, "rewards/margins": 12.170412063598633, "rewards/rejected": -13.237062454223633, "step": 3507 }, { "epoch": 1.2, "learning_rate": 7.324716614707793e-07, "logits/chosen": -0.09010086208581924, "logits/rejected": -0.055014584213495255, "logps/chosen": -187.02992248535156, "logps/rejected": -347.4610290527344, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.123935580253601, "rewards/margins": 15.46873950958252, "rewards/rejected": -16.592676162719727, "step": 3508 }, { "epoch": 1.2, "learning_rate": 7.319391405070496e-07, "logits/chosen": 0.01168859750032425, "logits/rejected": 0.03161821514368057, "logps/chosen": -196.3416748046875, "logps/rejected": -365.2234191894531, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.262791633605957, "rewards/margins": 16.117238998413086, "rewards/rejected": -17.38003158569336, "step": 3509 }, { "epoch": 1.2, "learning_rate": 7.314067014322135e-07, "logits/chosen": -0.06461811810731888, "logits/rejected": -0.0371997207403183, "logps/chosen": -184.77392578125, "logps/rejected": -293.819091796875, "loss": 0.0229, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9671772718429565, "rewards/margins": 11.719857215881348, "rewards/rejected": -13.687034606933594, "step": 3510 }, { "epoch": 1.2, "learning_rate": 7.308743444089231e-07, "logits/chosen": -0.13460540771484375, "logits/rejected": -0.10469178110361099, "logps/chosen": -272.5213317871094, "logps/rejected": -502.16424560546875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.056183263659477234, "rewards/margins": 22.84759521484375, "rewards/rejected": -22.791412353515625, "step": 3511 }, { "epoch": 1.2, "learning_rate": 7.303420695998068e-07, "logits/chosen": -0.01768430694937706, "logits/rejected": 0.017909305170178413, "logps/chosen": -216.91775512695312, "logps/rejected": -353.1759338378906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.25286602973938, "rewards/margins": 14.972042083740234, "rewards/rejected": -17.22490692138672, "step": 3512 }, { "epoch": 1.2, "learning_rate": 7.298098771674668e-07, "logits/chosen": 0.06901932507753372, "logits/rejected": 0.10589899867773056, "logps/chosen": -140.08355712890625, "logps/rejected": -329.7806091308594, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.730722188949585, "rewards/margins": 17.275876998901367, "rewards/rejected": -19.00659942626953, "step": 3513 }, { "epoch": 1.2, "learning_rate": 7.292777672744802e-07, "logits/chosen": -0.13713021576404572, "logits/rejected": -0.060526225715875626, "logps/chosen": -264.09893798828125, "logps/rejected": -342.45550537109375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.10899616777896881, "rewards/margins": 16.471040725708008, "rewards/rejected": -16.580036163330078, "step": 3514 }, { "epoch": 1.2, "learning_rate": 7.287457400834003e-07, "logits/chosen": 0.009660826995968819, "logits/rejected": 0.0347847081720829, "logps/chosen": -194.04656982421875, "logps/rejected": -296.5450439453125, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.6410592794418335, "rewards/margins": 12.732187271118164, "rewards/rejected": -13.373247146606445, "step": 3515 }, { "epoch": 1.2, "learning_rate": 7.282137957567527e-07, "logits/chosen": -0.07428457587957382, "logits/rejected": -0.04389511048793793, "logps/chosen": -202.2473602294922, "logps/rejected": -332.66705322265625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.960025429725647, "rewards/margins": 16.972320556640625, "rewards/rejected": -17.93234634399414, "step": 3516 }, { "epoch": 1.2, "learning_rate": 7.276819344570404e-07, "logits/chosen": -0.13928279280662537, "logits/rejected": -0.1195790022611618, "logps/chosen": -199.56915283203125, "logps/rejected": -377.06854248046875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -2.4783689975738525, "rewards/margins": 17.44261932373047, "rewards/rejected": -19.92098617553711, "step": 3517 }, { "epoch": 1.2, "learning_rate": 7.27150156346739e-07, "logits/chosen": -0.15471108257770538, "logits/rejected": -0.09671103954315186, "logps/chosen": -206.99319458007812, "logps/rejected": -299.4560852050781, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.9706847071647644, "rewards/margins": 14.288531303405762, "rewards/rejected": -15.25921630859375, "step": 3518 }, { "epoch": 1.2, "learning_rate": 7.26618461588299e-07, "logits/chosen": -0.019312232732772827, "logits/rejected": -0.0066065131686627865, "logps/chosen": -182.4541473388672, "logps/rejected": -339.8964538574219, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": -1.8616482019424438, "rewards/margins": 16.338916778564453, "rewards/rejected": -18.2005672454834, "step": 3519 }, { "epoch": 1.2, "learning_rate": 7.260868503441465e-07, "logits/chosen": -0.14312651753425598, "logits/rejected": -0.11053775250911713, "logps/chosen": -239.37876892089844, "logps/rejected": -443.373046875, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.9644945859909058, "rewards/margins": 20.96600914001465, "rewards/rejected": -21.930503845214844, "step": 3520 }, { "epoch": 1.2, "learning_rate": 7.255553227766811e-07, "logits/chosen": -0.05010136216878891, "logits/rejected": -0.03625292330980301, "logps/chosen": -187.21688842773438, "logps/rejected": -342.7234802246094, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -3.0620265007019043, "rewards/margins": 15.272106170654297, "rewards/rejected": -18.334135055541992, "step": 3521 }, { "epoch": 1.2, "learning_rate": 7.250238790482772e-07, "logits/chosen": 0.01013174932450056, "logits/rejected": 0.025469133630394936, "logps/chosen": -178.92405700683594, "logps/rejected": -321.3256530761719, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -2.192018747329712, "rewards/margins": 15.231783866882324, "rewards/rejected": -17.42380142211914, "step": 3522 }, { "epoch": 1.2, "learning_rate": 7.244925193212836e-07, "logits/chosen": -0.014213777147233486, "logits/rejected": -0.012869751080870628, "logps/chosen": -106.53343963623047, "logps/rejected": -267.57794189453125, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.28335750102996826, "rewards/margins": 13.535534858703613, "rewards/rejected": -13.818893432617188, "step": 3523 }, { "epoch": 1.2, "learning_rate": 7.23961243758023e-07, "logits/chosen": -0.14745858311653137, "logits/rejected": -0.07491297274827957, "logps/chosen": -254.68284606933594, "logps/rejected": -300.21990966796875, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.8203819394111633, "rewards/margins": 13.372274398803711, "rewards/rejected": -14.192656517028809, "step": 3524 }, { "epoch": 1.2, "learning_rate": 7.234300525207932e-07, "logits/chosen": -0.07519388198852539, "logits/rejected": -0.053915105760097504, "logps/chosen": -211.7883758544922, "logps/rejected": -387.0000915527344, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -2.2735393047332764, "rewards/margins": 17.131559371948242, "rewards/rejected": -19.40509796142578, "step": 3525 }, { "epoch": 1.2, "learning_rate": 7.228989457718655e-07, "logits/chosen": -0.06139048933982849, "logits/rejected": -0.043161481618881226, "logps/chosen": -164.21185302734375, "logps/rejected": -356.3066711425781, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.8525676131248474, "rewards/margins": 17.684091567993164, "rewards/rejected": -18.536659240722656, "step": 3526 }, { "epoch": 1.2, "learning_rate": 7.22367923673486e-07, "logits/chosen": -0.11470546573400497, "logits/rejected": -0.08796121925115585, "logps/chosen": -179.7074737548828, "logps/rejected": -335.0123291015625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.4329829216003418, "rewards/margins": 15.782953262329102, "rewards/rejected": -17.2159366607666, "step": 3527 }, { "epoch": 1.2, "learning_rate": 7.218369863878744e-07, "logits/chosen": -0.20824049413204193, "logits/rejected": -0.1870695799589157, "logps/chosen": -182.80462646484375, "logps/rejected": -342.6532897949219, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.23411940038204193, "rewards/margins": 15.418384552001953, "rewards/rejected": -15.652502059936523, "step": 3528 }, { "epoch": 1.2, "learning_rate": 7.213061340772242e-07, "logits/chosen": -0.12093200534582138, "logits/rejected": -0.09046697616577148, "logps/chosen": -281.834716796875, "logps/rejected": -455.34515380859375, "loss": 0.0157, "rewards/accuracies": 0.9375, "rewards/chosen": -2.336153507232666, "rewards/margins": 20.328899383544922, "rewards/rejected": -22.665054321289062, "step": 3529 }, { "epoch": 1.2, "learning_rate": 7.207753669037047e-07, "logits/chosen": 0.015544869005680084, "logits/rejected": 0.0016310237115249038, "logps/chosen": -129.02896118164062, "logps/rejected": -339.4816589355469, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.5326924324035645, "rewards/margins": 16.6159725189209, "rewards/rejected": -19.148664474487305, "step": 3530 }, { "epoch": 1.21, "learning_rate": 7.202446850294565e-07, "logits/chosen": -0.06017661839723587, "logits/rejected": -0.05474868044257164, "logps/chosen": -247.65565490722656, "logps/rejected": -413.0196533203125, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -1.7664353847503662, "rewards/margins": 15.661581039428711, "rewards/rejected": -17.428014755249023, "step": 3531 }, { "epoch": 1.21, "learning_rate": 7.197140886165969e-07, "logits/chosen": -0.0025595631450414658, "logits/rejected": 0.036229606717824936, "logps/chosen": -194.09735107421875, "logps/rejected": -274.97833251953125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.5115355253219604, "rewards/margins": 11.418890953063965, "rewards/rejected": -12.930425643920898, "step": 3532 }, { "epoch": 1.21, "learning_rate": 7.191835778272153e-07, "logits/chosen": -0.137851282954216, "logits/rejected": -0.09739035367965698, "logps/chosen": -195.6243438720703, "logps/rejected": -260.06573486328125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5474641919136047, "rewards/margins": 12.342968940734863, "rewards/rejected": -12.890434265136719, "step": 3533 }, { "epoch": 1.21, "learning_rate": 7.186531528233746e-07, "logits/chosen": -0.08272107690572739, "logits/rejected": -0.06010710075497627, "logps/chosen": -191.05166625976562, "logps/rejected": -309.84625244140625, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -1.0233445167541504, "rewards/margins": 14.386774063110352, "rewards/rejected": -15.41011905670166, "step": 3534 }, { "epoch": 1.21, "learning_rate": 7.181228137671136e-07, "logits/chosen": -0.010668464936316013, "logits/rejected": 0.010034886188805103, "logps/chosen": -142.7843780517578, "logps/rejected": -285.766845703125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.49549952149391174, "rewards/margins": 12.195122718811035, "rewards/rejected": -12.690624237060547, "step": 3535 }, { "epoch": 1.21, "learning_rate": 7.175925608204427e-07, "logits/chosen": -0.10223771631717682, "logits/rejected": -0.058318573981523514, "logps/chosen": -269.29217529296875, "logps/rejected": -365.1522216796875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.5280020236968994, "rewards/margins": 15.22891616821289, "rewards/rejected": -15.756917953491211, "step": 3536 }, { "epoch": 1.21, "learning_rate": 7.170623941453475e-07, "logits/chosen": -0.029309548437595367, "logits/rejected": -0.013357007876038551, "logps/chosen": -153.0652618408203, "logps/rejected": -315.83050537109375, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -3.6354241371154785, "rewards/margins": 13.801680564880371, "rewards/rejected": -17.437105178833008, "step": 3537 }, { "epoch": 1.21, "learning_rate": 7.165323139037862e-07, "logits/chosen": -0.0654057189822197, "logits/rejected": -0.059193190187215805, "logps/chosen": -172.0688934326172, "logps/rejected": -390.79736328125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.6154950857162476, "rewards/margins": 18.44465446472168, "rewards/rejected": -19.060148239135742, "step": 3538 }, { "epoch": 1.21, "learning_rate": 7.160023202576908e-07, "logits/chosen": -0.10407990962266922, "logits/rejected": -0.07958813011646271, "logps/chosen": -238.86558532714844, "logps/rejected": -455.033935546875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.406636357307434, "rewards/margins": 21.879470825195312, "rewards/rejected": -23.286108016967773, "step": 3539 }, { "epoch": 1.21, "learning_rate": 7.154724133689676e-07, "logits/chosen": -0.08933761715888977, "logits/rejected": -0.06783850491046906, "logps/chosen": -175.960693359375, "logps/rejected": -320.8679504394531, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -2.058265209197998, "rewards/margins": 13.411789894104004, "rewards/rejected": -15.470054626464844, "step": 3540 }, { "epoch": 1.21, "learning_rate": 7.149425933994955e-07, "logits/chosen": -0.11201407015323639, "logits/rejected": -0.09968982636928558, "logps/chosen": -200.69544982910156, "logps/rejected": -424.63134765625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.139373302459717, "rewards/margins": 17.8848934173584, "rewards/rejected": -20.024267196655273, "step": 3541 }, { "epoch": 1.21, "learning_rate": 7.14412860511127e-07, "logits/chosen": -0.09337671846151352, "logits/rejected": -0.04162143915891647, "logps/chosen": -199.04165649414062, "logps/rejected": -339.3492736816406, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.020193904638290405, "rewards/margins": 19.032480239868164, "rewards/rejected": -19.05267333984375, "step": 3542 }, { "epoch": 1.21, "learning_rate": 7.138832148656888e-07, "logits/chosen": -0.026538904756307602, "logits/rejected": -0.013269164599478245, "logps/chosen": -214.72451782226562, "logps/rejected": -315.4669494628906, "loss": 0.0323, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2122719287872314, "rewards/margins": 9.552268028259277, "rewards/rejected": -11.76453971862793, "step": 3543 }, { "epoch": 1.21, "learning_rate": 7.133536566249794e-07, "logits/chosen": -0.07055305689573288, "logits/rejected": -0.05546825751662254, "logps/chosen": -217.38156127929688, "logps/rejected": -364.71435546875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6342144012451172, "rewards/margins": 14.335278511047363, "rewards/rejected": -14.969491004943848, "step": 3544 }, { "epoch": 1.21, "learning_rate": 7.128241859507726e-07, "logits/chosen": -0.060519613325595856, "logits/rejected": -0.041188061237335205, "logps/chosen": -190.5098876953125, "logps/rejected": -287.0041809082031, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.2012595534324646, "rewards/margins": 12.294296264648438, "rewards/rejected": -12.495556831359863, "step": 3545 }, { "epoch": 1.21, "learning_rate": 7.122948030048139e-07, "logits/chosen": 0.01468837633728981, "logits/rejected": 0.04075537994503975, "logps/chosen": -159.19949340820312, "logps/rejected": -313.79241943359375, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -1.234548568725586, "rewards/margins": 16.172637939453125, "rewards/rejected": -17.40718650817871, "step": 3546 }, { "epoch": 1.21, "learning_rate": 7.117655079488221e-07, "logits/chosen": -0.16944602131843567, "logits/rejected": -0.14153127372264862, "logps/chosen": -217.591552734375, "logps/rejected": -384.099853515625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -2.3380002975463867, "rewards/margins": 17.18863296508789, "rewards/rejected": -19.526634216308594, "step": 3547 }, { "epoch": 1.21, "learning_rate": 7.112363009444903e-07, "logits/chosen": 0.08466722816228867, "logits/rejected": 0.10000590234994888, "logps/chosen": -161.14199829101562, "logps/rejected": -286.416015625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.1974608898162842, "rewards/margins": 13.817240715026855, "rewards/rejected": -15.014701843261719, "step": 3548 }, { "epoch": 1.21, "learning_rate": 7.107071821534829e-07, "logits/chosen": -0.09313712269067764, "logits/rejected": -0.06753683090209961, "logps/chosen": -218.42752075195312, "logps/rejected": -373.5191955566406, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.1556813716888428, "rewards/margins": 15.25663948059082, "rewards/rejected": -16.412321090698242, "step": 3549 }, { "epoch": 1.21, "learning_rate": 7.101781517374396e-07, "logits/chosen": -0.004622785374522209, "logits/rejected": 0.01201822329312563, "logps/chosen": -149.06588745117188, "logps/rejected": -315.34326171875, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.6245816946029663, "rewards/margins": 15.94410514831543, "rewards/rejected": -17.568687438964844, "step": 3550 }, { "epoch": 1.21, "learning_rate": 7.096492098579713e-07, "logits/chosen": -0.046038947999477386, "logits/rejected": -0.030636196956038475, "logps/chosen": -173.70518493652344, "logps/rejected": -306.0062561035156, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.310727596282959, "rewards/margins": 14.709087371826172, "rewards/rejected": -17.019813537597656, "step": 3551 }, { "epoch": 1.21, "learning_rate": 7.091203566766622e-07, "logits/chosen": -0.05045268312096596, "logits/rejected": -0.036539603024721146, "logps/chosen": -174.86441040039062, "logps/rejected": -313.257568359375, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.2482187747955322, "rewards/margins": 12.437216758728027, "rewards/rejected": -13.685434341430664, "step": 3552 }, { "epoch": 1.21, "learning_rate": 7.0859159235507e-07, "logits/chosen": -0.13622374832630157, "logits/rejected": -0.104689821600914, "logps/chosen": -242.81915283203125, "logps/rejected": -395.1403503417969, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.36948823928833, "rewards/margins": 14.457316398620605, "rewards/rejected": -16.82680320739746, "step": 3553 }, { "epoch": 1.21, "learning_rate": 7.080629170547248e-07, "logits/chosen": -0.039906881749629974, "logits/rejected": 0.004658490885049105, "logps/chosen": -199.52154541015625, "logps/rejected": -340.6387939453125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.21356332302093506, "rewards/margins": 17.196353912353516, "rewards/rejected": -17.4099178314209, "step": 3554 }, { "epoch": 1.21, "learning_rate": 7.075343309371297e-07, "logits/chosen": 0.037003327161073685, "logits/rejected": 0.05440008267760277, "logps/chosen": -165.37750244140625, "logps/rejected": -299.7865295410156, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -2.757610559463501, "rewards/margins": 11.63674545288086, "rewards/rejected": -14.394353866577148, "step": 3555 }, { "epoch": 1.21, "learning_rate": 7.070058341637604e-07, "logits/chosen": -0.07362443208694458, "logits/rejected": -0.03772696107625961, "logps/chosen": -168.4514923095703, "logps/rejected": -377.3810119628906, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.1501374244689941, "rewards/margins": 19.742855072021484, "rewards/rejected": -20.892993927001953, "step": 3556 }, { "epoch": 1.21, "learning_rate": 7.064774268960652e-07, "logits/chosen": -0.0070992582477629185, "logits/rejected": 0.017487628385424614, "logps/chosen": -178.68719482421875, "logps/rejected": -348.5908203125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.8585907220840454, "rewards/margins": 15.065999031066895, "rewards/rejected": -16.924589157104492, "step": 3557 }, { "epoch": 1.21, "learning_rate": 7.059491092954657e-07, "logits/chosen": -0.022198399528861046, "logits/rejected": 0.0026243876200169325, "logps/chosen": -230.698486328125, "logps/rejected": -366.0262451171875, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -0.3738819360733032, "rewards/margins": 15.9887113571167, "rewards/rejected": -16.362592697143555, "step": 3558 }, { "epoch": 1.21, "learning_rate": 7.054208815233547e-07, "logits/chosen": -0.023316990584135056, "logits/rejected": -0.000351797032635659, "logps/chosen": -176.9319305419922, "logps/rejected": -334.8052673339844, "loss": 0.017, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7345666885375977, "rewards/margins": 13.83238697052002, "rewards/rejected": -16.566953659057617, "step": 3559 }, { "epoch": 1.22, "learning_rate": 7.048927437410999e-07, "logits/chosen": -0.06265916675329208, "logits/rejected": -0.015956280753016472, "logps/chosen": -202.13035583496094, "logps/rejected": -288.7463073730469, "loss": 0.0253, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1795467138290405, "rewards/margins": 14.577775955200195, "rewards/rejected": -15.75732135772705, "step": 3560 }, { "epoch": 1.22, "learning_rate": 7.043646961100389e-07, "logits/chosen": -0.05321237072348595, "logits/rejected": 0.01106964610517025, "logps/chosen": -235.86380004882812, "logps/rejected": -418.3262634277344, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.3375627994537354, "rewards/margins": 21.507301330566406, "rewards/rejected": -22.84486198425293, "step": 3561 }, { "epoch": 1.22, "learning_rate": 7.038367387914832e-07, "logits/chosen": -0.06927163898944855, "logits/rejected": -0.07357804477214813, "logps/chosen": -243.92355346679688, "logps/rejected": -411.1385498046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6447010636329651, "rewards/margins": 18.046098709106445, "rewards/rejected": -18.690799713134766, "step": 3562 }, { "epoch": 1.22, "learning_rate": 7.033088719467171e-07, "logits/chosen": 0.04207334294915199, "logits/rejected": 0.07545027881860733, "logps/chosen": -169.86521911621094, "logps/rejected": -294.15478515625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.7053021788597107, "rewards/margins": 15.105002403259277, "rewards/rejected": -15.810304641723633, "step": 3563 }, { "epoch": 1.22, "learning_rate": 7.027810957369956e-07, "logits/chosen": -0.037195973098278046, "logits/rejected": -0.0068229492753744125, "logps/chosen": -189.51934814453125, "logps/rejected": -277.394775390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.1395089477300644, "rewards/margins": 13.466631889343262, "rewards/rejected": -13.327122688293457, "step": 3564 }, { "epoch": 1.22, "learning_rate": 7.022534103235481e-07, "logits/chosen": -0.026325145736336708, "logits/rejected": 0.01711459271609783, "logps/chosen": -162.0494384765625, "logps/rejected": -341.87518310546875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.5024185180664062, "rewards/margins": 17.627853393554688, "rewards/rejected": -19.130268096923828, "step": 3565 }, { "epoch": 1.22, "learning_rate": 7.017258158675747e-07, "logits/chosen": -0.13739027082920074, "logits/rejected": -0.10569828748703003, "logps/chosen": -232.4633331298828, "logps/rejected": -358.3951416015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1248153448104858, "rewards/margins": 15.940262794494629, "rewards/rejected": -17.065078735351562, "step": 3566 }, { "epoch": 1.22, "learning_rate": 7.011983125302482e-07, "logits/chosen": -0.11322414875030518, "logits/rejected": -0.10043036192655563, "logps/chosen": -193.17369079589844, "logps/rejected": -378.69091796875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.363358974456787, "rewards/margins": 17.21259117126465, "rewards/rejected": -18.575950622558594, "step": 3567 }, { "epoch": 1.22, "learning_rate": 7.006709004727139e-07, "logits/chosen": 0.05942971631884575, "logits/rejected": 0.07138928025960922, "logps/chosen": -119.50293731689453, "logps/rejected": -229.39048767089844, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.8353662490844727, "rewards/margins": 11.304272651672363, "rewards/rejected": -14.139639854431152, "step": 3568 }, { "epoch": 1.22, "learning_rate": 7.001435798560883e-07, "logits/chosen": 0.01922299526631832, "logits/rejected": 0.04462691769003868, "logps/chosen": -133.35861206054688, "logps/rejected": -259.5596923828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.921796202659607, "rewards/margins": 14.231729507446289, "rewards/rejected": -16.153526306152344, "step": 3569 }, { "epoch": 1.22, "learning_rate": 6.996163508414612e-07, "logits/chosen": -0.13250836730003357, "logits/rejected": -0.10163592547178268, "logps/chosen": -235.3274688720703, "logps/rejected": -428.88287353515625, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -1.4697805643081665, "rewards/margins": 19.861595153808594, "rewards/rejected": -21.331377029418945, "step": 3570 }, { "epoch": 1.22, "learning_rate": 6.990892135898936e-07, "logits/chosen": 0.0010285130701959133, "logits/rejected": 0.016867682337760925, "logps/chosen": -235.9719696044922, "logps/rejected": -370.1866455078125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.006445050239562988, "rewards/margins": 16.903518676757812, "rewards/rejected": -16.909961700439453, "step": 3571 }, { "epoch": 1.22, "learning_rate": 6.985621682624183e-07, "logits/chosen": -0.042527906596660614, "logits/rejected": -0.03805622085928917, "logps/chosen": -199.69131469726562, "logps/rejected": -359.4277038574219, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7857519388198853, "rewards/margins": 14.651037216186523, "rewards/rejected": -16.43678855895996, "step": 3572 }, { "epoch": 1.22, "learning_rate": 6.980352150200408e-07, "logits/chosen": 0.03874989226460457, "logits/rejected": 0.05941394716501236, "logps/chosen": -183.92848205566406, "logps/rejected": -364.2136535644531, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0278606414794922, "rewards/margins": 16.58714485168457, "rewards/rejected": -17.615005493164062, "step": 3573 }, { "epoch": 1.22, "learning_rate": 6.975083540237379e-07, "logits/chosen": -0.06855825334787369, "logits/rejected": -0.022549010813236237, "logps/chosen": -288.41424560546875, "logps/rejected": -439.19219970703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7423453330993652, "rewards/margins": 18.42215919494629, "rewards/rejected": -19.164501190185547, "step": 3574 }, { "epoch": 1.22, "learning_rate": 6.969815854344585e-07, "logits/chosen": -0.14534127712249756, "logits/rejected": -0.08383791893720627, "logps/chosen": -281.0955810546875, "logps/rejected": -435.2283630371094, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": -2.815196990966797, "rewards/margins": 15.509187698364258, "rewards/rejected": -18.324386596679688, "step": 3575 }, { "epoch": 1.22, "learning_rate": 6.964549094131229e-07, "logits/chosen": -0.01392068900167942, "logits/rejected": 0.03016766719520092, "logps/chosen": -202.18414306640625, "logps/rejected": -349.3996276855469, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6001443862915039, "rewards/margins": 17.85491180419922, "rewards/rejected": -18.455055236816406, "step": 3576 }, { "epoch": 1.22, "learning_rate": 6.959283261206231e-07, "logits/chosen": 0.04037277400493622, "logits/rejected": 0.04736815765500069, "logps/chosen": -157.32781982421875, "logps/rejected": -294.92352294921875, "loss": 0.0194, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7137248516082764, "rewards/margins": 13.55803108215332, "rewards/rejected": -15.271754264831543, "step": 3577 }, { "epoch": 1.22, "learning_rate": 6.95401835717824e-07, "logits/chosen": -0.07198009639978409, "logits/rejected": -0.04227861016988754, "logps/chosen": -201.78732299804688, "logps/rejected": -333.6269226074219, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.234884262084961, "rewards/margins": 13.336954116821289, "rewards/rejected": -15.57183837890625, "step": 3578 }, { "epoch": 1.22, "learning_rate": 6.948754383655604e-07, "logits/chosen": -0.06891781836748123, "logits/rejected": -0.02828199788928032, "logps/chosen": -215.45648193359375, "logps/rejected": -345.08514404296875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.8401468992233276, "rewards/margins": 16.48103904724121, "rewards/rejected": -18.321186065673828, "step": 3579 }, { "epoch": 1.22, "learning_rate": 6.943491342246393e-07, "logits/chosen": -0.0846601203083992, "logits/rejected": -0.06936615705490112, "logps/chosen": -235.29876708984375, "logps/rejected": -428.07037353515625, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.186208963394165, "rewards/margins": 18.534080505371094, "rewards/rejected": -19.72028923034668, "step": 3580 }, { "epoch": 1.22, "learning_rate": 6.938229234558397e-07, "logits/chosen": 0.0032544778659939766, "logits/rejected": 0.03776445612311363, "logps/chosen": -205.72314453125, "logps/rejected": -376.3642578125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 0.8816951513290405, "rewards/margins": 19.43815040588379, "rewards/rejected": -18.556455612182617, "step": 3581 }, { "epoch": 1.22, "learning_rate": 6.932968062199116e-07, "logits/chosen": 0.002637910656630993, "logits/rejected": 0.03624492883682251, "logps/chosen": -189.570556640625, "logps/rejected": -305.7480773925781, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.4681073427200317, "rewards/margins": 14.836883544921875, "rewards/rejected": -16.30499267578125, "step": 3582 }, { "epoch": 1.22, "learning_rate": 6.927707826775769e-07, "logits/chosen": -0.043261799961328506, "logits/rejected": -0.03806843236088753, "logps/chosen": -171.49472045898438, "logps/rejected": -317.4430236816406, "loss": 0.0281, "rewards/accuracies": 0.9375, "rewards/chosen": -2.878502368927002, "rewards/margins": 12.558083534240723, "rewards/rejected": -15.4365873336792, "step": 3583 }, { "epoch": 1.22, "learning_rate": 6.922448529895282e-07, "logits/chosen": -0.04299088567495346, "logits/rejected": -0.00020612837397493422, "logps/chosen": -189.96726989746094, "logps/rejected": -317.55169677734375, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -2.0994389057159424, "rewards/margins": 16.7108154296875, "rewards/rejected": -18.810253143310547, "step": 3584 }, { "epoch": 1.22, "learning_rate": 6.917190173164294e-07, "logits/chosen": 0.03594484180212021, "logits/rejected": 0.07114985585212708, "logps/chosen": -204.48626708984375, "logps/rejected": -336.81640625, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -1.015657901763916, "rewards/margins": 15.635136604309082, "rewards/rejected": -16.650794982910156, "step": 3585 }, { "epoch": 1.22, "learning_rate": 6.911932758189169e-07, "logits/chosen": 0.057670965790748596, "logits/rejected": 0.07670695334672928, "logps/chosen": -195.2304229736328, "logps/rejected": -316.5712585449219, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.5851622819900513, "rewards/margins": 12.889118194580078, "rewards/rejected": -14.474281311035156, "step": 3586 }, { "epoch": 1.22, "learning_rate": 6.906676286575966e-07, "logits/chosen": -0.03633219376206398, "logits/rejected": 0.002198081696406007, "logps/chosen": -245.08543395996094, "logps/rejected": -415.7182312011719, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -0.14648950099945068, "rewards/margins": 21.46688461303711, "rewards/rejected": -21.613374710083008, "step": 3587 }, { "epoch": 1.22, "learning_rate": 6.901420759930472e-07, "logits/chosen": -0.01022337842732668, "logits/rejected": 0.03193226084113121, "logps/chosen": -217.71923828125, "logps/rejected": -279.5267333984375, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.889888346195221, "rewards/margins": 13.772294044494629, "rewards/rejected": -14.662182807922363, "step": 3588 }, { "epoch": 1.22, "learning_rate": 6.896166179858174e-07, "logits/chosen": 0.011178718879818916, "logits/rejected": 0.014303047209978104, "logps/chosen": -150.0179901123047, "logps/rejected": -327.83935546875, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": -1.2477924823760986, "rewards/margins": 14.19666862487793, "rewards/rejected": -15.444459915161133, "step": 3589 }, { "epoch": 1.23, "learning_rate": 6.890912547964272e-07, "logits/chosen": -0.04019532725214958, "logits/rejected": -0.00689670629799366, "logps/chosen": -192.5098419189453, "logps/rejected": -280.59930419921875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.3875733017921448, "rewards/margins": 15.350201606750488, "rewards/rejected": -15.737773895263672, "step": 3590 }, { "epoch": 1.23, "learning_rate": 6.885659865853684e-07, "logits/chosen": 0.03232955560088158, "logits/rejected": 0.02384174056351185, "logps/chosen": -181.85833740234375, "logps/rejected": -377.14794921875, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -0.9779207110404968, "rewards/margins": 17.504074096679688, "rewards/rejected": -18.481996536254883, "step": 3591 }, { "epoch": 1.23, "learning_rate": 6.880408135131022e-07, "logits/chosen": -0.0004724305763375014, "logits/rejected": 0.021949131041765213, "logps/chosen": -236.58184814453125, "logps/rejected": -415.2914123535156, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -1.3952288627624512, "rewards/margins": 18.385334014892578, "rewards/rejected": -19.780561447143555, "step": 3592 }, { "epoch": 1.23, "learning_rate": 6.87515735740063e-07, "logits/chosen": -0.055839888751506805, "logits/rejected": -0.03843124955892563, "logps/chosen": -244.0390625, "logps/rejected": -435.406005859375, "loss": 0.0272, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7853847742080688, "rewards/margins": 18.68512535095215, "rewards/rejected": -20.470508575439453, "step": 3593 }, { "epoch": 1.23, "learning_rate": 6.86990753426654e-07, "logits/chosen": -0.048323627561330795, "logits/rejected": -0.046311166137456894, "logps/chosen": -243.06236267089844, "logps/rejected": -492.1119079589844, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.528109073638916, "rewards/margins": 20.92169761657715, "rewards/rejected": -24.449804306030273, "step": 3594 }, { "epoch": 1.23, "learning_rate": 6.864658667332497e-07, "logits/chosen": 0.07480557262897491, "logits/rejected": 0.10636931657791138, "logps/chosen": -190.90594482421875, "logps/rejected": -359.4221496582031, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5215308666229248, "rewards/margins": 16.408782958984375, "rewards/rejected": -17.930313110351562, "step": 3595 }, { "epoch": 1.23, "learning_rate": 6.859410758201969e-07, "logits/chosen": -0.08306407183408737, "logits/rejected": -0.06049439311027527, "logps/chosen": -189.273193359375, "logps/rejected": -298.74346923828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.3853517770767212, "rewards/margins": 13.364114761352539, "rewards/rejected": -14.749467849731445, "step": 3596 }, { "epoch": 1.23, "learning_rate": 6.854163808478107e-07, "logits/chosen": -0.05388021841645241, "logits/rejected": -0.005976318847388029, "logps/chosen": -275.6605529785156, "logps/rejected": -400.8984069824219, "loss": 0.0534, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2402767539024353, "rewards/margins": 19.196739196777344, "rewards/rejected": -19.4370174407959, "step": 3597 }, { "epoch": 1.23, "learning_rate": 6.848917819763793e-07, "logits/chosen": -0.055817510932683945, "logits/rejected": -0.02262282371520996, "logps/chosen": -198.22207641601562, "logps/rejected": -281.9376525878906, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -0.8931475877761841, "rewards/margins": 12.69417667388916, "rewards/rejected": -13.587322235107422, "step": 3598 }, { "epoch": 1.23, "learning_rate": 6.843672793661599e-07, "logits/chosen": -0.00900818221271038, "logits/rejected": 0.007207747548818588, "logps/chosen": -228.32618713378906, "logps/rejected": -322.9501953125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.0136950016021729, "rewards/margins": 12.101322174072266, "rewards/rejected": -13.11501693725586, "step": 3599 }, { "epoch": 1.23, "learning_rate": 6.838428731773806e-07, "logits/chosen": 0.0402519628405571, "logits/rejected": 0.035188790410757065, "logps/chosen": -212.21566772460938, "logps/rejected": -438.2957458496094, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.297179102897644, "rewards/margins": 18.91236114501953, "rewards/rejected": -20.20953941345215, "step": 3600 }, { "epoch": 1.23, "learning_rate": 6.833185635702407e-07, "logits/chosen": -0.10990052670240402, "logits/rejected": -0.047404345124959946, "logps/chosen": -214.95233154296875, "logps/rejected": -291.36346435546875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.6662216186523438, "rewards/margins": 14.212274551391602, "rewards/rejected": -15.878498077392578, "step": 3601 }, { "epoch": 1.23, "learning_rate": 6.82794350704909e-07, "logits/chosen": -0.07001660764217377, "logits/rejected": -0.06318856030702591, "logps/chosen": -195.125, "logps/rejected": -306.45526123046875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.5313372015953064, "rewards/margins": 12.108634948730469, "rewards/rejected": -12.639970779418945, "step": 3602 }, { "epoch": 1.23, "learning_rate": 6.822702347415259e-07, "logits/chosen": 0.020208783447742462, "logits/rejected": 0.02073168195784092, "logps/chosen": -170.9319610595703, "logps/rejected": -367.1612548828125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.4114735126495361, "rewards/margins": 17.4437255859375, "rewards/rejected": -18.855199813842773, "step": 3603 }, { "epoch": 1.23, "learning_rate": 6.817462158402014e-07, "logits/chosen": -0.0743040069937706, "logits/rejected": -0.047483086585998535, "logps/chosen": -208.6761932373047, "logps/rejected": -365.73980712890625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.6161643266677856, "rewards/margins": 15.680923461914062, "rewards/rejected": -17.297086715698242, "step": 3604 }, { "epoch": 1.23, "learning_rate": 6.812222941610157e-07, "logits/chosen": -0.03388116881251335, "logits/rejected": -0.011421751230955124, "logps/chosen": -214.88214111328125, "logps/rejected": -357.248779296875, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -2.1420156955718994, "rewards/margins": 14.503684043884277, "rewards/rejected": -16.645700454711914, "step": 3605 }, { "epoch": 1.23, "learning_rate": 6.806984698640201e-07, "logits/chosen": -0.05314961075782776, "logits/rejected": -0.03426983207464218, "logps/chosen": -233.52127075195312, "logps/rejected": -386.80364990234375, "loss": 0.1189, "rewards/accuracies": 1.0, "rewards/chosen": -1.9827847480773926, "rewards/margins": 15.059317588806152, "rewards/rejected": -17.04210090637207, "step": 3606 }, { "epoch": 1.23, "learning_rate": 6.801747431092352e-07, "logits/chosen": 0.006826732773333788, "logits/rejected": 0.017837153747677803, "logps/chosen": -185.07382202148438, "logps/rejected": -403.9052734375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.144172430038452, "rewards/margins": 20.483123779296875, "rewards/rejected": -22.627300262451172, "step": 3607 }, { "epoch": 1.23, "learning_rate": 6.796511140566531e-07, "logits/chosen": 0.037840019911527634, "logits/rejected": 0.07647470384836197, "logps/chosen": -152.3110809326172, "logps/rejected": -297.59674072265625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.9704567193984985, "rewards/margins": 14.297324180603027, "rewards/rejected": -15.267780303955078, "step": 3608 }, { "epoch": 1.23, "learning_rate": 6.791275828662346e-07, "logits/chosen": 0.017274754121899605, "logits/rejected": 0.054680585861206055, "logps/chosen": -202.35072326660156, "logps/rejected": -304.5980529785156, "loss": 0.0588, "rewards/accuracies": 0.875, "rewards/chosen": -1.3836278915405273, "rewards/margins": 13.006755828857422, "rewards/rejected": -14.39038372039795, "step": 3609 }, { "epoch": 1.23, "learning_rate": 6.78604149697911e-07, "logits/chosen": 0.019224219024181366, "logits/rejected": 0.04949851706624031, "logps/chosen": -198.25970458984375, "logps/rejected": -291.86517333984375, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.975437879562378, "rewards/margins": 14.340564727783203, "rewards/rejected": -16.316003799438477, "step": 3610 }, { "epoch": 1.23, "learning_rate": 6.780808147115847e-07, "logits/chosen": 0.006209374405443668, "logits/rejected": 0.04832910746335983, "logps/chosen": -206.67642211914062, "logps/rejected": -314.8833923339844, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -1.106291651725769, "rewards/margins": 13.222318649291992, "rewards/rejected": -14.328611373901367, "step": 3611 }, { "epoch": 1.23, "learning_rate": 6.775575780671266e-07, "logits/chosen": -0.01931505836546421, "logits/rejected": -0.004576814826577902, "logps/chosen": -233.32632446289062, "logps/rejected": -393.104736328125, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -1.8823399543762207, "rewards/margins": 13.810686111450195, "rewards/rejected": -15.693025588989258, "step": 3612 }, { "epoch": 1.23, "learning_rate": 6.770344399243788e-07, "logits/chosen": -0.056604113429784775, "logits/rejected": -0.034993115812540054, "logps/chosen": -230.31771850585938, "logps/rejected": -377.7694091796875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.2856619358062744, "rewards/margins": 15.945765495300293, "rewards/rejected": -17.231426239013672, "step": 3613 }, { "epoch": 1.23, "learning_rate": 6.765114004431527e-07, "logits/chosen": -0.03035091981291771, "logits/rejected": 0.015958765521645546, "logps/chosen": -254.22003173828125, "logps/rejected": -333.3626403808594, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.4162299633026123, "rewards/margins": 15.206625938415527, "rewards/rejected": -16.62285614013672, "step": 3614 }, { "epoch": 1.23, "learning_rate": 6.759884597832293e-07, "logits/chosen": 0.002426750026643276, "logits/rejected": 0.031630873680114746, "logps/chosen": -207.70521545410156, "logps/rejected": -304.7254943847656, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -2.290327310562134, "rewards/margins": 13.24899673461914, "rewards/rejected": -15.539323806762695, "step": 3615 }, { "epoch": 1.23, "learning_rate": 6.754656181043601e-07, "logits/chosen": -0.03608252480626106, "logits/rejected": -0.01150442659854889, "logps/chosen": -291.191162109375, "logps/rejected": -421.655517578125, "loss": 0.0835, "rewards/accuracies": 1.0, "rewards/chosen": -1.2306010723114014, "rewards/margins": 16.284257888793945, "rewards/rejected": -17.51485824584961, "step": 3616 }, { "epoch": 1.23, "learning_rate": 6.749428755662661e-07, "logits/chosen": -0.18171089887619019, "logits/rejected": -0.13316886126995087, "logps/chosen": -203.2557373046875, "logps/rejected": -273.831787109375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.4234263300895691, "rewards/margins": 13.10123062133789, "rewards/rejected": -13.524657249450684, "step": 3617 }, { "epoch": 1.23, "learning_rate": 6.744202323286372e-07, "logits/chosen": 0.05647112429141998, "logits/rejected": 0.07211194187402725, "logps/chosen": -168.89337158203125, "logps/rejected": -338.65875244140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.10417956113815308, "rewards/margins": 15.666065216064453, "rewards/rejected": -15.561885833740234, "step": 3618 }, { "epoch": 1.24, "learning_rate": 6.738976885511347e-07, "logits/chosen": 0.06337054073810577, "logits/rejected": 0.07721645385026932, "logps/chosen": -182.9414825439453, "logps/rejected": -385.7301025390625, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -2.183896064758301, "rewards/margins": 17.55912971496582, "rewards/rejected": -19.74302864074707, "step": 3619 }, { "epoch": 1.24, "learning_rate": 6.733752443933878e-07, "logits/chosen": -0.1333749145269394, "logits/rejected": -0.08270768821239471, "logps/chosen": -244.21533203125, "logps/rejected": -352.5960693359375, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -2.50321888923645, "rewards/margins": 14.488845825195312, "rewards/rejected": -16.9920654296875, "step": 3620 }, { "epoch": 1.24, "learning_rate": 6.728529000149963e-07, "logits/chosen": -0.015582778491079807, "logits/rejected": 0.011142784729599953, "logps/chosen": -231.80352783203125, "logps/rejected": -389.82818603515625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.563279390335083, "rewards/margins": 16.146282196044922, "rewards/rejected": -18.70956039428711, "step": 3621 }, { "epoch": 1.24, "learning_rate": 6.723306555755293e-07, "logits/chosen": -0.05874679982662201, "logits/rejected": -0.03176554664969444, "logps/chosen": -208.42926025390625, "logps/rejected": -341.06475830078125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.3991506099700928, "rewards/margins": 15.2802152633667, "rewards/rejected": -16.679363250732422, "step": 3622 }, { "epoch": 1.24, "learning_rate": 6.718085112345246e-07, "logits/chosen": -0.02357400581240654, "logits/rejected": -0.016442710533738136, "logps/chosen": -232.5395050048828, "logps/rejected": -469.2317810058594, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.725905179977417, "rewards/margins": 19.68529510498047, "rewards/rejected": -21.41119956970215, "step": 3623 }, { "epoch": 1.24, "learning_rate": 6.712864671514911e-07, "logits/chosen": -0.05435685068368912, "logits/rejected": -0.02823636867105961, "logps/chosen": -188.9905548095703, "logps/rejected": -368.90863037109375, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -1.6431320905685425, "rewards/margins": 17.180702209472656, "rewards/rejected": -18.823833465576172, "step": 3624 }, { "epoch": 1.24, "learning_rate": 6.70764523485905e-07, "logits/chosen": -0.09239090979099274, "logits/rejected": -0.054025959223508835, "logps/chosen": -178.57569885253906, "logps/rejected": -341.3050537109375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.9484390020370483, "rewards/margins": 15.572609901428223, "rewards/rejected": -17.52104949951172, "step": 3625 }, { "epoch": 1.24, "learning_rate": 6.702426803972139e-07, "logits/chosen": -0.10889318585395813, "logits/rejected": -0.09055870026350021, "logps/chosen": -285.17474365234375, "logps/rejected": -466.2007751464844, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -2.242748498916626, "rewards/margins": 16.234289169311523, "rewards/rejected": -18.47703742980957, "step": 3626 }, { "epoch": 1.24, "learning_rate": 6.697209380448332e-07, "logits/chosen": -0.0032524329144507647, "logits/rejected": 0.022684751078486443, "logps/chosen": -184.908447265625, "logps/rejected": -325.88336181640625, "loss": 0.065, "rewards/accuracies": 0.9375, "rewards/chosen": -0.58079594373703, "rewards/margins": 15.46292495727539, "rewards/rejected": -16.04372215270996, "step": 3627 }, { "epoch": 1.24, "learning_rate": 6.691992965881475e-07, "logits/chosen": 0.0008384179091081023, "logits/rejected": 0.048732221126556396, "logps/chosen": -310.7734680175781, "logps/rejected": -445.9137878417969, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.269055962562561, "rewards/margins": 18.79975700378418, "rewards/rejected": -20.06881332397461, "step": 3628 }, { "epoch": 1.24, "learning_rate": 6.686777561865118e-07, "logits/chosen": 0.08359116315841675, "logits/rejected": 0.1202547550201416, "logps/chosen": -270.6071472167969, "logps/rejected": -390.9943542480469, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.06692636013031, "rewards/margins": 17.229408264160156, "rewards/rejected": -18.296337127685547, "step": 3629 }, { "epoch": 1.24, "learning_rate": 6.681563169992493e-07, "logits/chosen": -0.028237447142601013, "logits/rejected": -0.01399293914437294, "logps/chosen": -262.9408264160156, "logps/rejected": -421.0595703125, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": -0.16336390376091003, "rewards/margins": 16.473739624023438, "rewards/rejected": -16.637102127075195, "step": 3630 }, { "epoch": 1.24, "learning_rate": 6.676349791856524e-07, "logits/chosen": -0.044095229357481, "logits/rejected": 0.006486196536570787, "logps/chosen": -194.93032836914062, "logps/rejected": -317.4644775390625, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -2.6856844425201416, "rewards/margins": 13.218771934509277, "rewards/rejected": -15.904457092285156, "step": 3631 }, { "epoch": 1.24, "learning_rate": 6.671137429049827e-07, "logits/chosen": -0.01742604374885559, "logits/rejected": 0.015732888132333755, "logps/chosen": -199.5509033203125, "logps/rejected": -305.9853210449219, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -2.3006434440612793, "rewards/margins": 14.212361335754395, "rewards/rejected": -16.513004302978516, "step": 3632 }, { "epoch": 1.24, "learning_rate": 6.665926083164703e-07, "logits/chosen": -0.05128614231944084, "logits/rejected": -0.026178661733865738, "logps/chosen": -252.40061950683594, "logps/rejected": -346.6453552246094, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8804597854614258, "rewards/margins": 13.940655708312988, "rewards/rejected": -14.82111644744873, "step": 3633 }, { "epoch": 1.24, "learning_rate": 6.660715755793154e-07, "logits/chosen": -0.030225135385990143, "logits/rejected": -0.013696079142391682, "logps/chosen": -234.63526916503906, "logps/rejected": -383.8299865722656, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -2.6905019283294678, "rewards/margins": 15.924333572387695, "rewards/rejected": -18.614835739135742, "step": 3634 }, { "epoch": 1.24, "learning_rate": 6.655506448526854e-07, "logits/chosen": -0.09251800924539566, "logits/rejected": -0.047270625829696655, "logps/chosen": -214.37203979492188, "logps/rejected": -339.7911376953125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1321090459823608, "rewards/margins": 15.27836799621582, "rewards/rejected": -16.410476684570312, "step": 3635 }, { "epoch": 1.24, "learning_rate": 6.650298162957182e-07, "logits/chosen": -0.04002571105957031, "logits/rejected": -0.007216757163405418, "logps/chosen": -181.60189819335938, "logps/rejected": -372.1836242675781, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.4621020555496216, "rewards/margins": 20.777904510498047, "rewards/rejected": -21.240005493164062, "step": 3636 }, { "epoch": 1.24, "learning_rate": 6.645090900675194e-07, "logits/chosen": -0.010143293999135494, "logits/rejected": 0.04367731139063835, "logps/chosen": -244.19007873535156, "logps/rejected": -320.4306640625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.6423039436340332, "rewards/margins": 12.286704063415527, "rewards/rejected": -13.929008483886719, "step": 3637 }, { "epoch": 1.24, "learning_rate": 6.639884663271635e-07, "logits/chosen": -0.006225135177373886, "logits/rejected": 0.038536954671144485, "logps/chosen": -240.9281768798828, "logps/rejected": -382.4062805175781, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.6564850807189941, "rewards/margins": 16.899444580078125, "rewards/rejected": -18.55592918395996, "step": 3638 }, { "epoch": 1.24, "learning_rate": 6.634679452336944e-07, "logits/chosen": 0.024896787479519844, "logits/rejected": 0.0326203852891922, "logps/chosen": -154.1060791015625, "logps/rejected": -292.1246337890625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1456893682479858, "rewards/margins": 13.985360145568848, "rewards/rejected": -15.131048202514648, "step": 3639 }, { "epoch": 1.24, "learning_rate": 6.629475269461234e-07, "logits/chosen": 0.022311177104711533, "logits/rejected": 0.0547172874212265, "logps/chosen": -179.12100219726562, "logps/rejected": -331.7376708984375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.6264419555664062, "rewards/margins": 13.867471694946289, "rewards/rejected": -16.493913650512695, "step": 3640 }, { "epoch": 1.24, "learning_rate": 6.624272116234318e-07, "logits/chosen": 0.014780024066567421, "logits/rejected": 0.039822179824113846, "logps/chosen": -142.83705139160156, "logps/rejected": -316.9979248046875, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -0.8961310982704163, "rewards/margins": 15.487728118896484, "rewards/rejected": -16.383859634399414, "step": 3641 }, { "epoch": 1.24, "learning_rate": 6.619069994245685e-07, "logits/chosen": 0.04545002430677414, "logits/rejected": 0.06032866612076759, "logps/chosen": -203.87322998046875, "logps/rejected": -389.5498962402344, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -1.3962388038635254, "rewards/margins": 17.381750106811523, "rewards/rejected": -18.77798843383789, "step": 3642 }, { "epoch": 1.24, "learning_rate": 6.613868905084505e-07, "logits/chosen": 0.07592612504959106, "logits/rejected": 0.09241773933172226, "logps/chosen": -147.9357452392578, "logps/rejected": -345.7662353515625, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -0.8478281497955322, "rewards/margins": 18.248064041137695, "rewards/rejected": -19.09589385986328, "step": 3643 }, { "epoch": 1.24, "learning_rate": 6.608668850339651e-07, "logits/chosen": -0.008394579403102398, "logits/rejected": 0.027743177488446236, "logps/chosen": -200.76316833496094, "logps/rejected": -344.78717041015625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.0432653427124023, "rewards/margins": 16.021120071411133, "rewards/rejected": -19.06438446044922, "step": 3644 }, { "epoch": 1.24, "learning_rate": 6.60346983159966e-07, "logits/chosen": 0.04739709198474884, "logits/rejected": 0.06335550546646118, "logps/chosen": -156.6736297607422, "logps/rejected": -308.10858154296875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.418593406677246, "rewards/margins": 14.051092147827148, "rewards/rejected": -16.469684600830078, "step": 3645 }, { "epoch": 1.24, "learning_rate": 6.598271850452762e-07, "logits/chosen": -0.09525485336780548, "logits/rejected": -0.06812884658575058, "logps/chosen": -243.3596954345703, "logps/rejected": -383.9298095703125, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.5031143426895142, "rewards/margins": 16.3996524810791, "rewards/rejected": -16.90276527404785, "step": 3646 }, { "epoch": 1.24, "learning_rate": 6.59307490848687e-07, "logits/chosen": -0.02365555427968502, "logits/rejected": -0.0006294611957855523, "logps/chosen": -211.61253356933594, "logps/rejected": -351.6568603515625, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -2.1767404079437256, "rewards/margins": 15.230151176452637, "rewards/rejected": -17.406890869140625, "step": 3647 }, { "epoch": 1.25, "learning_rate": 6.587879007289575e-07, "logits/chosen": -0.06531350314617157, "logits/rejected": -0.00568034965544939, "logps/chosen": -239.87088012695312, "logps/rejected": -367.6551818847656, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.186548113822937, "rewards/margins": 16.18726921081543, "rewards/rejected": -16.373817443847656, "step": 3648 }, { "epoch": 1.25, "learning_rate": 6.582684148448156e-07, "logits/chosen": 0.027020014822483063, "logits/rejected": 0.054487213492393494, "logps/chosen": -229.24197387695312, "logps/rejected": -344.01605224609375, "loss": 0.0732, "rewards/accuracies": 1.0, "rewards/chosen": -1.027190923690796, "rewards/margins": 13.4049072265625, "rewards/rejected": -14.432097434997559, "step": 3649 }, { "epoch": 1.25, "learning_rate": 6.57749033354957e-07, "logits/chosen": -0.03097996674478054, "logits/rejected": 0.02433059737086296, "logps/chosen": -264.5281982421875, "logps/rejected": -356.0513916015625, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.059018492698669434, "rewards/margins": 17.36996841430664, "rewards/rejected": -17.428985595703125, "step": 3650 }, { "epoch": 1.25, "learning_rate": 6.572297564180456e-07, "logits/chosen": -0.014481885358691216, "logits/rejected": 0.014630314894020557, "logps/chosen": -199.4760284423828, "logps/rejected": -298.52459716796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8409096598625183, "rewards/margins": 14.175994873046875, "rewards/rejected": -15.016904830932617, "step": 3651 }, { "epoch": 1.25, "learning_rate": 6.567105841927132e-07, "logits/chosen": -0.04644721373915672, "logits/rejected": -0.02556530572474003, "logps/chosen": -153.67283630371094, "logps/rejected": -288.9209899902344, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -2.4962148666381836, "rewards/margins": 13.468952178955078, "rewards/rejected": -15.965167045593262, "step": 3652 }, { "epoch": 1.25, "learning_rate": 6.561915168375597e-07, "logits/chosen": -0.06639350205659866, "logits/rejected": -0.055709462612867355, "logps/chosen": -241.662109375, "logps/rejected": -417.986572265625, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -1.2449660301208496, "rewards/margins": 17.067346572875977, "rewards/rejected": -18.312313079833984, "step": 3653 }, { "epoch": 1.25, "learning_rate": 6.556725545111534e-07, "logits/chosen": -0.005170225165784359, "logits/rejected": -0.005932302679866552, "logps/chosen": -162.00442504882812, "logps/rejected": -351.87225341796875, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.825087070465088, "rewards/margins": 17.14010238647461, "rewards/rejected": -18.965190887451172, "step": 3654 }, { "epoch": 1.25, "learning_rate": 6.551536973720297e-07, "logits/chosen": -0.028121178969740868, "logits/rejected": -0.0009172876598313451, "logps/chosen": -264.092529296875, "logps/rejected": -416.1628723144531, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -2.4849343299865723, "rewards/margins": 17.90467071533203, "rewards/rejected": -20.389602661132812, "step": 3655 }, { "epoch": 1.25, "learning_rate": 6.546349455786925e-07, "logits/chosen": -0.08089067041873932, "logits/rejected": -0.028116334229707718, "logps/chosen": -247.32553100585938, "logps/rejected": -414.09881591796875, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.704392671585083, "rewards/margins": 22.23596954345703, "rewards/rejected": -22.94036293029785, "step": 3656 }, { "epoch": 1.25, "learning_rate": 6.541162992896134e-07, "logits/chosen": 0.07986779510974884, "logits/rejected": 0.09124594181776047, "logps/chosen": -193.96035766601562, "logps/rejected": -398.0432434082031, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.4372543096542358, "rewards/margins": 18.449634552001953, "rewards/rejected": -19.88688850402832, "step": 3657 }, { "epoch": 1.25, "learning_rate": 6.53597758663231e-07, "logits/chosen": 0.0944201648235321, "logits/rejected": 0.11688671261072159, "logps/chosen": -131.84896850585938, "logps/rejected": -290.32672119140625, "loss": 0.0678, "rewards/accuracies": 0.9375, "rewards/chosen": -2.636460781097412, "rewards/margins": 14.5768461227417, "rewards/rejected": -17.213306427001953, "step": 3658 }, { "epoch": 1.25, "learning_rate": 6.530793238579535e-07, "logits/chosen": 0.008718236349523067, "logits/rejected": 0.04326360672712326, "logps/chosen": -250.48501586914062, "logps/rejected": -357.2900390625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.9844387769699097, "rewards/margins": 14.134634971618652, "rewards/rejected": -16.11907386779785, "step": 3659 }, { "epoch": 1.25, "learning_rate": 6.525609950321551e-07, "logits/chosen": -0.09938681125640869, "logits/rejected": -0.08841890841722488, "logps/chosen": -229.20169067382812, "logps/rejected": -416.2709045410156, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -3.5918490886688232, "rewards/margins": 17.15383529663086, "rewards/rejected": -20.745683670043945, "step": 3660 }, { "epoch": 1.25, "learning_rate": 6.520427723441772e-07, "logits/chosen": -7.295882096514106e-05, "logits/rejected": 8.205122139770538e-05, "logps/chosen": -224.77694702148438, "logps/rejected": -416.364013671875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.866956353187561, "rewards/margins": 17.271142959594727, "rewards/rejected": -19.138099670410156, "step": 3661 }, { "epoch": 1.25, "learning_rate": 6.515246559523311e-07, "logits/chosen": -0.03808170184493065, "logits/rejected": 0.0357234813272953, "logps/chosen": -221.5265655517578, "logps/rejected": -282.75836181640625, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -0.7197178602218628, "rewards/margins": 14.204218864440918, "rewards/rejected": -14.92393684387207, "step": 3662 }, { "epoch": 1.25, "learning_rate": 6.510066460148931e-07, "logits/chosen": -0.1268572211265564, "logits/rejected": -0.09126254916191101, "logps/chosen": -307.01123046875, "logps/rejected": -466.4584045410156, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.6420352458953857, "rewards/margins": 18.551876068115234, "rewards/rejected": -17.909839630126953, "step": 3663 }, { "epoch": 1.25, "learning_rate": 6.504887426901089e-07, "logits/chosen": 0.047676339745521545, "logits/rejected": 0.08272770792245865, "logps/chosen": -124.9668197631836, "logps/rejected": -257.536376953125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.8378887176513672, "rewards/margins": 14.218522071838379, "rewards/rejected": -15.056410789489746, "step": 3664 }, { "epoch": 1.25, "learning_rate": 6.499709461361904e-07, "logits/chosen": 0.03997143357992172, "logits/rejected": 0.06808499246835709, "logps/chosen": -225.74635314941406, "logps/rejected": -372.1020812988281, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -2.118187665939331, "rewards/margins": 17.965412139892578, "rewards/rejected": -20.08359718322754, "step": 3665 }, { "epoch": 1.25, "learning_rate": 6.494532565113171e-07, "logits/chosen": -0.010299398563802242, "logits/rejected": 0.004198718350380659, "logps/chosen": -213.87611389160156, "logps/rejected": -345.67626953125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.5537468194961548, "rewards/margins": 14.684680938720703, "rewards/rejected": -16.238426208496094, "step": 3666 }, { "epoch": 1.25, "learning_rate": 6.489356739736365e-07, "logits/chosen": -0.05277709662914276, "logits/rejected": 0.0019626705907285213, "logps/chosen": -258.2089538574219, "logps/rejected": -341.6536560058594, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.39611008763313293, "rewards/margins": 16.603036880493164, "rewards/rejected": -16.9991455078125, "step": 3667 }, { "epoch": 1.25, "learning_rate": 6.484181986812624e-07, "logits/chosen": 0.004223787225782871, "logits/rejected": 0.02207709103822708, "logps/chosen": -221.54342651367188, "logps/rejected": -412.05218505859375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.48604631423950195, "rewards/margins": 18.179780960083008, "rewards/rejected": -18.665828704833984, "step": 3668 }, { "epoch": 1.25, "learning_rate": 6.47900830792277e-07, "logits/chosen": -0.1294524371623993, "logits/rejected": -0.0803401991724968, "logps/chosen": -209.949462890625, "logps/rejected": -349.1356201171875, "loss": 0.0249, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5241004228591919, "rewards/margins": 17.983505249023438, "rewards/rejected": -18.507606506347656, "step": 3669 }, { "epoch": 1.25, "learning_rate": 6.473835704647288e-07, "logits/chosen": -0.03226852789521217, "logits/rejected": 8.693518611835316e-05, "logps/chosen": -279.2312316894531, "logps/rejected": -473.396484375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.449199676513672, "rewards/margins": 21.383296966552734, "rewards/rejected": -23.832494735717773, "step": 3670 }, { "epoch": 1.25, "learning_rate": 6.468664178566333e-07, "logits/chosen": -0.05075434595346451, "logits/rejected": -0.00908978097140789, "logps/chosen": -169.17120361328125, "logps/rejected": -281.9082946777344, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -2.6608922481536865, "rewards/margins": 13.455793380737305, "rewards/rejected": -16.116683959960938, "step": 3671 }, { "epoch": 1.25, "learning_rate": 6.46349373125974e-07, "logits/chosen": -0.12045862525701523, "logits/rejected": -0.10940046608448029, "logps/chosen": -206.49295043945312, "logps/rejected": -375.2352294921875, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -0.10059501975774765, "rewards/margins": 16.940628051757812, "rewards/rejected": -17.041223526000977, "step": 3672 }, { "epoch": 1.25, "learning_rate": 6.458324364307004e-07, "logits/chosen": 0.04626685380935669, "logits/rejected": 0.08174549043178558, "logps/chosen": -229.01502990722656, "logps/rejected": -359.931640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.878798246383667, "rewards/margins": 16.275285720825195, "rewards/rejected": -17.154083251953125, "step": 3673 }, { "epoch": 1.25, "learning_rate": 6.453156079287301e-07, "logits/chosen": 0.038901012390851974, "logits/rejected": 0.06802764534950256, "logps/chosen": -192.81809997558594, "logps/rejected": -331.91497802734375, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -1.2091597318649292, "rewards/margins": 15.766599655151367, "rewards/rejected": -16.975759506225586, "step": 3674 }, { "epoch": 1.25, "learning_rate": 6.447988877779471e-07, "logits/chosen": -0.021551497280597687, "logits/rejected": -0.006775677669793367, "logps/chosen": -170.0254364013672, "logps/rejected": -353.8754577636719, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.665212631225586, "rewards/margins": 16.088115692138672, "rewards/rejected": -18.75333023071289, "step": 3675 }, { "epoch": 1.25, "learning_rate": 6.442822761362014e-07, "logits/chosen": -0.1502874195575714, "logits/rejected": -0.108047254383564, "logps/chosen": -226.15908813476562, "logps/rejected": -383.7301025390625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.6529242992401123, "rewards/margins": 17.779308319091797, "rewards/rejected": -19.432233810424805, "step": 3676 }, { "epoch": 1.25, "learning_rate": 6.437657731613117e-07, "logits/chosen": -0.14867214858531952, "logits/rejected": -0.1391133815050125, "logps/chosen": -266.7565612792969, "logps/rejected": -472.2156677246094, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0713090896606445, "rewards/margins": 19.230640411376953, "rewards/rejected": -20.301952362060547, "step": 3677 }, { "epoch": 1.26, "learning_rate": 6.432493790110621e-07, "logits/chosen": 0.0879233330488205, "logits/rejected": 0.08357246220111847, "logps/chosen": -181.51565551757812, "logps/rejected": -435.7499084472656, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.6455894708633423, "rewards/margins": 20.419628143310547, "rewards/rejected": -22.06521987915039, "step": 3678 }, { "epoch": 1.26, "learning_rate": 6.42733093843204e-07, "logits/chosen": -0.07828060537576675, "logits/rejected": -0.08459975570440292, "logps/chosen": -251.8441162109375, "logps/rejected": -469.07958984375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.9819512963294983, "rewards/margins": 17.7951717376709, "rewards/rejected": -18.777124404907227, "step": 3679 }, { "epoch": 1.26, "learning_rate": 6.422169178154555e-07, "logits/chosen": -0.14371797442436218, "logits/rejected": -0.11212371289730072, "logps/chosen": -230.12057495117188, "logps/rejected": -315.9066162109375, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.8118066787719727, "rewards/margins": 13.361053466796875, "rewards/rejected": -15.172861099243164, "step": 3680 }, { "epoch": 1.26, "learning_rate": 6.417008510855006e-07, "logits/chosen": -0.008399011567234993, "logits/rejected": 0.010467066429555416, "logps/chosen": -219.88906860351562, "logps/rejected": -411.9297180175781, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.7859667539596558, "rewards/margins": 20.097293853759766, "rewards/rejected": -21.883262634277344, "step": 3681 }, { "epoch": 1.26, "learning_rate": 6.411848938109916e-07, "logits/chosen": -0.046015799045562744, "logits/rejected": -0.01500660553574562, "logps/chosen": -187.27078247070312, "logps/rejected": -332.4517517089844, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.8215960264205933, "rewards/margins": 15.180614471435547, "rewards/rejected": -17.00221061706543, "step": 3682 }, { "epoch": 1.26, "learning_rate": 6.406690461495454e-07, "logits/chosen": -0.014097880572080612, "logits/rejected": 0.04620293900370598, "logps/chosen": -214.8662567138672, "logps/rejected": -358.1044006347656, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.0372097492218018, "rewards/margins": 15.868256568908691, "rewards/rejected": -16.905466079711914, "step": 3683 }, { "epoch": 1.26, "learning_rate": 6.401533082587469e-07, "logits/chosen": 0.04779545217752457, "logits/rejected": 0.08428964763879776, "logps/chosen": -191.13748168945312, "logps/rejected": -322.9997253417969, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.505277395248413, "rewards/margins": 14.878094673156738, "rewards/rejected": -16.383373260498047, "step": 3684 }, { "epoch": 1.26, "learning_rate": 6.396376802961467e-07, "logits/chosen": -0.05882398411631584, "logits/rejected": -0.031162632629275322, "logps/chosen": -188.31414794921875, "logps/rejected": -348.45751953125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.4163095951080322, "rewards/margins": 15.446897506713867, "rewards/rejected": -16.86320686340332, "step": 3685 }, { "epoch": 1.26, "learning_rate": 6.39122162419262e-07, "logits/chosen": 0.02077082358300686, "logits/rejected": 0.1020299419760704, "logps/chosen": -185.51263427734375, "logps/rejected": -331.5848083496094, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.5365647077560425, "rewards/margins": 18.111635208129883, "rewards/rejected": -19.6481990814209, "step": 3686 }, { "epoch": 1.26, "learning_rate": 6.386067547855766e-07, "logits/chosen": -0.08271320909261703, "logits/rejected": -0.0466461107134819, "logps/chosen": -222.00221252441406, "logps/rejected": -338.57183837890625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.2370266914367676, "rewards/margins": 11.598034858703613, "rewards/rejected": -13.835062026977539, "step": 3687 }, { "epoch": 1.26, "learning_rate": 6.380914575525399e-07, "logits/chosen": -0.05180123448371887, "logits/rejected": -0.020860956981778145, "logps/chosen": -219.59872436523438, "logps/rejected": -336.0786437988281, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.6073766946792603, "rewards/margins": 14.744733810424805, "rewards/rejected": -16.352109909057617, "step": 3688 }, { "epoch": 1.26, "learning_rate": 6.375762708775688e-07, "logits/chosen": 0.07133481651544571, "logits/rejected": 0.07945133000612259, "logps/chosen": -123.96829986572266, "logps/rejected": -259.7830505371094, "loss": 0.0524, "rewards/accuracies": 1.0, "rewards/chosen": -0.9441196322441101, "rewards/margins": 13.413068771362305, "rewards/rejected": -14.35718822479248, "step": 3689 }, { "epoch": 1.26, "learning_rate": 6.370611949180457e-07, "logits/chosen": 0.0543183907866478, "logits/rejected": 0.08850696682929993, "logps/chosen": -201.38607788085938, "logps/rejected": -354.9964294433594, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.199462890625, "rewards/margins": 16.333267211914062, "rewards/rejected": -17.532730102539062, "step": 3690 }, { "epoch": 1.26, "learning_rate": 6.365462298313182e-07, "logits/chosen": 0.026311565190553665, "logits/rejected": 0.06187107786536217, "logps/chosen": -165.45643615722656, "logps/rejected": -332.053955078125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.3510162830352783, "rewards/margins": 16.41399574279785, "rewards/rejected": -18.765010833740234, "step": 3691 }, { "epoch": 1.26, "learning_rate": 6.360313757747021e-07, "logits/chosen": 0.1088927760720253, "logits/rejected": 0.11478056758642197, "logps/chosen": -142.878173828125, "logps/rejected": -297.30377197265625, "loss": 0.0741, "rewards/accuracies": 0.9375, "rewards/chosen": -3.8820769786834717, "rewards/margins": 13.47250747680664, "rewards/rejected": -17.354583740234375, "step": 3692 }, { "epoch": 1.26, "learning_rate": 6.355166329054781e-07, "logits/chosen": 0.1657503992319107, "logits/rejected": 0.1648452877998352, "logps/chosen": -157.06362915039062, "logps/rejected": -367.8961181640625, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.4817602634429932, "rewards/margins": 19.300416946411133, "rewards/rejected": -20.78217887878418, "step": 3693 }, { "epoch": 1.26, "learning_rate": 6.350020013808921e-07, "logits/chosen": 0.02341286838054657, "logits/rejected": 0.05824625864624977, "logps/chosen": -172.24017333984375, "logps/rejected": -373.1494445800781, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.5897835493087769, "rewards/margins": 18.930429458618164, "rewards/rejected": -20.520214080810547, "step": 3694 }, { "epoch": 1.26, "learning_rate": 6.34487481358158e-07, "logits/chosen": -0.03520885482430458, "logits/rejected": 0.0011277528246864676, "logps/chosen": -201.6675262451172, "logps/rejected": -331.93267822265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.163023829460144, "rewards/margins": 15.576977729797363, "rewards/rejected": -16.740001678466797, "step": 3695 }, { "epoch": 1.26, "learning_rate": 6.33973072994454e-07, "logits/chosen": 0.05389624834060669, "logits/rejected": 0.09821326285600662, "logps/chosen": -223.22286987304688, "logps/rejected": -374.9140625, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.1696908473968506, "rewards/margins": 15.798648834228516, "rewards/rejected": -17.968339920043945, "step": 3696 }, { "epoch": 1.26, "learning_rate": 6.334587764469251e-07, "logits/chosen": -0.05158907547593117, "logits/rejected": -0.030566060915589333, "logps/chosen": -263.09344482421875, "logps/rejected": -394.6788330078125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.9598708152770996, "rewards/margins": 14.160505294799805, "rewards/rejected": -16.120376586914062, "step": 3697 }, { "epoch": 1.26, "learning_rate": 6.329445918726817e-07, "logits/chosen": 0.008108115755021572, "logits/rejected": 0.051411788910627365, "logps/chosen": -201.26318359375, "logps/rejected": -257.4604797363281, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.5263353586196899, "rewards/margins": 12.572890281677246, "rewards/rejected": -13.099225997924805, "step": 3698 }, { "epoch": 1.26, "learning_rate": 6.324305194287997e-07, "logits/chosen": -0.1876644641160965, "logits/rejected": -0.10377363115549088, "logps/chosen": -273.8436584472656, "logps/rejected": -318.4343566894531, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9955871105194092, "rewards/margins": 15.201278686523438, "rewards/rejected": -16.19686508178711, "step": 3699 }, { "epoch": 1.26, "learning_rate": 6.319165592723217e-07, "logits/chosen": -0.041608937084674835, "logits/rejected": -0.001556389150209725, "logps/chosen": -226.5482635498047, "logps/rejected": -333.6229248046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1669057607650757, "rewards/margins": 15.584278106689453, "rewards/rejected": -16.75118637084961, "step": 3700 }, { "epoch": 1.26, "learning_rate": 6.314027115602549e-07, "logits/chosen": -0.03771244361996651, "logits/rejected": -0.018604500219225883, "logps/chosen": -231.04542541503906, "logps/rejected": -413.66015625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.34258025884628296, "rewards/margins": 17.241474151611328, "rewards/rejected": -17.58405303955078, "step": 3701 }, { "epoch": 1.26, "learning_rate": 6.308889764495733e-07, "logits/chosen": 0.009942807257175446, "logits/rejected": 0.029750417917966843, "logps/chosen": -165.22088623046875, "logps/rejected": -352.4908447265625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.445343017578125, "rewards/margins": 15.759018898010254, "rewards/rejected": -19.204362869262695, "step": 3702 }, { "epoch": 1.26, "learning_rate": 6.303753540972154e-07, "logits/chosen": 0.06309783458709717, "logits/rejected": 0.10214199125766754, "logps/chosen": -199.6045684814453, "logps/rejected": -321.1388854980469, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": -1.262134313583374, "rewards/margins": 15.101630210876465, "rewards/rejected": -16.363765716552734, "step": 3703 }, { "epoch": 1.26, "learning_rate": 6.298618446600855e-07, "logits/chosen": -0.03443275764584541, "logits/rejected": -0.0026436415500938892, "logps/chosen": -237.7164306640625, "logps/rejected": -394.018310546875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.2213844060897827, "rewards/margins": 16.427871704101562, "rewards/rejected": -17.64925765991211, "step": 3704 }, { "epoch": 1.26, "learning_rate": 6.293484482950548e-07, "logits/chosen": 0.03018169291317463, "logits/rejected": 0.04325569421052933, "logps/chosen": -200.91412353515625, "logps/rejected": -383.15167236328125, "loss": 0.058, "rewards/accuracies": 0.9375, "rewards/chosen": -3.104581117630005, "rewards/margins": 16.40596580505371, "rewards/rejected": -19.510547637939453, "step": 3705 }, { "epoch": 1.26, "learning_rate": 6.288351651589573e-07, "logits/chosen": 0.07972469180822372, "logits/rejected": 0.08991118520498276, "logps/chosen": -208.36441040039062, "logps/rejected": -336.2148742675781, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.8781907558441162, "rewards/margins": 13.12177848815918, "rewards/rejected": -14.999968528747559, "step": 3706 }, { "epoch": 1.27, "learning_rate": 6.283219954085951e-07, "logits/chosen": 0.03230462968349457, "logits/rejected": 0.05261439085006714, "logps/chosen": -231.85421752929688, "logps/rejected": -355.4825134277344, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.8241993188858032, "rewards/margins": 13.0862455368042, "rewards/rejected": -14.910444259643555, "step": 3707 }, { "epoch": 1.27, "learning_rate": 6.278089392007342e-07, "logits/chosen": 0.06525880843400955, "logits/rejected": 0.1161428689956665, "logps/chosen": -196.89678955078125, "logps/rejected": -283.8232727050781, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.7455216646194458, "rewards/margins": 14.505539894104004, "rewards/rejected": -16.251062393188477, "step": 3708 }, { "epoch": 1.27, "learning_rate": 6.272959966921054e-07, "logits/chosen": -0.03446853533387184, "logits/rejected": 0.004053077194839716, "logps/chosen": -222.42840576171875, "logps/rejected": -378.69647216796875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.954849123954773, "rewards/margins": 16.945768356323242, "rewards/rejected": -18.900615692138672, "step": 3709 }, { "epoch": 1.27, "learning_rate": 6.267831680394066e-07, "logits/chosen": -0.07024403661489487, "logits/rejected": -0.04515949636697769, "logps/chosen": -234.173095703125, "logps/rejected": -410.8317565917969, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.2214602828025818, "rewards/margins": 19.513635635375977, "rewards/rejected": -19.73509407043457, "step": 3710 }, { "epoch": 1.27, "learning_rate": 6.262704533992994e-07, "logits/chosen": -0.09769459068775177, "logits/rejected": -0.06450075656175613, "logps/chosen": -196.78497314453125, "logps/rejected": -282.6642761230469, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.2170560657978058, "rewards/margins": 12.268909454345703, "rewards/rejected": -12.485965728759766, "step": 3711 }, { "epoch": 1.27, "learning_rate": 6.257578529284112e-07, "logits/chosen": 0.014468420296907425, "logits/rejected": 0.022886063903570175, "logps/chosen": -161.6031951904297, "logps/rejected": -329.1500244140625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0751663446426392, "rewards/margins": 14.279792785644531, "rewards/rejected": -15.354959487915039, "step": 3712 }, { "epoch": 1.27, "learning_rate": 6.252453667833343e-07, "logits/chosen": -0.013297487050294876, "logits/rejected": 0.002569874282926321, "logps/chosen": -229.2506103515625, "logps/rejected": -339.84942626953125, "loss": 0.0338, "rewards/accuracies": 1.0, "rewards/chosen": -2.279855489730835, "rewards/margins": 12.969270706176758, "rewards/rejected": -15.249125480651855, "step": 3713 }, { "epoch": 1.27, "learning_rate": 6.247329951206259e-07, "logits/chosen": 0.05827780440449715, "logits/rejected": 0.09015127271413803, "logps/chosen": -198.25115966796875, "logps/rejected": -360.41015625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.2660578489303589, "rewards/margins": 17.43967056274414, "rewards/rejected": -18.70572853088379, "step": 3714 }, { "epoch": 1.27, "learning_rate": 6.242207380968088e-07, "logits/chosen": -0.04786314442753792, "logits/rejected": -0.037619493901729584, "logps/chosen": -207.26174926757812, "logps/rejected": -367.1748352050781, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.713448166847229, "rewards/margins": 16.688886642456055, "rewards/rejected": -17.402334213256836, "step": 3715 }, { "epoch": 1.27, "learning_rate": 6.237085958683703e-07, "logits/chosen": 0.021399250254034996, "logits/rejected": 0.01630363240838051, "logps/chosen": -118.95744323730469, "logps/rejected": -317.9458312988281, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -1.7156046628952026, "rewards/margins": 15.197271347045898, "rewards/rejected": -16.91287612915039, "step": 3716 }, { "epoch": 1.27, "learning_rate": 6.231965685917629e-07, "logits/chosen": 0.18540836870670319, "logits/rejected": 0.20031942427158356, "logps/chosen": -142.97877502441406, "logps/rejected": -332.498291015625, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -2.209015369415283, "rewards/margins": 16.649749755859375, "rewards/rejected": -18.858768463134766, "step": 3717 }, { "epoch": 1.27, "learning_rate": 6.226846564234039e-07, "logits/chosen": 0.0059371646493673325, "logits/rejected": 0.04178459197282791, "logps/chosen": -213.18638610839844, "logps/rejected": -371.0028076171875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.7757478952407837, "rewards/margins": 18.92868995666504, "rewards/rejected": -20.704437255859375, "step": 3718 }, { "epoch": 1.27, "learning_rate": 6.221728595196749e-07, "logits/chosen": 0.014603768475353718, "logits/rejected": 0.05663163959980011, "logps/chosen": -211.23631286621094, "logps/rejected": -354.3833923339844, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.3925971984863281, "rewards/margins": 18.23004913330078, "rewards/rejected": -19.622644424438477, "step": 3719 }, { "epoch": 1.27, "learning_rate": 6.216611780369238e-07, "logits/chosen": -0.048239514231681824, "logits/rejected": 0.017155807465314865, "logps/chosen": -228.72335815429688, "logps/rejected": -324.4925842285156, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.35932812094688416, "rewards/margins": 15.858867645263672, "rewards/rejected": -16.218198776245117, "step": 3720 }, { "epoch": 1.27, "learning_rate": 6.211496121314609e-07, "logits/chosen": -0.0446263924241066, "logits/rejected": -0.0540611669421196, "logps/chosen": -195.10594177246094, "logps/rejected": -444.669921875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.725238800048828, "rewards/margins": 21.2010555267334, "rewards/rejected": -23.926294326782227, "step": 3721 }, { "epoch": 1.27, "learning_rate": 6.206381619595638e-07, "logits/chosen": -0.016940144822001457, "logits/rejected": 0.014635887928307056, "logps/chosen": -206.65708923339844, "logps/rejected": -290.18359375, "loss": 0.0458, "rewards/accuracies": 1.0, "rewards/chosen": -0.15606963634490967, "rewards/margins": 12.985116004943848, "rewards/rejected": -13.141185760498047, "step": 3722 }, { "epoch": 1.27, "learning_rate": 6.20126827677473e-07, "logits/chosen": -0.06439550220966339, "logits/rejected": -0.04230457916855812, "logps/chosen": -193.71881103515625, "logps/rejected": -310.8238525390625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.9951283931732178, "rewards/margins": 13.17641830444336, "rewards/rejected": -15.171546936035156, "step": 3723 }, { "epoch": 1.27, "learning_rate": 6.196156094413934e-07, "logits/chosen": -0.014815202914178371, "logits/rejected": 0.013386886566877365, "logps/chosen": -237.02198791503906, "logps/rejected": -314.9306945800781, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -1.4448107481002808, "rewards/margins": 13.422358512878418, "rewards/rejected": -14.867170333862305, "step": 3724 }, { "epoch": 1.27, "learning_rate": 6.191045074074961e-07, "logits/chosen": 0.12773357331752777, "logits/rejected": 0.13079674541950226, "logps/chosen": -192.985107421875, "logps/rejected": -352.88751220703125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.08496201038360596, "rewards/margins": 14.718894958496094, "rewards/rejected": -14.803855895996094, "step": 3725 }, { "epoch": 1.27, "learning_rate": 6.18593521731915e-07, "logits/chosen": 0.15580035746097565, "logits/rejected": 0.1503988653421402, "logps/chosen": -153.9240264892578, "logps/rejected": -365.91131591796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.2350049018859863, "rewards/margins": 16.952953338623047, "rewards/rejected": -19.187959671020508, "step": 3726 }, { "epoch": 1.27, "learning_rate": 6.180826525707499e-07, "logits/chosen": -0.09777572005987167, "logits/rejected": -0.07204011082649231, "logps/chosen": -219.02731323242188, "logps/rejected": -370.3293762207031, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.46062928438186646, "rewards/margins": 18.512197494506836, "rewards/rejected": -18.05156898498535, "step": 3727 }, { "epoch": 1.27, "learning_rate": 6.175719000800636e-07, "logits/chosen": 0.053670503199100494, "logits/rejected": 0.07248011976480484, "logps/chosen": -180.16070556640625, "logps/rejected": -313.6840515136719, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.66777503490448, "rewards/margins": 14.62710189819336, "rewards/rejected": -15.294876098632812, "step": 3728 }, { "epoch": 1.27, "learning_rate": 6.170612644158843e-07, "logits/chosen": 0.004024200607091188, "logits/rejected": 0.007850807160139084, "logps/chosen": -209.4070587158203, "logps/rejected": -369.9721374511719, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -2.896775722503662, "rewards/margins": 12.299769401550293, "rewards/rejected": -15.19654655456543, "step": 3729 }, { "epoch": 1.27, "learning_rate": 6.16550745734204e-07, "logits/chosen": 0.07618749886751175, "logits/rejected": 0.10671623796224594, "logps/chosen": -161.03921508789062, "logps/rejected": -315.32293701171875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -2.034520149230957, "rewards/margins": 15.148232460021973, "rewards/rejected": -17.182754516601562, "step": 3730 }, { "epoch": 1.27, "learning_rate": 6.160403441909794e-07, "logits/chosen": 0.0405551940202713, "logits/rejected": 0.07295244187116623, "logps/chosen": -162.557373046875, "logps/rejected": -323.9552001953125, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -3.672618865966797, "rewards/margins": 15.919696807861328, "rewards/rejected": -19.592315673828125, "step": 3731 }, { "epoch": 1.27, "learning_rate": 6.155300599421305e-07, "logits/chosen": 0.03023497946560383, "logits/rejected": 0.038993146270513535, "logps/chosen": -241.56138610839844, "logps/rejected": -469.3656311035156, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.3127350807189941, "rewards/margins": 21.219337463378906, "rewards/rejected": -22.532072067260742, "step": 3732 }, { "epoch": 1.27, "learning_rate": 6.150198931435429e-07, "logits/chosen": -0.015799878165125847, "logits/rejected": 0.002440599724650383, "logps/chosen": -173.98521423339844, "logps/rejected": -337.3231201171875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.143404245376587, "rewards/margins": 14.818682670593262, "rewards/rejected": -16.962085723876953, "step": 3733 }, { "epoch": 1.27, "learning_rate": 6.145098439510644e-07, "logits/chosen": -0.009806959889829159, "logits/rejected": 0.023809565231204033, "logps/chosen": -226.11351013183594, "logps/rejected": -421.3963317871094, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.8053178787231445, "rewards/margins": 18.330381393432617, "rewards/rejected": -20.135698318481445, "step": 3734 }, { "epoch": 1.27, "learning_rate": 6.139999125205095e-07, "logits/chosen": -0.004727762192487717, "logits/rejected": 0.034156426787376404, "logps/chosen": -233.97686767578125, "logps/rejected": -346.5296325683594, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -2.284334659576416, "rewards/margins": 16.115535736083984, "rewards/rejected": -18.399869918823242, "step": 3735 }, { "epoch": 1.28, "learning_rate": 6.134900990076541e-07, "logits/chosen": 0.042117949575185776, "logits/rejected": 0.059975385665893555, "logps/chosen": -179.2297821044922, "logps/rejected": -356.10382080078125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.3823763132095337, "rewards/margins": 16.504161834716797, "rewards/rejected": -17.886539459228516, "step": 3736 }, { "epoch": 1.28, "learning_rate": 6.129804035682392e-07, "logits/chosen": 0.024132976308465004, "logits/rejected": 0.05635276436805725, "logps/chosen": -184.6190948486328, "logps/rejected": -355.03228759765625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.8189022541046143, "rewards/margins": 18.839059829711914, "rewards/rejected": -20.6579647064209, "step": 3737 }, { "epoch": 1.28, "learning_rate": 6.124708263579705e-07, "logits/chosen": 0.03896813094615936, "logits/rejected": 0.0749516412615776, "logps/chosen": -242.19232177734375, "logps/rejected": -404.04949951171875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -3.0260496139526367, "rewards/margins": 16.67148780822754, "rewards/rejected": -19.69753646850586, "step": 3738 }, { "epoch": 1.28, "learning_rate": 6.119613675325159e-07, "logits/chosen": -0.0485590323805809, "logits/rejected": -0.03488896042108536, "logps/chosen": -238.96975708007812, "logps/rejected": -327.4088134765625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.3258707523345947, "rewards/margins": 10.395886421203613, "rewards/rejected": -10.721755981445312, "step": 3739 }, { "epoch": 1.28, "learning_rate": 6.114520272475088e-07, "logits/chosen": 0.03312421590089798, "logits/rejected": 0.08932971209287643, "logps/chosen": -189.07969665527344, "logps/rejected": -258.566162109375, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.3530051708221436, "rewards/margins": 13.937507629394531, "rewards/rejected": -15.290513038635254, "step": 3740 }, { "epoch": 1.28, "learning_rate": 6.109428056585452e-07, "logits/chosen": 0.023075249046087265, "logits/rejected": 0.059427909553050995, "logps/chosen": -185.06845092773438, "logps/rejected": -326.02392578125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.6934053897857666, "rewards/margins": 13.761334419250488, "rewards/rejected": -15.45473861694336, "step": 3741 }, { "epoch": 1.28, "learning_rate": 6.104337029211853e-07, "logits/chosen": 0.009973456151783466, "logits/rejected": 0.013554658740758896, "logps/chosen": -207.7156219482422, "logps/rejected": -391.32861328125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.3521320819854736, "rewards/margins": 17.375150680541992, "rewards/rejected": -19.727283477783203, "step": 3742 }, { "epoch": 1.28, "learning_rate": 6.099247191909532e-07, "logits/chosen": 0.07065078616142273, "logits/rejected": 0.12942786514759064, "logps/chosen": -158.23497009277344, "logps/rejected": -263.27978515625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.5073793530464172, "rewards/margins": 15.800531387329102, "rewards/rejected": -16.307910919189453, "step": 3743 }, { "epoch": 1.28, "learning_rate": 6.094158546233358e-07, "logits/chosen": 0.011425556614995003, "logits/rejected": 0.038387008011341095, "logps/chosen": -144.6475830078125, "logps/rejected": -329.225341796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.55379319190979, "rewards/margins": 17.374217987060547, "rewards/rejected": -19.928010940551758, "step": 3744 }, { "epoch": 1.28, "learning_rate": 6.089071093737852e-07, "logits/chosen": -0.007083904463797808, "logits/rejected": 0.012719828635454178, "logps/chosen": -274.83380126953125, "logps/rejected": -446.342529296875, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -2.0264883041381836, "rewards/margins": 17.012714385986328, "rewards/rejected": -19.039201736450195, "step": 3745 }, { "epoch": 1.28, "learning_rate": 6.083984835977153e-07, "logits/chosen": 0.019519662484526634, "logits/rejected": 0.0357837900519371, "logps/chosen": -189.91845703125, "logps/rejected": -308.60589599609375, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -0.5677040219306946, "rewards/margins": 13.2456693649292, "rewards/rejected": -13.813373565673828, "step": 3746 }, { "epoch": 1.28, "learning_rate": 6.078899774505042e-07, "logits/chosen": 0.07091321796178818, "logits/rejected": 0.08694402873516083, "logps/chosen": -216.90777587890625, "logps/rejected": -358.9932861328125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.3734030723571777, "rewards/margins": 13.509991645812988, "rewards/rejected": -14.883393287658691, "step": 3747 }, { "epoch": 1.28, "learning_rate": 6.073815910874941e-07, "logits/chosen": -0.036764707416296005, "logits/rejected": -0.026152098551392555, "logps/chosen": -222.8729248046875, "logps/rejected": -382.7438659667969, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -1.7391531467437744, "rewards/margins": 14.559476852416992, "rewards/rejected": -16.298629760742188, "step": 3748 }, { "epoch": 1.28, "learning_rate": 6.068733246639892e-07, "logits/chosen": 0.0006553367711603642, "logits/rejected": 0.028158342465758324, "logps/chosen": -236.71755981445312, "logps/rejected": -382.31475830078125, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -0.17881456017494202, "rewards/margins": 18.172473907470703, "rewards/rejected": -18.351289749145508, "step": 3749 }, { "epoch": 1.28, "learning_rate": 6.063651783352588e-07, "logits/chosen": 0.14375370740890503, "logits/rejected": 0.18182750046253204, "logps/chosen": -169.50079345703125, "logps/rejected": -267.53387451171875, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7443423867225647, "rewards/margins": 14.39242172241211, "rewards/rejected": -15.136765480041504, "step": 3750 }, { "epoch": 1.28, "learning_rate": 6.058571522565341e-07, "logits/chosen": 0.11820480972528458, "logits/rejected": 0.15596048533916473, "logps/chosen": -166.1869354248047, "logps/rejected": -367.29010009765625, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -1.3310084342956543, "rewards/margins": 19.27800750732422, "rewards/rejected": -20.60901641845703, "step": 3751 }, { "epoch": 1.28, "learning_rate": 6.053492465830097e-07, "logits/chosen": 0.0414944589138031, "logits/rejected": 0.04260264337062836, "logps/chosen": -174.82691955566406, "logps/rejected": -284.51617431640625, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -2.887711524963379, "rewards/margins": 9.803564071655273, "rewards/rejected": -12.691274642944336, "step": 3752 }, { "epoch": 1.28, "learning_rate": 6.048414614698447e-07, "logits/chosen": 0.06932978332042694, "logits/rejected": 0.12532754242420197, "logps/chosen": -187.5008087158203, "logps/rejected": -263.75567626953125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.3811594247817993, "rewards/margins": 14.257193565368652, "rewards/rejected": -15.63835334777832, "step": 3753 }, { "epoch": 1.28, "learning_rate": 6.043337970721593e-07, "logits/chosen": 0.030598683282732964, "logits/rejected": 0.037681158632040024, "logps/chosen": -184.01898193359375, "logps/rejected": -390.1070251464844, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.8909075260162354, "rewards/margins": 18.758827209472656, "rewards/rejected": -19.649734497070312, "step": 3754 }, { "epoch": 1.28, "learning_rate": 6.038262535450391e-07, "logits/chosen": 0.10837054997682571, "logits/rejected": 0.11984700709581375, "logps/chosen": -206.93800354003906, "logps/rejected": -335.2945861816406, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.8675625324249268, "rewards/margins": 14.251279830932617, "rewards/rejected": -15.118844032287598, "step": 3755 }, { "epoch": 1.28, "learning_rate": 6.033188310435311e-07, "logits/chosen": -0.006068351678550243, "logits/rejected": 0.030444463714957237, "logps/chosen": -230.0974884033203, "logps/rejected": -380.1741027832031, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.4394307136535645, "rewards/margins": 15.124116897583008, "rewards/rejected": -17.563547134399414, "step": 3756 }, { "epoch": 1.28, "learning_rate": 6.028115297226456e-07, "logits/chosen": -0.004895646125078201, "logits/rejected": 0.05705225467681885, "logps/chosen": -259.3913879394531, "logps/rejected": -403.744384765625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.544171094894409, "rewards/margins": 18.754140853881836, "rewards/rejected": -21.29831314086914, "step": 3757 }, { "epoch": 1.28, "learning_rate": 6.023043497373567e-07, "logits/chosen": 0.059350840747356415, "logits/rejected": 0.09338788688182831, "logps/chosen": -251.0384521484375, "logps/rejected": -356.26141357421875, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.063270092010498, "rewards/margins": 14.774238586425781, "rewards/rejected": -15.837508201599121, "step": 3758 }, { "epoch": 1.28, "learning_rate": 6.017972912426003e-07, "logits/chosen": 0.07144013047218323, "logits/rejected": 0.10032592713832855, "logps/chosen": -183.41429138183594, "logps/rejected": -287.0291748046875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.3880833387374878, "rewards/margins": 13.388882637023926, "rewards/rejected": -14.776966094970703, "step": 3759 }, { "epoch": 1.28, "learning_rate": 6.012903543932766e-07, "logits/chosen": 0.037160441279411316, "logits/rejected": 0.038139112293720245, "logps/chosen": -198.6249237060547, "logps/rejected": -364.9064025878906, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -2.561105728149414, "rewards/margins": 15.780363082885742, "rewards/rejected": -18.341468811035156, "step": 3760 }, { "epoch": 1.28, "learning_rate": 6.00783539344247e-07, "logits/chosen": -0.07692722231149673, "logits/rejected": -0.04747516289353371, "logps/chosen": -212.0731658935547, "logps/rejected": -324.88555908203125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.571188449859619, "rewards/margins": 13.638099670410156, "rewards/rejected": -16.209287643432617, "step": 3761 }, { "epoch": 1.28, "learning_rate": 6.00276846250337e-07, "logits/chosen": -0.00914696604013443, "logits/rejected": 0.013482081703841686, "logps/chosen": -124.5472412109375, "logps/rejected": -314.75067138671875, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -2.4144368171691895, "rewards/margins": 15.803009033203125, "rewards/rejected": -18.217445373535156, "step": 3762 }, { "epoch": 1.28, "learning_rate": 5.997702752663341e-07, "logits/chosen": 0.07318785041570663, "logits/rejected": 0.08729701489210129, "logps/chosen": -190.97515869140625, "logps/rejected": -341.1517639160156, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.0027053356170654, "rewards/margins": 14.785966873168945, "rewards/rejected": -16.788673400878906, "step": 3763 }, { "epoch": 1.28, "learning_rate": 5.992638265469885e-07, "logits/chosen": 0.1587161272764206, "logits/rejected": 0.18904957175254822, "logps/chosen": -215.86703491210938, "logps/rejected": -333.20849609375, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -1.1186952590942383, "rewards/margins": 14.009217262268066, "rewards/rejected": -15.127912521362305, "step": 3764 }, { "epoch": 1.28, "learning_rate": 5.987575002470141e-07, "logits/chosen": -0.04789271950721741, "logits/rejected": 0.0014606183394789696, "logps/chosen": -183.73358154296875, "logps/rejected": -284.17547607421875, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -1.475201964378357, "rewards/margins": 15.001351356506348, "rewards/rejected": -16.476552963256836, "step": 3765 }, { "epoch": 1.29, "learning_rate": 5.98251296521086e-07, "logits/chosen": 0.030619706958532333, "logits/rejected": 0.05053889751434326, "logps/chosen": -152.98348999023438, "logps/rejected": -270.41302490234375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -1.468401312828064, "rewards/margins": 12.956787109375, "rewards/rejected": -14.425189971923828, "step": 3766 }, { "epoch": 1.29, "learning_rate": 5.977452155238421e-07, "logits/chosen": 0.06659139692783356, "logits/rejected": 0.09942682087421417, "logps/chosen": -146.8118133544922, "logps/rejected": -299.08709716796875, "loss": 0.0503, "rewards/accuracies": 1.0, "rewards/chosen": -3.111309051513672, "rewards/margins": 14.408540725708008, "rewards/rejected": -17.51984977722168, "step": 3767 }, { "epoch": 1.29, "learning_rate": 5.972392574098843e-07, "logits/chosen": -0.017925171181559563, "logits/rejected": -0.009863440878689289, "logps/chosen": -211.68435668945312, "logps/rejected": -314.25531005859375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.183720588684082, "rewards/margins": 11.349170684814453, "rewards/rejected": -13.532890319824219, "step": 3768 }, { "epoch": 1.29, "learning_rate": 5.96733422333775e-07, "logits/chosen": -0.03657867759466171, "logits/rejected": -0.017996884882450104, "logps/chosen": -168.4652099609375, "logps/rejected": -298.8036804199219, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1603697538375854, "rewards/margins": 15.155427932739258, "rewards/rejected": -16.315797805786133, "step": 3769 }, { "epoch": 1.29, "learning_rate": 5.962277104500393e-07, "logits/chosen": -0.0794055163860321, "logits/rejected": -0.054612159729003906, "logps/chosen": -217.18243408203125, "logps/rejected": -379.1611328125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.2480905055999756, "rewards/margins": 14.93663215637207, "rewards/rejected": -17.184722900390625, "step": 3770 }, { "epoch": 1.29, "learning_rate": 5.957221219131665e-07, "logits/chosen": 0.05571800842881203, "logits/rejected": 0.039925385266542435, "logps/chosen": -205.86325073242188, "logps/rejected": -439.6063537597656, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.7353609204292297, "rewards/margins": 20.292875289916992, "rewards/rejected": -21.02823829650879, "step": 3771 }, { "epoch": 1.29, "learning_rate": 5.952166568776062e-07, "logits/chosen": 0.03887006640434265, "logits/rejected": 0.05288352444767952, "logps/chosen": -227.69447326660156, "logps/rejected": -385.06829833984375, "loss": 0.0507, "rewards/accuracies": 0.9375, "rewards/chosen": -3.127349376678467, "rewards/margins": 16.53696632385254, "rewards/rejected": -19.664316177368164, "step": 3772 }, { "epoch": 1.29, "learning_rate": 5.947113154977709e-07, "logits/chosen": -0.0751338079571724, "logits/rejected": -0.0534825474023819, "logps/chosen": -244.1991424560547, "logps/rejected": -367.5759582519531, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.4741333723068237, "rewards/margins": 13.486379623413086, "rewards/rejected": -14.960511207580566, "step": 3773 }, { "epoch": 1.29, "learning_rate": 5.94206097928036e-07, "logits/chosen": 0.15990380942821503, "logits/rejected": 0.203345388174057, "logps/chosen": -168.93600463867188, "logps/rejected": -281.24774169921875, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.9718489646911621, "rewards/margins": 15.888107299804688, "rewards/rejected": -16.859956741333008, "step": 3774 }, { "epoch": 1.29, "learning_rate": 5.937010043227377e-07, "logits/chosen": 0.03804638236761093, "logits/rejected": 0.04833588749170303, "logps/chosen": -193.46983337402344, "logps/rejected": -393.4414978027344, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.078202247619629, "rewards/margins": 16.682506561279297, "rewards/rejected": -18.760711669921875, "step": 3775 }, { "epoch": 1.29, "learning_rate": 5.931960348361759e-07, "logits/chosen": 0.009061029180884361, "logits/rejected": 0.053920209407806396, "logps/chosen": -241.84152221679688, "logps/rejected": -349.4850158691406, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.704605221748352, "rewards/margins": 14.625791549682617, "rewards/rejected": -16.33039665222168, "step": 3776 }, { "epoch": 1.29, "learning_rate": 5.926911896226111e-07, "logits/chosen": 0.09953440725803375, "logits/rejected": 0.11643128097057343, "logps/chosen": -110.57858276367188, "logps/rejected": -225.6280517578125, "loss": 0.0197, "rewards/accuracies": 0.9375, "rewards/chosen": -2.105706214904785, "rewards/margins": 10.216267585754395, "rewards/rejected": -12.32197380065918, "step": 3777 }, { "epoch": 1.29, "learning_rate": 5.921864688362672e-07, "logits/chosen": -0.02307838387787342, "logits/rejected": 0.016574189066886902, "logps/chosen": -267.03662109375, "logps/rejected": -474.33245849609375, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.080210566520691, "rewards/margins": 20.762073516845703, "rewards/rejected": -21.842283248901367, "step": 3778 }, { "epoch": 1.29, "learning_rate": 5.91681872631329e-07, "logits/chosen": 0.05989678576588631, "logits/rejected": 0.09598885476589203, "logps/chosen": -172.70628356933594, "logps/rejected": -325.8233642578125, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.3146297931671143, "rewards/margins": 15.671283721923828, "rewards/rejected": -16.985912322998047, "step": 3779 }, { "epoch": 1.29, "learning_rate": 5.911774011619437e-07, "logits/chosen": -0.008319763466715813, "logits/rejected": 0.003812698880210519, "logps/chosen": -197.6271514892578, "logps/rejected": -347.9246826171875, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -2.9121322631835938, "rewards/margins": 13.608543395996094, "rewards/rejected": -16.520675659179688, "step": 3780 }, { "epoch": 1.29, "learning_rate": 5.906730545822205e-07, "logits/chosen": 0.03132886439561844, "logits/rejected": 0.0367424376308918, "logps/chosen": -176.75262451171875, "logps/rejected": -304.7694091796875, "loss": 0.0165, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1122695207595825, "rewards/margins": 13.70793342590332, "rewards/rejected": -14.820202827453613, "step": 3781 }, { "epoch": 1.29, "learning_rate": 5.9016883304623e-07, "logits/chosen": 0.012837582267820835, "logits/rejected": 0.01850702427327633, "logps/chosen": -217.1555633544922, "logps/rejected": -372.8609924316406, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.574081301689148, "rewards/margins": 15.850470542907715, "rewards/rejected": -16.42455291748047, "step": 3782 }, { "epoch": 1.29, "learning_rate": 5.896647367080059e-07, "logits/chosen": 0.08918610960245132, "logits/rejected": 0.15061350166797638, "logps/chosen": -185.04022216796875, "logps/rejected": -325.0733337402344, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.40614959597587585, "rewards/margins": 19.10676383972168, "rewards/rejected": -19.51291275024414, "step": 3783 }, { "epoch": 1.29, "learning_rate": 5.891607657215416e-07, "logits/chosen": 0.026578174903988838, "logits/rejected": 0.06493550539016724, "logps/chosen": -163.52703857421875, "logps/rejected": -295.4877014160156, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.24609100818634033, "rewards/margins": 17.135425567626953, "rewards/rejected": -17.381515502929688, "step": 3784 }, { "epoch": 1.29, "learning_rate": 5.886569202407933e-07, "logits/chosen": -0.028819985687732697, "logits/rejected": -0.04009206220507622, "logps/chosen": -173.2981414794922, "logps/rejected": -378.0633239746094, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.4880830943584442, "rewards/margins": 17.18475341796875, "rewards/rejected": -17.672834396362305, "step": 3785 }, { "epoch": 1.29, "learning_rate": 5.881532004196797e-07, "logits/chosen": 0.028565632179379463, "logits/rejected": 0.07680156081914902, "logps/chosen": -236.5084686279297, "logps/rejected": -376.3026428222656, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.1113137006759644, "rewards/margins": 17.63492202758789, "rewards/rejected": -18.746234893798828, "step": 3786 }, { "epoch": 1.29, "learning_rate": 5.876496064120796e-07, "logits/chosen": 0.04489465430378914, "logits/rejected": 0.06879686564207077, "logps/chosen": -246.77830505371094, "logps/rejected": -343.0354919433594, "loss": 0.0273, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6794674396514893, "rewards/margins": 12.532241821289062, "rewards/rejected": -14.211709976196289, "step": 3787 }, { "epoch": 1.29, "learning_rate": 5.871461383718344e-07, "logits/chosen": 0.047791000455617905, "logits/rejected": 0.10171861946582794, "logps/chosen": -227.6376953125, "logps/rejected": -399.7913818359375, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -2.1313295364379883, "rewards/margins": 19.879119873046875, "rewards/rejected": -22.010448455810547, "step": 3788 }, { "epoch": 1.29, "learning_rate": 5.866427964527463e-07, "logits/chosen": 0.16586445271968842, "logits/rejected": 0.18019616603851318, "logps/chosen": -148.9845428466797, "logps/rejected": -279.49700927734375, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -2.2771449089050293, "rewards/margins": 13.52463150024414, "rewards/rejected": -15.801777839660645, "step": 3789 }, { "epoch": 1.29, "learning_rate": 5.861395808085791e-07, "logits/chosen": -0.02514496259391308, "logits/rejected": -0.008717598393559456, "logps/chosen": -233.54734802246094, "logps/rejected": -446.3798828125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.7987411022186279, "rewards/margins": 18.79204559326172, "rewards/rejected": -19.590787887573242, "step": 3790 }, { "epoch": 1.29, "learning_rate": 5.85636491593059e-07, "logits/chosen": 0.04797748103737831, "logits/rejected": 0.07149471342563629, "logps/chosen": -204.08847045898438, "logps/rejected": -415.1002197265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.867734432220459, "rewards/margins": 19.701801300048828, "rewards/rejected": -21.569536209106445, "step": 3791 }, { "epoch": 1.29, "learning_rate": 5.851335289598721e-07, "logits/chosen": -0.006696790456771851, "logits/rejected": 0.026250572875142097, "logps/chosen": -224.20091247558594, "logps/rejected": -343.149169921875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.09855154156684875, "rewards/margins": 16.03602409362793, "rewards/rejected": -16.13457679748535, "step": 3792 }, { "epoch": 1.29, "learning_rate": 5.84630693062667e-07, "logits/chosen": 0.036015354096889496, "logits/rejected": 0.08858178555965424, "logps/chosen": -268.7052307128906, "logps/rejected": -378.618408203125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.8336926698684692, "rewards/margins": 15.88287353515625, "rewards/rejected": -17.71656608581543, "step": 3793 }, { "epoch": 1.29, "learning_rate": 5.84127984055053e-07, "logits/chosen": -0.03102913498878479, "logits/rejected": -0.0036942411679774523, "logps/chosen": -234.11773681640625, "logps/rejected": -416.13519287109375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.5164034366607666, "rewards/margins": 17.723621368408203, "rewards/rejected": -19.24002456665039, "step": 3794 }, { "epoch": 1.3, "learning_rate": 5.836254020906003e-07, "logits/chosen": 0.07220105826854706, "logits/rejected": 0.06752420961856842, "logps/chosen": -198.99594116210938, "logps/rejected": -399.8918762207031, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.6460357308387756, "rewards/margins": 16.794340133666992, "rewards/rejected": -17.440378189086914, "step": 3795 }, { "epoch": 1.3, "learning_rate": 5.831229473228417e-07, "logits/chosen": -0.021558400243520737, "logits/rejected": 0.017919013276696205, "logps/chosen": -231.791748046875, "logps/rejected": -359.8796081542969, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -1.9640703201293945, "rewards/margins": 14.218097686767578, "rewards/rejected": -16.18216896057129, "step": 3796 }, { "epoch": 1.3, "learning_rate": 5.826206199052693e-07, "logits/chosen": -0.09635478258132935, "logits/rejected": -0.08746213465929031, "logps/chosen": -244.78140258789062, "logps/rejected": -478.2181396484375, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -1.9156514406204224, "rewards/margins": 19.40373992919922, "rewards/rejected": -21.31939125061035, "step": 3797 }, { "epoch": 1.3, "learning_rate": 5.821184199913376e-07, "logits/chosen": 0.017271196469664574, "logits/rejected": 0.014639939181506634, "logps/chosen": -209.7749481201172, "logps/rejected": -415.59869384765625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -3.35425066947937, "rewards/margins": 16.05469512939453, "rewards/rejected": -19.408945083618164, "step": 3798 }, { "epoch": 1.3, "learning_rate": 5.816163477344621e-07, "logits/chosen": -0.009000211022794247, "logits/rejected": 0.010747717693448067, "logps/chosen": -268.0348205566406, "logps/rejected": -405.108642578125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -3.5479683876037598, "rewards/margins": 14.9747314453125, "rewards/rejected": -18.5226993560791, "step": 3799 }, { "epoch": 1.3, "learning_rate": 5.811144032880182e-07, "logits/chosen": 0.04599224776029587, "logits/rejected": 0.05475253611803055, "logps/chosen": -180.94090270996094, "logps/rejected": -296.0328674316406, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -1.5664312839508057, "rewards/margins": 12.539064407348633, "rewards/rejected": -14.105494499206543, "step": 3800 }, { "epoch": 1.3, "learning_rate": 5.806125868053432e-07, "logits/chosen": 0.08120319992303848, "logits/rejected": 0.08283828943967819, "logps/chosen": -198.8981475830078, "logps/rejected": -380.7414245605469, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.105876088142395, "rewards/margins": 15.496049880981445, "rewards/rejected": -16.601924896240234, "step": 3801 }, { "epoch": 1.3, "learning_rate": 5.801108984397354e-07, "logits/chosen": 0.004853470250964165, "logits/rejected": 0.03600473329424858, "logps/chosen": -192.36160278320312, "logps/rejected": -339.87445068359375, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -1.984299898147583, "rewards/margins": 15.79061222076416, "rewards/rejected": -17.774913787841797, "step": 3802 }, { "epoch": 1.3, "learning_rate": 5.796093383444538e-07, "logits/chosen": -0.03566775098443031, "logits/rejected": -0.02629549242556095, "logps/chosen": -154.67758178710938, "logps/rejected": -303.23876953125, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -1.7101123332977295, "rewards/margins": 12.59479808807373, "rewards/rejected": -14.304908752441406, "step": 3803 }, { "epoch": 1.3, "learning_rate": 5.791079066727174e-07, "logits/chosen": 0.161863774061203, "logits/rejected": 0.18850526213645935, "logps/chosen": -186.12808227539062, "logps/rejected": -322.9103698730469, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.5954554080963135, "rewards/margins": 15.636213302612305, "rewards/rejected": -18.23166847229004, "step": 3804 }, { "epoch": 1.3, "learning_rate": 5.786066035777072e-07, "logits/chosen": 0.059713318943977356, "logits/rejected": 0.08324170857667923, "logps/chosen": -159.88636779785156, "logps/rejected": -308.9877624511719, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.3689824342727661, "rewards/margins": 15.39421558380127, "rewards/rejected": -16.76319694519043, "step": 3805 }, { "epoch": 1.3, "learning_rate": 5.781054292125645e-07, "logits/chosen": 0.1397532969713211, "logits/rejected": 0.15281707048416138, "logps/chosen": -192.0590057373047, "logps/rejected": -359.69073486328125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.0246012210845947, "rewards/margins": 16.65252685546875, "rewards/rejected": -18.677127838134766, "step": 3806 }, { "epoch": 1.3, "learning_rate": 5.776043837303907e-07, "logits/chosen": 0.008252957835793495, "logits/rejected": 0.056089624762535095, "logps/chosen": -231.50167846679688, "logps/rejected": -297.2757568359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.6222312450408936, "rewards/margins": 13.542726516723633, "rewards/rejected": -16.164958953857422, "step": 3807 }, { "epoch": 1.3, "learning_rate": 5.771034672842483e-07, "logits/chosen": -0.051581162959337234, "logits/rejected": -0.010297469794750214, "logps/chosen": -181.7982635498047, "logps/rejected": -342.4845275878906, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.0557522773742676, "rewards/margins": 17.008094787597656, "rewards/rejected": -19.063846588134766, "step": 3808 }, { "epoch": 1.3, "learning_rate": 5.766026800271611e-07, "logits/chosen": 0.03885410726070404, "logits/rejected": 0.07592055946588516, "logps/chosen": -201.76039123535156, "logps/rejected": -384.9163818359375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.2055304050445557, "rewards/margins": 17.90330696105957, "rewards/rejected": -20.108835220336914, "step": 3809 }, { "epoch": 1.3, "learning_rate": 5.761020221121113e-07, "logits/chosen": -0.002349348273128271, "logits/rejected": 0.010016822256147861, "logps/chosen": -229.48672485351562, "logps/rejected": -392.0576171875, "loss": 0.0718, "rewards/accuracies": 1.0, "rewards/chosen": -2.4491488933563232, "rewards/margins": 16.98548126220703, "rewards/rejected": -19.434627532958984, "step": 3810 }, { "epoch": 1.3, "learning_rate": 5.756014936920446e-07, "logits/chosen": 0.1706543266773224, "logits/rejected": 0.20101311802864075, "logps/chosen": -256.5945739746094, "logps/rejected": -397.3442687988281, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -0.6510876417160034, "rewards/margins": 18.509578704833984, "rewards/rejected": -19.160667419433594, "step": 3811 }, { "epoch": 1.3, "learning_rate": 5.751010949198643e-07, "logits/chosen": 0.006457234267145395, "logits/rejected": 0.0160517580807209, "logps/chosen": -185.72161865234375, "logps/rejected": -386.8254089355469, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -2.2816240787506104, "rewards/margins": 18.154136657714844, "rewards/rejected": -20.435760498046875, "step": 3812 }, { "epoch": 1.3, "learning_rate": 5.746008259484358e-07, "logits/chosen": 0.10032766312360764, "logits/rejected": 0.10256091505289078, "logps/chosen": -227.94943237304688, "logps/rejected": -394.6006164550781, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.9681892991065979, "rewards/margins": 17.915462493896484, "rewards/rejected": -18.883651733398438, "step": 3813 }, { "epoch": 1.3, "learning_rate": 5.741006869305847e-07, "logits/chosen": 0.035687148571014404, "logits/rejected": 0.0368201807141304, "logps/chosen": -203.21653747558594, "logps/rejected": -378.08282470703125, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -2.418238401412964, "rewards/margins": 17.18442153930664, "rewards/rejected": -19.602659225463867, "step": 3814 }, { "epoch": 1.3, "learning_rate": 5.736006780190959e-07, "logits/chosen": 0.11198503524065018, "logits/rejected": 0.13355521857738495, "logps/chosen": -224.64405822753906, "logps/rejected": -369.73052978515625, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -3.8116118907928467, "rewards/margins": 14.216554641723633, "rewards/rejected": -18.028165817260742, "step": 3815 }, { "epoch": 1.3, "learning_rate": 5.731007993667154e-07, "logits/chosen": 0.12158966064453125, "logits/rejected": 0.1427362710237503, "logps/chosen": -239.34646606445312, "logps/rejected": -411.3437194824219, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.7460839748382568, "rewards/margins": 16.81770133972168, "rewards/rejected": -18.56378746032715, "step": 3816 }, { "epoch": 1.3, "learning_rate": 5.726010511261499e-07, "logits/chosen": 0.14084479212760925, "logits/rejected": 0.1488015353679657, "logps/chosen": -189.44384765625, "logps/rejected": -439.82568359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.309225559234619, "rewards/margins": 20.664316177368164, "rewards/rejected": -22.973539352416992, "step": 3817 }, { "epoch": 1.3, "learning_rate": 5.721014334500646e-07, "logits/chosen": 0.07883618026971817, "logits/rejected": 0.10579553246498108, "logps/chosen": -184.5214385986328, "logps/rejected": -359.09552001953125, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -2.486647367477417, "rewards/margins": 16.52847671508789, "rewards/rejected": -19.01512336730957, "step": 3818 }, { "epoch": 1.3, "learning_rate": 5.716019464910862e-07, "logits/chosen": 0.04394140467047691, "logits/rejected": 0.07083095610141754, "logps/chosen": -201.1093292236328, "logps/rejected": -346.92974853515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.006900884211063385, "rewards/margins": 17.60629653930664, "rewards/rejected": -17.599397659301758, "step": 3819 }, { "epoch": 1.3, "learning_rate": 5.711025904018012e-07, "logits/chosen": -0.0030032237991690636, "logits/rejected": 0.025496074929833412, "logps/chosen": -211.66722106933594, "logps/rejected": -357.3846435546875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -2.012155771255493, "rewards/margins": 14.49824047088623, "rewards/rejected": -16.510395050048828, "step": 3820 }, { "epoch": 1.3, "learning_rate": 5.706033653347561e-07, "logits/chosen": 0.05840998515486717, "logits/rejected": 0.09313158690929413, "logps/chosen": -182.3226318359375, "logps/rejected": -231.0933380126953, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.7745742797851562, "rewards/margins": 9.375619888305664, "rewards/rejected": -11.150195121765137, "step": 3821 }, { "epoch": 1.3, "learning_rate": 5.70104271442457e-07, "logits/chosen": 0.02062467485666275, "logits/rejected": 0.02833102084696293, "logps/chosen": -191.3726043701172, "logps/rejected": -341.6680908203125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.2971091270446777, "rewards/margins": 13.587575912475586, "rewards/rejected": -14.884685516357422, "step": 3822 }, { "epoch": 1.3, "learning_rate": 5.696053088773702e-07, "logits/chosen": -0.02958640642464161, "logits/rejected": -0.0012143353233113885, "logps/chosen": -200.280029296875, "logps/rejected": -398.8582458496094, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.9923455715179443, "rewards/margins": 19.470497131347656, "rewards/rejected": -20.462844848632812, "step": 3823 }, { "epoch": 1.31, "learning_rate": 5.691064777919222e-07, "logits/chosen": 0.09895938634872437, "logits/rejected": 0.1319865882396698, "logps/chosen": -181.7539825439453, "logps/rejected": -405.5291748046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.961336374282837, "rewards/margins": 19.473663330078125, "rewards/rejected": -23.434999465942383, "step": 3824 }, { "epoch": 1.31, "learning_rate": 5.686077783384982e-07, "logits/chosen": 0.04055928811430931, "logits/rejected": 0.07831775397062302, "logps/chosen": -227.62326049804688, "logps/rejected": -384.98272705078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.6037118434906006, "rewards/margins": 18.652124404907227, "rewards/rejected": -20.255836486816406, "step": 3825 }, { "epoch": 1.31, "learning_rate": 5.681092106694451e-07, "logits/chosen": 0.02345193177461624, "logits/rejected": 0.07352100312709808, "logps/chosen": -205.84825134277344, "logps/rejected": -290.7496032714844, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -1.41856849193573, "rewards/margins": 12.920082092285156, "rewards/rejected": -14.338650703430176, "step": 3826 }, { "epoch": 1.31, "learning_rate": 5.676107749370678e-07, "logits/chosen": 0.01984468847513199, "logits/rejected": 0.03404875099658966, "logps/chosen": -174.2425537109375, "logps/rejected": -318.350830078125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.7313604354858398, "rewards/margins": 16.954509735107422, "rewards/rejected": -18.685871124267578, "step": 3827 }, { "epoch": 1.31, "learning_rate": 5.671124712936315e-07, "logits/chosen": -0.05598699301481247, "logits/rejected": -0.03707832843065262, "logps/chosen": -201.57254028320312, "logps/rejected": -326.92657470703125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.754457712173462, "rewards/margins": 13.146355628967285, "rewards/rejected": -15.900814056396484, "step": 3828 }, { "epoch": 1.31, "learning_rate": 5.666142998913617e-07, "logits/chosen": 0.045043960213661194, "logits/rejected": 0.06265947967767715, "logps/chosen": -149.47923278808594, "logps/rejected": -321.06207275390625, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -2.263122320175171, "rewards/margins": 17.16770362854004, "rewards/rejected": -19.430828094482422, "step": 3829 }, { "epoch": 1.31, "learning_rate": 5.661162608824419e-07, "logits/chosen": 0.027251459658145905, "logits/rejected": 0.03806310519576073, "logps/chosen": -191.81756591796875, "logps/rejected": -359.2811279296875, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.1265052556991577, "rewards/margins": 15.732199668884277, "rewards/rejected": -16.858705520629883, "step": 3830 }, { "epoch": 1.31, "learning_rate": 5.656183544190167e-07, "logits/chosen": -0.04678894579410553, "logits/rejected": -0.028703566640615463, "logps/chosen": -242.14552307128906, "logps/rejected": -411.0732727050781, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -2.717721700668335, "rewards/margins": 16.17058563232422, "rewards/rejected": -18.888307571411133, "step": 3831 }, { "epoch": 1.31, "learning_rate": 5.651205806531902e-07, "logits/chosen": 0.046172428876161575, "logits/rejected": 0.06435376405715942, "logps/chosen": -197.8519287109375, "logps/rejected": -393.6077575683594, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -2.510378360748291, "rewards/margins": 16.580589294433594, "rewards/rejected": -19.09096908569336, "step": 3832 }, { "epoch": 1.31, "learning_rate": 5.646229397370244e-07, "logits/chosen": 0.11152683943510056, "logits/rejected": 0.1419191062450409, "logps/chosen": -166.7412109375, "logps/rejected": -298.05340576171875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.8791279792785645, "rewards/margins": 13.491989135742188, "rewards/rejected": -14.371116638183594, "step": 3833 }, { "epoch": 1.31, "learning_rate": 5.641254318225421e-07, "logits/chosen": 0.05357310175895691, "logits/rejected": 0.08722925931215286, "logps/chosen": -227.9571075439453, "logps/rejected": -362.06781005859375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.1813948154449463, "rewards/margins": 16.36082649230957, "rewards/rejected": -18.542221069335938, "step": 3834 }, { "epoch": 1.31, "learning_rate": 5.636280570617251e-07, "logits/chosen": 0.10142878443002701, "logits/rejected": 0.14946261048316956, "logps/chosen": -179.62393188476562, "logps/rejected": -279.602294921875, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.9014613628387451, "rewards/margins": 13.1007080078125, "rewards/rejected": -15.002167701721191, "step": 3835 }, { "epoch": 1.31, "learning_rate": 5.631308156065151e-07, "logits/chosen": 0.10837233066558838, "logits/rejected": 0.12795428931713104, "logps/chosen": -227.51612854003906, "logps/rejected": -377.1360168457031, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -0.2686659097671509, "rewards/margins": 18.147607803344727, "rewards/rejected": -18.416275024414062, "step": 3836 }, { "epoch": 1.31, "learning_rate": 5.626337076088117e-07, "logits/chosen": 0.02073933556675911, "logits/rejected": 0.04621371626853943, "logps/chosen": -185.39625549316406, "logps/rejected": -286.62005615234375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.44963890314102173, "rewards/margins": 13.455255508422852, "rewards/rejected": -13.90489387512207, "step": 3837 }, { "epoch": 1.31, "learning_rate": 5.621367332204748e-07, "logits/chosen": 0.07260046899318695, "logits/rejected": 0.10254465788602829, "logps/chosen": -212.18148803710938, "logps/rejected": -344.2997131347656, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.6064492464065552, "rewards/margins": 16.26218032836914, "rewards/rejected": -17.86863136291504, "step": 3838 }, { "epoch": 1.31, "learning_rate": 5.616398925933234e-07, "logits/chosen": -0.023365415632724762, "logits/rejected": 0.001905907178297639, "logps/chosen": -177.6841278076172, "logps/rejected": -255.9303436279297, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -2.1098859310150146, "rewards/margins": 9.893900871276855, "rewards/rejected": -12.003785133361816, "step": 3839 }, { "epoch": 1.31, "learning_rate": 5.611431858791348e-07, "logits/chosen": 0.03038438968360424, "logits/rejected": 0.05151745676994324, "logps/chosen": -194.94692993164062, "logps/rejected": -279.7729187011719, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.8088622093200684, "rewards/margins": 11.300945281982422, "rewards/rejected": -14.109807014465332, "step": 3840 }, { "epoch": 1.31, "learning_rate": 5.606466132296472e-07, "logits/chosen": 0.023020412772893906, "logits/rejected": 0.06461051851511002, "logps/chosen": -225.68223571777344, "logps/rejected": -424.1038513183594, "loss": 0.0499, "rewards/accuracies": 1.0, "rewards/chosen": -1.7103943824768066, "rewards/margins": 19.480344772338867, "rewards/rejected": -21.190736770629883, "step": 3841 }, { "epoch": 1.31, "learning_rate": 5.601501747965558e-07, "logits/chosen": 0.003387679345905781, "logits/rejected": 0.058950673788785934, "logps/chosen": -217.70445251464844, "logps/rejected": -344.1247253417969, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -3.0058319568634033, "rewards/margins": 15.464407920837402, "rewards/rejected": -18.47024154663086, "step": 3842 }, { "epoch": 1.31, "learning_rate": 5.596538707315158e-07, "logits/chosen": 0.0446607731282711, "logits/rejected": 0.06444837152957916, "logps/chosen": -223.7418670654297, "logps/rejected": -419.6266174316406, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.9109601974487305, "rewards/margins": 20.443130493164062, "rewards/rejected": -21.35409164428711, "step": 3843 }, { "epoch": 1.31, "learning_rate": 5.591577011861419e-07, "logits/chosen": -0.0727241188287735, "logits/rejected": -0.037970855832099915, "logps/chosen": -233.46327209472656, "logps/rejected": -434.9106140136719, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.5595815181732178, "rewards/margins": 19.215505599975586, "rewards/rejected": -20.775087356567383, "step": 3844 }, { "epoch": 1.31, "learning_rate": 5.586616663120061e-07, "logits/chosen": 0.08294070512056351, "logits/rejected": 0.10982593148946762, "logps/chosen": -187.0333251953125, "logps/rejected": -367.41522216796875, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": -3.0817606449127197, "rewards/margins": 17.79193878173828, "rewards/rejected": -20.873699188232422, "step": 3845 }, { "epoch": 1.31, "learning_rate": 5.581657662606405e-07, "logits/chosen": 0.026384146884083748, "logits/rejected": 0.07226414978504181, "logps/chosen": -218.8479766845703, "logps/rejected": -320.9320068359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.648703098297119, "rewards/margins": 14.787734985351562, "rewards/rejected": -17.436437606811523, "step": 3846 }, { "epoch": 1.31, "learning_rate": 5.576700011835365e-07, "logits/chosen": -0.023598626255989075, "logits/rejected": 0.00318783987313509, "logps/chosen": -191.7289276123047, "logps/rejected": -379.204345703125, "loss": 0.0158, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5724666118621826, "rewards/margins": 16.47275161743164, "rewards/rejected": -18.04521942138672, "step": 3847 }, { "epoch": 1.31, "learning_rate": 5.571743712321422e-07, "logits/chosen": 0.03730367496609688, "logits/rejected": 0.09168166667222977, "logps/chosen": -230.93946838378906, "logps/rejected": -350.0509338378906, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1169430017471313, "rewards/margins": 16.60129165649414, "rewards/rejected": -17.71823501586914, "step": 3848 }, { "epoch": 1.31, "learning_rate": 5.566788765578665e-07, "logits/chosen": 0.026584917679429054, "logits/rejected": 0.07398176938295364, "logps/chosen": -207.00631713867188, "logps/rejected": -301.6022644042969, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -2.7467312812805176, "rewards/margins": 13.321714401245117, "rewards/rejected": -16.068445205688477, "step": 3849 }, { "epoch": 1.31, "learning_rate": 5.561835173120764e-07, "logits/chosen": 0.09573237597942352, "logits/rejected": 0.11069048196077347, "logps/chosen": -173.39002990722656, "logps/rejected": -358.0768737792969, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.944728136062622, "rewards/margins": 16.774547576904297, "rewards/rejected": -18.719274520874023, "step": 3850 }, { "epoch": 1.31, "learning_rate": 5.556882936460966e-07, "logits/chosen": 0.15747514367103577, "logits/rejected": 0.17533619701862335, "logps/chosen": -234.78903198242188, "logps/rejected": -419.24853515625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -3.53369402885437, "rewards/margins": 17.018604278564453, "rewards/rejected": -20.552297592163086, "step": 3851 }, { "epoch": 1.31, "learning_rate": 5.551932057112114e-07, "logits/chosen": 0.07968919724225998, "logits/rejected": 0.12102560698986053, "logps/chosen": -220.41004943847656, "logps/rejected": -385.7979736328125, "loss": 0.026, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8453406095504761, "rewards/margins": 19.261289596557617, "rewards/rejected": -20.106630325317383, "step": 3852 }, { "epoch": 1.32, "learning_rate": 5.546982536586635e-07, "logits/chosen": 0.004841658752411604, "logits/rejected": 0.038707755506038666, "logps/chosen": -170.28128051757812, "logps/rejected": -304.1252746582031, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.1734039783477783, "rewards/margins": 14.16454029083252, "rewards/rejected": -15.337944030761719, "step": 3853 }, { "epoch": 1.32, "learning_rate": 5.542034376396541e-07, "logits/chosen": 0.07074237614870071, "logits/rejected": 0.10244366526603699, "logps/chosen": -220.11134338378906, "logps/rejected": -379.75537109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.2925736904144287, "rewards/margins": 18.92222023010254, "rewards/rejected": -19.214792251586914, "step": 3854 }, { "epoch": 1.32, "learning_rate": 5.537087578053421e-07, "logits/chosen": 0.07080235332250595, "logits/rejected": 0.10461665689945221, "logps/chosen": -194.38893127441406, "logps/rejected": -309.8905334472656, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.695225715637207, "rewards/margins": 16.705102920532227, "rewards/rejected": -17.400325775146484, "step": 3855 }, { "epoch": 1.32, "learning_rate": 5.532142143068455e-07, "logits/chosen": 0.19117388129234314, "logits/rejected": 0.23831847310066223, "logps/chosen": -153.27908325195312, "logps/rejected": -267.473876953125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.7815227508544922, "rewards/margins": 13.612030982971191, "rewards/rejected": -15.3935546875, "step": 3856 }, { "epoch": 1.32, "learning_rate": 5.527198072952407e-07, "logits/chosen": -0.010406124405562878, "logits/rejected": -0.0006185806123539805, "logps/chosen": -216.02041625976562, "logps/rejected": -429.414794921875, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -2.219656467437744, "rewards/margins": 19.46105194091797, "rewards/rejected": -21.680709838867188, "step": 3857 }, { "epoch": 1.32, "learning_rate": 5.522255369215622e-07, "logits/chosen": 0.09726977348327637, "logits/rejected": 0.12252727150917053, "logps/chosen": -228.00253295898438, "logps/rejected": -416.35345458984375, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -0.5362332463264465, "rewards/margins": 18.11974334716797, "rewards/rejected": -18.655975341796875, "step": 3858 }, { "epoch": 1.32, "learning_rate": 5.51731403336803e-07, "logits/chosen": -0.04885692521929741, "logits/rejected": -0.041054461151361465, "logps/chosen": -211.896728515625, "logps/rejected": -374.28656005859375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.354080319404602, "rewards/margins": 14.945359230041504, "rewards/rejected": -16.2994384765625, "step": 3859 }, { "epoch": 1.32, "learning_rate": 5.512374066919137e-07, "logits/chosen": 0.02340545877814293, "logits/rejected": 0.06872525066137314, "logps/chosen": -224.6795654296875, "logps/rejected": -274.74041748046875, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.7377659678459167, "rewards/margins": 12.851065635681152, "rewards/rejected": -13.588831901550293, "step": 3860 }, { "epoch": 1.32, "learning_rate": 5.507435471378033e-07, "logits/chosen": 0.02684120461344719, "logits/rejected": 0.05252381041646004, "logps/chosen": -205.43283081054688, "logps/rejected": -297.8184814453125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.4422563910484314, "rewards/margins": 14.077054977416992, "rewards/rejected": -14.51931095123291, "step": 3861 }, { "epoch": 1.32, "learning_rate": 5.5024982482534e-07, "logits/chosen": -0.03650134056806564, "logits/rejected": -0.00267917406745255, "logps/chosen": -239.60165405273438, "logps/rejected": -412.052001953125, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": -3.4928455352783203, "rewards/margins": 17.519149780273438, "rewards/rejected": -21.011993408203125, "step": 3862 }, { "epoch": 1.32, "learning_rate": 5.497562399053482e-07, "logits/chosen": 0.041102126240730286, "logits/rejected": 0.057836007326841354, "logps/chosen": -158.6821746826172, "logps/rejected": -329.51824951171875, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -1.404712200164795, "rewards/margins": 17.18873405456543, "rewards/rejected": -18.593448638916016, "step": 3863 }, { "epoch": 1.32, "learning_rate": 5.492627925286113e-07, "logits/chosen": -0.022864773869514465, "logits/rejected": 0.042369235306978226, "logps/chosen": -249.41397094726562, "logps/rejected": -370.18829345703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2950127124786377, "rewards/margins": 17.385093688964844, "rewards/rejected": -18.68010711669922, "step": 3864 }, { "epoch": 1.32, "learning_rate": 5.487694828458715e-07, "logits/chosen": 0.07432954758405685, "logits/rejected": 0.10301919281482697, "logps/chosen": -212.4710693359375, "logps/rejected": -348.162109375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -3.522207498550415, "rewards/margins": 15.171770095825195, "rewards/rejected": -18.69397735595703, "step": 3865 }, { "epoch": 1.32, "learning_rate": 5.482763110078273e-07, "logits/chosen": 0.13560675084590912, "logits/rejected": 0.15274456143379211, "logps/chosen": -126.55061340332031, "logps/rejected": -246.25869750976562, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -2.014061450958252, "rewards/margins": 11.160343170166016, "rewards/rejected": -13.17440414428711, "step": 3866 }, { "epoch": 1.32, "learning_rate": 5.47783277165136e-07, "logits/chosen": 0.010218879207968712, "logits/rejected": 0.013533948920667171, "logps/chosen": -187.6024627685547, "logps/rejected": -385.169677734375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.814953625202179, "rewards/margins": 15.8150053024292, "rewards/rejected": -16.629959106445312, "step": 3867 }, { "epoch": 1.32, "learning_rate": 5.472903814684129e-07, "logits/chosen": 0.0827704593539238, "logits/rejected": 0.11025887727737427, "logps/chosen": -172.37075805664062, "logps/rejected": -277.2876892089844, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.850498676300049, "rewards/margins": 12.761514663696289, "rewards/rejected": -15.61201286315918, "step": 3868 }, { "epoch": 1.32, "learning_rate": 5.46797624068231e-07, "logits/chosen": 0.009408056735992432, "logits/rejected": 0.04257439076900482, "logps/chosen": -246.4228515625, "logps/rejected": -439.4285888671875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.0838406085968018, "rewards/margins": 18.936498641967773, "rewards/rejected": -21.020339965820312, "step": 3869 }, { "epoch": 1.32, "learning_rate": 5.463050051151205e-07, "logits/chosen": 0.026804840192198753, "logits/rejected": 0.05276663973927498, "logps/chosen": -168.29446411132812, "logps/rejected": -306.8830871582031, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.9128782749176025, "rewards/margins": 13.598739624023438, "rewards/rejected": -15.511617660522461, "step": 3870 }, { "epoch": 1.32, "learning_rate": 5.458125247595695e-07, "logits/chosen": -0.09263767302036285, "logits/rejected": -0.06012459844350815, "logps/chosen": -176.88563537597656, "logps/rejected": -345.3833312988281, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.9844670295715332, "rewards/margins": 16.554140090942383, "rewards/rejected": -18.538606643676758, "step": 3871 }, { "epoch": 1.32, "learning_rate": 5.453201831520245e-07, "logits/chosen": 0.06725805997848511, "logits/rejected": 0.06421057879924774, "logps/chosen": -192.66656494140625, "logps/rejected": -362.18511962890625, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -0.4165070652961731, "rewards/margins": 15.82397174835205, "rewards/rejected": -16.240478515625, "step": 3872 }, { "epoch": 1.32, "learning_rate": 5.448279804428887e-07, "logits/chosen": 0.03857579827308655, "logits/rejected": 0.05985824763774872, "logps/chosen": -230.58139038085938, "logps/rejected": -388.8517150878906, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -2.199686050415039, "rewards/margins": 15.569652557373047, "rewards/rejected": -17.769336700439453, "step": 3873 }, { "epoch": 1.32, "learning_rate": 5.443359167825237e-07, "logits/chosen": -0.09260234236717224, "logits/rejected": -0.018051816150546074, "logps/chosen": -261.7965087890625, "logps/rejected": -385.7745361328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.311535358428955, "rewards/margins": 18.241962432861328, "rewards/rejected": -20.553499221801758, "step": 3874 }, { "epoch": 1.32, "learning_rate": 5.438439923212475e-07, "logits/chosen": 0.01726665161550045, "logits/rejected": 0.036882542073726654, "logps/chosen": -195.3787078857422, "logps/rejected": -375.3588562011719, "loss": 0.0314, "rewards/accuracies": 0.9375, "rewards/chosen": -2.050187110900879, "rewards/margins": 16.552715301513672, "rewards/rejected": -18.6028995513916, "step": 3875 }, { "epoch": 1.32, "learning_rate": 5.433522072093366e-07, "logits/chosen": 0.1002715677022934, "logits/rejected": 0.11411435157060623, "logps/chosen": -146.84506225585938, "logps/rejected": -393.9302062988281, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -2.2969014644622803, "rewards/margins": 20.62529754638672, "rewards/rejected": -22.922199249267578, "step": 3876 }, { "epoch": 1.32, "learning_rate": 5.428605615970249e-07, "logits/chosen": 0.12341739982366562, "logits/rejected": 0.15015047788619995, "logps/chosen": -179.1215362548828, "logps/rejected": -283.055908203125, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.7754947543144226, "rewards/margins": 13.630705833435059, "rewards/rejected": -14.40619945526123, "step": 3877 }, { "epoch": 1.32, "learning_rate": 5.423690556345026e-07, "logits/chosen": 0.11044633388519287, "logits/rejected": 0.12484162300825119, "logps/chosen": -239.7877960205078, "logps/rejected": -438.2762145996094, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -2.9930710792541504, "rewards/margins": 18.033607482910156, "rewards/rejected": -21.02667808532715, "step": 3878 }, { "epoch": 1.32, "learning_rate": 5.418776894719184e-07, "logits/chosen": -0.039408985525369644, "logits/rejected": -0.011090408079326153, "logps/chosen": -222.82601928710938, "logps/rejected": -425.50469970703125, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -2.2116858959198, "rewards/margins": 19.325794219970703, "rewards/rejected": -21.537479400634766, "step": 3879 }, { "epoch": 1.32, "learning_rate": 5.41386463259378e-07, "logits/chosen": 0.019066179171204567, "logits/rejected": 0.046670034527778625, "logps/chosen": -261.90740966796875, "logps/rejected": -444.01470947265625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.6642117500305176, "rewards/margins": 19.770214080810547, "rewards/rejected": -21.434423446655273, "step": 3880 }, { "epoch": 1.32, "learning_rate": 5.408953771469437e-07, "logits/chosen": 0.17221732437610626, "logits/rejected": 0.18045805394649506, "logps/chosen": -117.67041015625, "logps/rejected": -273.26287841796875, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -1.7506285905838013, "rewards/margins": 13.502716064453125, "rewards/rejected": -15.25334358215332, "step": 3881 }, { "epoch": 1.32, "learning_rate": 5.404044312846362e-07, "logits/chosen": 0.1439318209886551, "logits/rejected": 0.1647786796092987, "logps/chosen": -208.68698120117188, "logps/rejected": -400.4640197753906, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.8985905647277832, "rewards/margins": 18.28606605529785, "rewards/rejected": -20.18465805053711, "step": 3882 }, { "epoch": 1.33, "learning_rate": 5.399136258224326e-07, "logits/chosen": 0.0738462507724762, "logits/rejected": 0.06927429884672165, "logps/chosen": -215.28802490234375, "logps/rejected": -455.5617980957031, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.9276275634765625, "rewards/margins": 20.194915771484375, "rewards/rejected": -22.122543334960938, "step": 3883 }, { "epoch": 1.33, "learning_rate": 5.394229609102665e-07, "logits/chosen": 0.11677420884370804, "logits/rejected": 0.1386338174343109, "logps/chosen": -119.4316635131836, "logps/rejected": -273.4053955078125, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.309490203857422, "rewards/margins": 14.272811889648438, "rewards/rejected": -16.58230209350586, "step": 3884 }, { "epoch": 1.33, "learning_rate": 5.389324366980299e-07, "logits/chosen": 0.08448244631290436, "logits/rejected": 0.10795622318983078, "logps/chosen": -179.93482971191406, "logps/rejected": -398.5565185546875, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -2.0549798011779785, "rewards/margins": 18.913818359375, "rewards/rejected": -20.968795776367188, "step": 3885 }, { "epoch": 1.33, "learning_rate": 5.384420533355709e-07, "logits/chosen": 0.05868370831012726, "logits/rejected": 0.1186647042632103, "logps/chosen": -276.949462890625, "logps/rejected": -412.1886901855469, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.3498481214046478, "rewards/margins": 17.84398078918457, "rewards/rejected": -18.193828582763672, "step": 3886 }, { "epoch": 1.33, "learning_rate": 5.379518109726954e-07, "logits/chosen": -0.04860216751694679, "logits/rejected": -0.04209119454026222, "logps/chosen": -181.00650024414062, "logps/rejected": -355.9188232421875, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -1.7965209484100342, "rewards/margins": 17.284196853637695, "rewards/rejected": -19.080718994140625, "step": 3887 }, { "epoch": 1.33, "learning_rate": 5.37461709759165e-07, "logits/chosen": -0.023689547553658485, "logits/rejected": 0.01439765002578497, "logps/chosen": -182.03485107421875, "logps/rejected": -342.7809753417969, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -2.5403804779052734, "rewards/margins": 16.753652572631836, "rewards/rejected": -19.29403305053711, "step": 3888 }, { "epoch": 1.33, "learning_rate": 5.369717498446989e-07, "logits/chosen": 0.06473717838525772, "logits/rejected": 0.10359056293964386, "logps/chosen": -169.53033447265625, "logps/rejected": -307.691650390625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.5730125904083252, "rewards/margins": 16.4028263092041, "rewards/rejected": -17.975839614868164, "step": 3889 }, { "epoch": 1.33, "learning_rate": 5.364819313789732e-07, "logits/chosen": 0.12683337926864624, "logits/rejected": 0.13423727452754974, "logps/chosen": -148.0035858154297, "logps/rejected": -273.9018859863281, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.3587424755096436, "rewards/margins": 12.305405616760254, "rewards/rejected": -13.664148330688477, "step": 3890 }, { "epoch": 1.33, "learning_rate": 5.35992254511621e-07, "logits/chosen": 0.12661567330360413, "logits/rejected": 0.15064509212970734, "logps/chosen": -199.5924530029297, "logps/rejected": -262.7956848144531, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.5715112686157227, "rewards/margins": 10.85621452331543, "rewards/rejected": -12.427727699279785, "step": 3891 }, { "epoch": 1.33, "learning_rate": 5.355027193922319e-07, "logits/chosen": -0.018978482112288475, "logits/rejected": 0.002931616036221385, "logps/chosen": -237.6809844970703, "logps/rejected": -355.9058837890625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.563382148742676, "rewards/margins": 12.886504173278809, "rewards/rejected": -15.449886322021484, "step": 3892 }, { "epoch": 1.33, "learning_rate": 5.350133261703515e-07, "logits/chosen": -0.03729716315865517, "logits/rejected": 0.038928061723709106, "logps/chosen": -260.6141662597656, "logps/rejected": -369.4650573730469, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.12539483606815338, "rewards/margins": 18.974397659301758, "rewards/rejected": -19.09979248046875, "step": 3893 }, { "epoch": 1.33, "learning_rate": 5.345240749954829e-07, "logits/chosen": -0.02955540642142296, "logits/rejected": -0.00424512755125761, "logps/chosen": -264.68231201171875, "logps/rejected": -382.3463439941406, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7366228699684143, "rewards/margins": 15.416080474853516, "rewards/rejected": -16.15270233154297, "step": 3894 }, { "epoch": 1.33, "learning_rate": 5.340349660170862e-07, "logits/chosen": 0.058113422244787216, "logits/rejected": 0.059504516422748566, "logps/chosen": -217.43014526367188, "logps/rejected": -362.5543212890625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.5362406969070435, "rewards/margins": 15.309114456176758, "rewards/rejected": -16.845354080200195, "step": 3895 }, { "epoch": 1.33, "learning_rate": 5.335459993845763e-07, "logits/chosen": 0.06114685535430908, "logits/rejected": 0.07935803383588791, "logps/chosen": -206.08261108398438, "logps/rejected": -385.52288818359375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.3293824195861816, "rewards/margins": 16.853500366210938, "rewards/rejected": -19.182880401611328, "step": 3896 }, { "epoch": 1.33, "learning_rate": 5.330571752473265e-07, "logits/chosen": 0.09233337640762329, "logits/rejected": 0.1017289012670517, "logps/chosen": -131.55618286132812, "logps/rejected": -291.1624450683594, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.6874797344207764, "rewards/margins": 13.841337203979492, "rewards/rejected": -16.52881622314453, "step": 3897 }, { "epoch": 1.33, "learning_rate": 5.325684937546662e-07, "logits/chosen": 0.0407772958278656, "logits/rejected": 0.06129668653011322, "logps/chosen": -161.08213806152344, "logps/rejected": -279.411865234375, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.8303557634353638, "rewards/margins": 13.850931167602539, "rewards/rejected": -14.68128776550293, "step": 3898 }, { "epoch": 1.33, "learning_rate": 5.320799550558799e-07, "logits/chosen": -0.1086871325969696, "logits/rejected": -0.04652619734406471, "logps/chosen": -275.1871032714844, "logps/rejected": -327.3244934082031, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9651554226875305, "rewards/margins": 16.08119773864746, "rewards/rejected": -17.04635238647461, "step": 3899 }, { "epoch": 1.33, "learning_rate": 5.3159155930021e-07, "logits/chosen": 0.13274234533309937, "logits/rejected": 0.14892971515655518, "logps/chosen": -206.3684844970703, "logps/rejected": -372.4191589355469, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": -2.5201048851013184, "rewards/margins": 14.641383171081543, "rewards/rejected": -17.161487579345703, "step": 3900 }, { "epoch": 1.33, "learning_rate": 5.311033066368544e-07, "logits/chosen": 0.032530177384614944, "logits/rejected": 0.06990271806716919, "logps/chosen": -242.81845092773438, "logps/rejected": -382.60943603515625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.4520936012268066, "rewards/margins": 15.953271865844727, "rewards/rejected": -18.405364990234375, "step": 3901 }, { "epoch": 1.33, "learning_rate": 5.306151972149682e-07, "logits/chosen": -0.01713409461081028, "logits/rejected": 0.00040798354893922806, "logps/chosen": -238.24916076660156, "logps/rejected": -414.19964599609375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.557219982147217, "rewards/margins": 17.66185188293457, "rewards/rejected": -20.219072341918945, "step": 3902 }, { "epoch": 1.33, "learning_rate": 5.301272311836611e-07, "logits/chosen": 0.06793690472841263, "logits/rejected": 0.10988922417163849, "logps/chosen": -196.94760131835938, "logps/rejected": -436.83575439453125, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -1.6892859935760498, "rewards/margins": 21.771343231201172, "rewards/rejected": -23.460630416870117, "step": 3903 }, { "epoch": 1.33, "learning_rate": 5.296394086920005e-07, "logits/chosen": -0.012944142334163189, "logits/rejected": 0.0019944505766034126, "logps/chosen": -138.4515380859375, "logps/rejected": -271.3386535644531, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.5368702411651611, "rewards/margins": 13.350908279418945, "rewards/rejected": -14.887777328491211, "step": 3904 }, { "epoch": 1.33, "learning_rate": 5.291517298890094e-07, "logits/chosen": -0.024875003844499588, "logits/rejected": 0.012673551216721535, "logps/chosen": -253.13140869140625, "logps/rejected": -440.3982238769531, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -1.371186375617981, "rewards/margins": 20.059978485107422, "rewards/rejected": -21.431163787841797, "step": 3905 }, { "epoch": 1.33, "learning_rate": 5.286641949236672e-07, "logits/chosen": -0.05049222335219383, "logits/rejected": -0.061044007539749146, "logps/chosen": -191.58322143554688, "logps/rejected": -394.9744873046875, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -1.9975701570510864, "rewards/margins": 16.27602767944336, "rewards/rejected": -18.273595809936523, "step": 3906 }, { "epoch": 1.33, "learning_rate": 5.281768039449091e-07, "logits/chosen": -0.05801549553871155, "logits/rejected": -0.04250326752662659, "logps/chosen": -261.0137939453125, "logps/rejected": -399.02911376953125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.0908684730529785, "rewards/margins": 13.651790618896484, "rewards/rejected": -16.742660522460938, "step": 3907 }, { "epoch": 1.33, "learning_rate": 5.276895571016257e-07, "logits/chosen": 0.044302958995103836, "logits/rejected": 0.07144390046596527, "logps/chosen": -189.86727905273438, "logps/rejected": -314.4184875488281, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -1.3056409358978271, "rewards/margins": 15.393655776977539, "rewards/rejected": -16.699295043945312, "step": 3908 }, { "epoch": 1.33, "learning_rate": 5.272024545426646e-07, "logits/chosen": 0.03139175847172737, "logits/rejected": 0.05148448422551155, "logps/chosen": -204.01243591308594, "logps/rejected": -395.89605712890625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.664994239807129, "rewards/margins": 18.35838508605957, "rewards/rejected": -20.023380279541016, "step": 3909 }, { "epoch": 1.33, "learning_rate": 5.267154964168292e-07, "logits/chosen": 0.0314180888235569, "logits/rejected": 0.06400138884782791, "logps/chosen": -194.01748657226562, "logps/rejected": -321.3016662597656, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.5939360857009888, "rewards/margins": 14.10557746887207, "rewards/rejected": -15.69951343536377, "step": 3910 }, { "epoch": 1.33, "learning_rate": 5.262286828728779e-07, "logits/chosen": -0.020664827898144722, "logits/rejected": 0.04075061157345772, "logps/chosen": -220.93240356445312, "logps/rejected": -374.47320556640625, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.7474239468574524, "rewards/margins": 14.597764015197754, "rewards/rejected": -15.345189094543457, "step": 3911 }, { "epoch": 1.34, "learning_rate": 5.257420140595257e-07, "logits/chosen": -0.010028964839875698, "logits/rejected": 0.0438910648226738, "logps/chosen": -237.22161865234375, "logps/rejected": -318.44769287109375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.6746439933776855, "rewards/margins": 13.701029777526855, "rewards/rejected": -15.375673294067383, "step": 3912 }, { "epoch": 1.34, "learning_rate": 5.252554901254438e-07, "logits/chosen": 0.04085863381624222, "logits/rejected": 0.08495690673589706, "logps/chosen": -221.15615844726562, "logps/rejected": -342.6322937011719, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.8963950872421265, "rewards/margins": 15.036866188049316, "rewards/rejected": -16.933259963989258, "step": 3913 }, { "epoch": 1.34, "learning_rate": 5.247691112192576e-07, "logits/chosen": 0.002713704016059637, "logits/rejected": 0.02667837403714657, "logps/chosen": -218.03787231445312, "logps/rejected": -417.2239990234375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.8847856521606445, "rewards/margins": 19.601970672607422, "rewards/rejected": -20.486759185791016, "step": 3914 }, { "epoch": 1.34, "learning_rate": 5.242828774895496e-07, "logits/chosen": 0.059710580855607986, "logits/rejected": 0.09299446642398834, "logps/chosen": -253.447998046875, "logps/rejected": -440.40789794921875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -2.8264636993408203, "rewards/margins": 19.098461151123047, "rewards/rejected": -21.924924850463867, "step": 3915 }, { "epoch": 1.34, "learning_rate": 5.237967890848573e-07, "logits/chosen": -0.06099459528923035, "logits/rejected": -0.02752554975450039, "logps/chosen": -201.9622802734375, "logps/rejected": -366.4029846191406, "loss": 0.0319, "rewards/accuracies": 0.9375, "rewards/chosen": -1.998103141784668, "rewards/margins": 15.728188514709473, "rewards/rejected": -17.72629165649414, "step": 3916 }, { "epoch": 1.34, "learning_rate": 5.233108461536748e-07, "logits/chosen": 0.16548995673656464, "logits/rejected": 0.18525190651416779, "logps/chosen": -175.65493774414062, "logps/rejected": -318.51544189453125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.6040782928466797, "rewards/margins": 14.955291748046875, "rewards/rejected": -16.559370040893555, "step": 3917 }, { "epoch": 1.34, "learning_rate": 5.228250488444499e-07, "logits/chosen": 0.09236408025026321, "logits/rejected": 0.09868581593036652, "logps/chosen": -207.6663360595703, "logps/rejected": -451.0875549316406, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.6121208667755127, "rewards/margins": 18.587875366210938, "rewards/rejected": -20.199993133544922, "step": 3918 }, { "epoch": 1.34, "learning_rate": 5.223393973055874e-07, "logits/chosen": 0.06375964730978012, "logits/rejected": 0.10607162117958069, "logps/chosen": -171.8168182373047, "logps/rejected": -338.76385498046875, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -2.030580520629883, "rewards/margins": 16.269628524780273, "rewards/rejected": -18.300209045410156, "step": 3919 }, { "epoch": 1.34, "learning_rate": 5.218538916854473e-07, "logits/chosen": 0.0005286016967147589, "logits/rejected": 0.06347031146287918, "logps/chosen": -287.84796142578125, "logps/rejected": -423.9748840332031, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -2.227396011352539, "rewards/margins": 19.02145004272461, "rewards/rejected": -21.24884605407715, "step": 3920 }, { "epoch": 1.34, "learning_rate": 5.21368532132345e-07, "logits/chosen": -0.010308337397873402, "logits/rejected": 0.021274689584970474, "logps/chosen": -239.51312255859375, "logps/rejected": -376.91912841796875, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -1.5590981245040894, "rewards/margins": 15.761319160461426, "rewards/rejected": -17.320417404174805, "step": 3921 }, { "epoch": 1.34, "learning_rate": 5.208833187945505e-07, "logits/chosen": -0.0757099986076355, "logits/rejected": -0.043035995215177536, "logps/chosen": -226.6184539794922, "logps/rejected": -364.24334716796875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.59872305393219, "rewards/margins": 16.088335037231445, "rewards/rejected": -17.687061309814453, "step": 3922 }, { "epoch": 1.34, "learning_rate": 5.2039825182029e-07, "logits/chosen": 0.05315084010362625, "logits/rejected": 0.052178218960762024, "logps/chosen": -194.4718780517578, "logps/rejected": -363.9794921875, "loss": 0.0384, "rewards/accuracies": 0.9375, "rewards/chosen": -3.4386754035949707, "rewards/margins": 14.744829177856445, "rewards/rejected": -18.18350601196289, "step": 3923 }, { "epoch": 1.34, "learning_rate": 5.199133313577452e-07, "logits/chosen": 0.06656022369861603, "logits/rejected": 0.09352298080921173, "logps/chosen": -185.03079223632812, "logps/rejected": -346.6766662597656, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.7754693031311035, "rewards/margins": 16.019887924194336, "rewards/rejected": -17.79535675048828, "step": 3924 }, { "epoch": 1.34, "learning_rate": 5.194285575550525e-07, "logits/chosen": 0.10109155625104904, "logits/rejected": 0.13164637982845306, "logps/chosen": -127.61095428466797, "logps/rejected": -265.76666259765625, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": -2.3593850135803223, "rewards/margins": 13.503633499145508, "rewards/rejected": -15.863018989562988, "step": 3925 }, { "epoch": 1.34, "learning_rate": 5.189439305603032e-07, "logits/chosen": 0.0861169621348381, "logits/rejected": 0.11172175407409668, "logps/chosen": -181.03302001953125, "logps/rejected": -384.80792236328125, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -2.8810856342315674, "rewards/margins": 18.873117446899414, "rewards/rejected": -21.754201889038086, "step": 3926 }, { "epoch": 1.34, "learning_rate": 5.184594505215443e-07, "logits/chosen": 0.11806169897317886, "logits/rejected": 0.13699601590633392, "logps/chosen": -161.7549285888672, "logps/rejected": -306.72808837890625, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -2.1756160259246826, "rewards/margins": 14.367795944213867, "rewards/rejected": -16.543411254882812, "step": 3927 }, { "epoch": 1.34, "learning_rate": 5.179751175867784e-07, "logits/chosen": -0.04915947839617729, "logits/rejected": -0.03129354119300842, "logps/chosen": -286.93890380859375, "logps/rejected": -459.09613037109375, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -0.6932692527770996, "rewards/margins": 18.422626495361328, "rewards/rejected": -19.11589813232422, "step": 3928 }, { "epoch": 1.34, "learning_rate": 5.174909319039613e-07, "logits/chosen": 0.007861153222620487, "logits/rejected": 0.05488528311252594, "logps/chosen": -193.78054809570312, "logps/rejected": -321.9996643066406, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.7756304740905762, "rewards/margins": 16.661893844604492, "rewards/rejected": -18.437524795532227, "step": 3929 }, { "epoch": 1.34, "learning_rate": 5.17006893621006e-07, "logits/chosen": -0.07217681407928467, "logits/rejected": -0.0008451155736111104, "logps/chosen": -234.4095916748047, "logps/rejected": -376.76971435546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4069085121154785, "rewards/margins": 16.682260513305664, "rewards/rejected": -18.089168548583984, "step": 3930 }, { "epoch": 1.34, "learning_rate": 5.165230028857796e-07, "logits/chosen": 0.04959268122911453, "logits/rejected": 0.05269613116979599, "logps/chosen": -236.8386993408203, "logps/rejected": -380.9330749511719, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -2.826064109802246, "rewards/margins": 14.472232818603516, "rewards/rejected": -17.298295974731445, "step": 3931 }, { "epoch": 1.34, "learning_rate": 5.160392598461032e-07, "logits/chosen": 0.04950323700904846, "logits/rejected": 0.061742041260004044, "logps/chosen": -186.53392028808594, "logps/rejected": -350.3227844238281, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -1.1679067611694336, "rewards/margins": 16.392345428466797, "rewards/rejected": -17.560253143310547, "step": 3932 }, { "epoch": 1.34, "learning_rate": 5.155556646497541e-07, "logits/chosen": -0.002902116859331727, "logits/rejected": 0.033778563141822815, "logps/chosen": -200.31219482421875, "logps/rejected": -314.1502990722656, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.05475500226020813, "rewards/margins": 15.497345924377441, "rewards/rejected": -15.442590713500977, "step": 3933 }, { "epoch": 1.34, "learning_rate": 5.150722174444642e-07, "logits/chosen": 0.13949641585350037, "logits/rejected": 0.13701364398002625, "logps/chosen": -179.4264373779297, "logps/rejected": -396.14227294921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.9601316452026367, "rewards/margins": 17.815893173217773, "rewards/rejected": -19.776023864746094, "step": 3934 }, { "epoch": 1.34, "learning_rate": 5.145889183779195e-07, "logits/chosen": 0.10794994980096817, "logits/rejected": 0.108425073325634, "logps/chosen": -197.43569946289062, "logps/rejected": -405.56439208984375, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.2664188742637634, "rewards/margins": 17.88425636291504, "rewards/rejected": -18.15067481994629, "step": 3935 }, { "epoch": 1.34, "learning_rate": 5.141057675977618e-07, "logits/chosen": 0.025034556165337563, "logits/rejected": 0.043978288769721985, "logps/chosen": -213.12001037597656, "logps/rejected": -335.7566833496094, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -1.216214656829834, "rewards/margins": 14.376556396484375, "rewards/rejected": -15.592771530151367, "step": 3936 }, { "epoch": 1.34, "learning_rate": 5.136227652515863e-07, "logits/chosen": 0.08745001256465912, "logits/rejected": 0.09743749350309372, "logps/chosen": -188.6238555908203, "logps/rejected": -350.00689697265625, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": -1.9175910949707031, "rewards/margins": 15.157398223876953, "rewards/rejected": -17.074987411499023, "step": 3937 }, { "epoch": 1.34, "learning_rate": 5.131399114869439e-07, "logits/chosen": 0.08421964198350906, "logits/rejected": 0.07478857785463333, "logps/chosen": -142.3344268798828, "logps/rejected": -334.8448791503906, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.37688410282135, "rewards/margins": 15.458930969238281, "rewards/rejected": -16.8358154296875, "step": 3938 }, { "epoch": 1.34, "learning_rate": 5.126572064513396e-07, "logits/chosen": 0.045005157589912415, "logits/rejected": 0.09605788439512253, "logps/chosen": -198.3741912841797, "logps/rejected": -350.6566467285156, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -1.340458631515503, "rewards/margins": 17.755172729492188, "rewards/rejected": -19.095630645751953, "step": 3939 }, { "epoch": 1.34, "learning_rate": 5.121746502922338e-07, "logits/chosen": 0.0916072204709053, "logits/rejected": 0.11352021992206573, "logps/chosen": -232.25656127929688, "logps/rejected": -324.9888610839844, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.2331852912902832, "rewards/margins": 12.756182670593262, "rewards/rejected": -13.98936653137207, "step": 3940 }, { "epoch": 1.35, "learning_rate": 5.116922431570396e-07, "logits/chosen": 0.022355834022164345, "logits/rejected": 0.04798267036676407, "logps/chosen": -159.2699432373047, "logps/rejected": -274.3072814941406, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.6455297470092773, "rewards/margins": 12.188674926757812, "rewards/rejected": -14.834205627441406, "step": 3941 }, { "epoch": 1.35, "learning_rate": 5.112099851931265e-07, "logits/chosen": -0.04191673547029495, "logits/rejected": -0.030528083443641663, "logps/chosen": -165.04405212402344, "logps/rejected": -316.5985107421875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.3574538230895996, "rewards/margins": 15.485848426818848, "rewards/rejected": -16.84330177307129, "step": 3942 }, { "epoch": 1.35, "learning_rate": 5.107278765478178e-07, "logits/chosen": 0.025368543341755867, "logits/rejected": 0.07102690637111664, "logps/chosen": -276.23394775390625, "logps/rejected": -391.58270263671875, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -1.6506963968276978, "rewards/margins": 16.415164947509766, "rewards/rejected": -18.06586265563965, "step": 3943 }, { "epoch": 1.35, "learning_rate": 5.102459173683903e-07, "logits/chosen": -0.1363677829504013, "logits/rejected": -0.11335001140832901, "logps/chosen": -237.22512817382812, "logps/rejected": -402.2283935546875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.6176162958145142, "rewards/margins": 17.350784301757812, "rewards/rejected": -18.968400955200195, "step": 3944 }, { "epoch": 1.35, "learning_rate": 5.097641078020763e-07, "logits/chosen": 0.12132147699594498, "logits/rejected": 0.1297401636838913, "logps/chosen": -230.4996337890625, "logps/rejected": -388.94384765625, "loss": 0.0181, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5773037672042847, "rewards/margins": 14.068772315979004, "rewards/rejected": -15.646076202392578, "step": 3945 }, { "epoch": 1.35, "learning_rate": 5.092824479960625e-07, "logits/chosen": 0.032491285353899, "logits/rejected": 0.10293025523424149, "logps/chosen": -238.97064208984375, "logps/rejected": -374.3581237792969, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.5522717237472534, "rewards/margins": 18.635730743408203, "rewards/rejected": -20.18800163269043, "step": 3946 }, { "epoch": 1.35, "learning_rate": 5.088009380974881e-07, "logits/chosen": 0.03484540060162544, "logits/rejected": 0.06535979360342026, "logps/chosen": -176.14743041992188, "logps/rejected": -345.1978759765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.341707706451416, "rewards/margins": 15.757125854492188, "rewards/rejected": -18.098833084106445, "step": 3947 }, { "epoch": 1.35, "learning_rate": 5.083195782534486e-07, "logits/chosen": 0.025518257170915604, "logits/rejected": 0.06456308811903, "logps/chosen": -211.00714111328125, "logps/rejected": -317.3239440917969, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.0353509783744812, "rewards/margins": 15.241573333740234, "rewards/rejected": -15.206222534179688, "step": 3948 }, { "epoch": 1.35, "learning_rate": 5.078383686109926e-07, "logits/chosen": 0.015961553901433945, "logits/rejected": 0.03672380745410919, "logps/chosen": -167.19284057617188, "logps/rejected": -286.4727478027344, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.6217834949493408, "rewards/margins": 14.007375717163086, "rewards/rejected": -15.629158020019531, "step": 3949 }, { "epoch": 1.35, "learning_rate": 5.07357309317123e-07, "logits/chosen": 0.06687097996473312, "logits/rejected": 0.11267784982919693, "logps/chosen": -242.49676513671875, "logps/rejected": -326.13165283203125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -2.4826533794403076, "rewards/margins": 14.067543029785156, "rewards/rejected": -16.550195693969727, "step": 3950 }, { "epoch": 1.35, "learning_rate": 5.068764005187972e-07, "logits/chosen": -0.08377090096473694, "logits/rejected": -0.023079250007867813, "logps/chosen": -213.0711212158203, "logps/rejected": -409.875244140625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.8316729068756104, "rewards/margins": 18.683359146118164, "rewards/rejected": -19.515029907226562, "step": 3951 }, { "epoch": 1.35, "learning_rate": 5.063956423629255e-07, "logits/chosen": 0.018174922093749046, "logits/rejected": 0.03931405395269394, "logps/chosen": -216.45913696289062, "logps/rejected": -412.8990173339844, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.9547883868217468, "rewards/margins": 20.173259735107422, "rewards/rejected": -21.128047943115234, "step": 3952 }, { "epoch": 1.35, "learning_rate": 5.059150349963729e-07, "logits/chosen": -0.006308612879365683, "logits/rejected": 0.015408704988658428, "logps/chosen": -216.1752166748047, "logps/rejected": -381.81610107421875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.7753405570983887, "rewards/margins": 17.734872817993164, "rewards/rejected": -19.510215759277344, "step": 3953 }, { "epoch": 1.35, "learning_rate": 5.054345785659588e-07, "logits/chosen": -0.033555854111909866, "logits/rejected": -0.029478514567017555, "logps/chosen": -249.91563415527344, "logps/rejected": -456.2713317871094, "loss": 0.0156, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6648437976837158, "rewards/margins": 19.045894622802734, "rewards/rejected": -20.710737228393555, "step": 3954 }, { "epoch": 1.35, "learning_rate": 5.04954273218456e-07, "logits/chosen": -0.05384499579668045, "logits/rejected": 0.0010737761622294784, "logps/chosen": -279.03436279296875, "logps/rejected": -349.87591552734375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.5538387298583984, "rewards/margins": 13.541905403137207, "rewards/rejected": -16.095745086669922, "step": 3955 }, { "epoch": 1.35, "learning_rate": 5.044741191005908e-07, "logits/chosen": 0.052683066576719284, "logits/rejected": 0.09448317438364029, "logps/chosen": -161.69715881347656, "logps/rejected": -317.789794921875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.6158106923103333, "rewards/margins": 15.366888046264648, "rewards/rejected": -15.982697486877441, "step": 3956 }, { "epoch": 1.35, "learning_rate": 5.039941163590435e-07, "logits/chosen": 0.05080169811844826, "logits/rejected": 0.06305819749832153, "logps/chosen": -198.60357666015625, "logps/rejected": -320.04461669921875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.5955049991607666, "rewards/margins": 12.68947982788086, "rewards/rejected": -14.284984588623047, "step": 3957 }, { "epoch": 1.35, "learning_rate": 5.035142651404493e-07, "logits/chosen": 0.02846548706293106, "logits/rejected": 0.07555302232503891, "logps/chosen": -230.0121612548828, "logps/rejected": -347.30291748046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.124239206314087, "rewards/margins": 15.516840934753418, "rewards/rejected": -17.641080856323242, "step": 3958 }, { "epoch": 1.35, "learning_rate": 5.03034565591395e-07, "logits/chosen": 0.02381731942296028, "logits/rejected": 0.06112292781472206, "logps/chosen": -174.59234619140625, "logps/rejected": -243.9619140625, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -1.5033007860183716, "rewards/margins": 12.023826599121094, "rewards/rejected": -13.527128219604492, "step": 3959 }, { "epoch": 1.35, "learning_rate": 5.025550178584226e-07, "logits/chosen": -0.0023486095014959574, "logits/rejected": 0.007912881672382355, "logps/chosen": -206.0892333984375, "logps/rejected": -410.3982238769531, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -2.0894412994384766, "rewards/margins": 16.60987091064453, "rewards/rejected": -18.699312210083008, "step": 3960 }, { "epoch": 1.35, "learning_rate": 5.020756220880277e-07, "logits/chosen": 0.06997627019882202, "logits/rejected": 0.11405216157436371, "logps/chosen": -203.7671661376953, "logps/rejected": -377.62322998046875, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -1.638761281967163, "rewards/margins": 16.586336135864258, "rewards/rejected": -18.22509765625, "step": 3961 }, { "epoch": 1.35, "learning_rate": 5.015963784266583e-07, "logits/chosen": -0.054776161909103394, "logits/rejected": -0.008961193263530731, "logps/chosen": -237.35525512695312, "logps/rejected": -378.6042785644531, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.49787437915802, "rewards/margins": 18.549814224243164, "rewards/rejected": -20.047685623168945, "step": 3962 }, { "epoch": 1.35, "learning_rate": 5.011172870207173e-07, "logits/chosen": -0.044156428426504135, "logits/rejected": -0.02020302042365074, "logps/chosen": -243.49160766601562, "logps/rejected": -387.3462219238281, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -1.7740291357040405, "rewards/margins": 16.171903610229492, "rewards/rejected": -17.945932388305664, "step": 3963 }, { "epoch": 1.35, "learning_rate": 5.006383480165608e-07, "logits/chosen": 0.0035837062168866396, "logits/rejected": 0.01986328512430191, "logps/chosen": -164.60145568847656, "logps/rejected": -325.56414794921875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.562018871307373, "rewards/margins": 15.200801849365234, "rewards/rejected": -16.762821197509766, "step": 3964 }, { "epoch": 1.35, "learning_rate": 5.001595615604968e-07, "logits/chosen": -0.012105247937142849, "logits/rejected": 0.010665317066013813, "logps/chosen": -147.83958435058594, "logps/rejected": -314.77685546875, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -1.974736213684082, "rewards/margins": 16.195226669311523, "rewards/rejected": -18.169963836669922, "step": 3965 }, { "epoch": 1.35, "learning_rate": 4.996809277987895e-07, "logits/chosen": 0.09527210891246796, "logits/rejected": 0.10690024495124817, "logps/chosen": -166.48170471191406, "logps/rejected": -280.8105163574219, "loss": 0.0248, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1955986022949219, "rewards/margins": 12.008728981018066, "rewards/rejected": -13.204326629638672, "step": 3966 }, { "epoch": 1.35, "learning_rate": 4.992024468776539e-07, "logits/chosen": -0.04064367339015007, "logits/rejected": -0.002848030999302864, "logps/chosen": -156.84735107421875, "logps/rejected": -284.8809509277344, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.9223688244819641, "rewards/margins": 13.023384094238281, "rewards/rejected": -13.945752143859863, "step": 3967 }, { "epoch": 1.35, "learning_rate": 4.987241189432596e-07, "logits/chosen": -0.014145997352898121, "logits/rejected": 0.014298008754849434, "logps/chosen": -187.2074432373047, "logps/rejected": -367.7205810546875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.638981819152832, "rewards/margins": 19.356698989868164, "rewards/rejected": -20.995681762695312, "step": 3968 }, { "epoch": 1.35, "learning_rate": 4.982459441417296e-07, "logits/chosen": 0.058806732296943665, "logits/rejected": 0.07628738880157471, "logps/chosen": -221.45956420898438, "logps/rejected": -342.69537353515625, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -2.5019516944885254, "rewards/margins": 12.281734466552734, "rewards/rejected": -14.783684730529785, "step": 3969 }, { "epoch": 1.35, "learning_rate": 4.977679226191389e-07, "logits/chosen": -0.05475452169775963, "logits/rejected": -0.010480847209692001, "logps/chosen": -188.9986114501953, "logps/rejected": -312.4149169921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.237964153289795, "rewards/margins": 14.027945518493652, "rewards/rejected": -16.26590919494629, "step": 3970 }, { "epoch": 1.36, "learning_rate": 4.972900545215168e-07, "logits/chosen": -0.024344351142644882, "logits/rejected": -0.010071341879665852, "logps/chosen": -192.80221557617188, "logps/rejected": -357.27099609375, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -2.030860662460327, "rewards/margins": 15.319212913513184, "rewards/rejected": -17.350074768066406, "step": 3971 }, { "epoch": 1.36, "learning_rate": 4.968123399948456e-07, "logits/chosen": 0.08222594857215881, "logits/rejected": 0.10040011256933212, "logps/chosen": -172.2129669189453, "logps/rejected": -312.7105407714844, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.4757517576217651, "rewards/margins": 14.043992042541504, "rewards/rejected": -15.519742965698242, "step": 3972 }, { "epoch": 1.36, "learning_rate": 4.963347791850608e-07, "logits/chosen": 0.0533609539270401, "logits/rejected": 0.08580277860164642, "logps/chosen": -180.64385986328125, "logps/rejected": -341.8046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0820553302764893, "rewards/margins": 17.949514389038086, "rewards/rejected": -19.03156852722168, "step": 3973 }, { "epoch": 1.36, "learning_rate": 4.958573722380498e-07, "logits/chosen": 0.023697925731539726, "logits/rejected": 0.03683925047516823, "logps/chosen": -162.0713653564453, "logps/rejected": -321.6473388671875, "loss": 0.0522, "rewards/accuracies": 1.0, "rewards/chosen": -0.5891001224517822, "rewards/margins": 15.182450294494629, "rewards/rejected": -15.771550178527832, "step": 3974 }, { "epoch": 1.36, "learning_rate": 4.953801192996543e-07, "logits/chosen": -0.0836307629942894, "logits/rejected": -0.05157528072595596, "logps/chosen": -198.369384765625, "logps/rejected": -369.5057678222656, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.21308737993240356, "rewards/margins": 19.0415096282959, "rewards/rejected": -19.25459861755371, "step": 3975 }, { "epoch": 1.36, "learning_rate": 4.949030205156687e-07, "logits/chosen": 0.03351020812988281, "logits/rejected": 0.026580670848488808, "logps/chosen": -248.20188903808594, "logps/rejected": -444.4577331542969, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -0.8815439939498901, "rewards/margins": 17.589725494384766, "rewards/rejected": -18.471269607543945, "step": 3976 }, { "epoch": 1.36, "learning_rate": 4.944260760318397e-07, "logits/chosen": 0.06983634829521179, "logits/rejected": 0.08515885472297668, "logps/chosen": -202.12973022460938, "logps/rejected": -298.6507568359375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.47225865721702576, "rewards/margins": 12.736620903015137, "rewards/rejected": -13.208878517150879, "step": 3977 }, { "epoch": 1.36, "learning_rate": 4.939492859938672e-07, "logits/chosen": 0.032709479331970215, "logits/rejected": 0.06412118673324585, "logps/chosen": -228.79502868652344, "logps/rejected": -349.5330505371094, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -0.9291015267372131, "rewards/margins": 17.649381637573242, "rewards/rejected": -18.578481674194336, "step": 3978 }, { "epoch": 1.36, "learning_rate": 4.934726505474046e-07, "logits/chosen": 0.0003840868885163218, "logits/rejected": 0.04016396775841713, "logps/chosen": -184.95281982421875, "logps/rejected": -269.25372314453125, "loss": 0.0174, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5584677457809448, "rewards/margins": 11.54983901977539, "rewards/rejected": -13.108306884765625, "step": 3979 }, { "epoch": 1.36, "learning_rate": 4.929961698380564e-07, "logits/chosen": -0.03609228506684303, "logits/rejected": -0.009281004779040813, "logps/chosen": -191.94515991210938, "logps/rejected": -407.23199462890625, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.7506141066551208, "rewards/margins": 18.554893493652344, "rewards/rejected": -19.30550765991211, "step": 3980 }, { "epoch": 1.36, "learning_rate": 4.925198440113821e-07, "logits/chosen": -0.03022667020559311, "logits/rejected": -0.005729960277676582, "logps/chosen": -151.3555450439453, "logps/rejected": -307.9197998046875, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -3.2290003299713135, "rewards/margins": 15.10135269165039, "rewards/rejected": -18.330352783203125, "step": 3981 }, { "epoch": 1.36, "learning_rate": 4.920436732128917e-07, "logits/chosen": 0.10159728676080704, "logits/rejected": 0.1192009449005127, "logps/chosen": -160.1912078857422, "logps/rejected": -358.68511962890625, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -3.0421667098999023, "rewards/margins": 17.34374237060547, "rewards/rejected": -20.385910034179688, "step": 3982 }, { "epoch": 1.36, "learning_rate": 4.915676575880493e-07, "logits/chosen": -0.05938684567809105, "logits/rejected": -0.03599126264452934, "logps/chosen": -226.3002166748047, "logps/rejected": -347.924560546875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.8456262946128845, "rewards/margins": 14.47398853302002, "rewards/rejected": -15.31961441040039, "step": 3983 }, { "epoch": 1.36, "learning_rate": 4.910917972822712e-07, "logits/chosen": 0.018529511988162994, "logits/rejected": 0.046465687453746796, "logps/chosen": -163.980224609375, "logps/rejected": -271.31646728515625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.6280510425567627, "rewards/margins": 13.332962036132812, "rewards/rejected": -14.96101188659668, "step": 3984 }, { "epoch": 1.36, "learning_rate": 4.906160924409257e-07, "logits/chosen": -0.07566817849874496, "logits/rejected": -0.0682370737195015, "logps/chosen": -237.3731689453125, "logps/rejected": -440.2101745605469, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": -3.255289316177368, "rewards/margins": 17.484956741333008, "rewards/rejected": -20.740245819091797, "step": 3985 }, { "epoch": 1.36, "learning_rate": 4.901405432093343e-07, "logits/chosen": 0.043552570044994354, "logits/rejected": 0.053558751940727234, "logps/chosen": -146.7921142578125, "logps/rejected": -293.87335205078125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.415688395500183, "rewards/margins": 12.380971908569336, "rewards/rejected": -13.796660423278809, "step": 3986 }, { "epoch": 1.36, "learning_rate": 4.896651497327707e-07, "logits/chosen": -0.0038958117365837097, "logits/rejected": 0.010749485343694687, "logps/chosen": -208.75411987304688, "logps/rejected": -348.5027160644531, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": -2.3640646934509277, "rewards/margins": 13.56076431274414, "rewards/rejected": -15.924829483032227, "step": 3987 }, { "epoch": 1.36, "learning_rate": 4.891899121564614e-07, "logits/chosen": -0.011261035688221455, "logits/rejected": 0.005882570054382086, "logps/chosen": -206.72947692871094, "logps/rejected": -415.3561706542969, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.0765600204467773, "rewards/margins": 18.543561935424805, "rewards/rejected": -20.6201229095459, "step": 3988 }, { "epoch": 1.36, "learning_rate": 4.887148306255844e-07, "logits/chosen": 0.06045755371451378, "logits/rejected": 0.11026880145072937, "logps/chosen": -210.67657470703125, "logps/rejected": -358.8061828613281, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.114591121673584, "rewards/margins": 16.391935348510742, "rewards/rejected": -17.50652503967285, "step": 3989 }, { "epoch": 1.36, "learning_rate": 4.882399052852706e-07, "logits/chosen": -0.05860546976327896, "logits/rejected": -0.03176113963127136, "logps/chosen": -231.04483032226562, "logps/rejected": -392.4882507324219, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.1152180433273315, "rewards/margins": 17.488571166992188, "rewards/rejected": -18.603788375854492, "step": 3990 }, { "epoch": 1.36, "learning_rate": 4.877651362806037e-07, "logits/chosen": 0.01789405569434166, "logits/rejected": 0.039810195565223694, "logps/chosen": -146.69558715820312, "logps/rejected": -296.68310546875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.8426179885864258, "rewards/margins": 15.619331359863281, "rewards/rejected": -17.461950302124023, "step": 3991 }, { "epoch": 1.36, "learning_rate": 4.872905237566182e-07, "logits/chosen": -0.07382452487945557, "logits/rejected": -0.04316489398479462, "logps/chosen": -221.28707885742188, "logps/rejected": -350.5626220703125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.4827815592288971, "rewards/margins": 15.79442024230957, "rewards/rejected": -16.277202606201172, "step": 3992 }, { "epoch": 1.36, "learning_rate": 4.868160678583023e-07, "logits/chosen": -0.021514886990189552, "logits/rejected": 0.004178990609943867, "logps/chosen": -226.97999572753906, "logps/rejected": -407.3298034667969, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -1.883164405822754, "rewards/margins": 17.641332626342773, "rewards/rejected": -19.52449607849121, "step": 3993 }, { "epoch": 1.36, "learning_rate": 4.86341768730596e-07, "logits/chosen": -0.008558413945138454, "logits/rejected": 0.00907889660447836, "logps/chosen": -253.29757690429688, "logps/rejected": -372.3512268066406, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -0.4353203773498535, "rewards/margins": 14.398033142089844, "rewards/rejected": -14.833353042602539, "step": 3994 }, { "epoch": 1.36, "learning_rate": 4.858676265183898e-07, "logits/chosen": -0.07131282985210419, "logits/rejected": -0.011175648309290409, "logps/chosen": -215.56881713867188, "logps/rejected": -331.97418212890625, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.9441229104995728, "rewards/margins": 15.36578369140625, "rewards/rejected": -16.309907913208008, "step": 3995 }, { "epoch": 1.36, "learning_rate": 4.853936413665294e-07, "logits/chosen": 0.05015997588634491, "logits/rejected": 0.07117754220962524, "logps/chosen": -232.6029815673828, "logps/rejected": -368.50067138671875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.1444251537322998, "rewards/margins": 17.139751434326172, "rewards/rejected": -18.284175872802734, "step": 3996 }, { "epoch": 1.36, "learning_rate": 4.849198134198093e-07, "logits/chosen": 0.033820655196905136, "logits/rejected": 0.05482599139213562, "logps/chosen": -223.5186767578125, "logps/rejected": -377.0498046875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.3907891511917114, "rewards/margins": 16.77354621887207, "rewards/rejected": -18.164335250854492, "step": 3997 }, { "epoch": 1.36, "learning_rate": 4.844461428229781e-07, "logits/chosen": 0.035053499042987823, "logits/rejected": 0.04558716341853142, "logps/chosen": -178.45004272460938, "logps/rejected": -387.2744140625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.6531434059143066, "rewards/margins": 18.900362014770508, "rewards/rejected": -20.55350685119629, "step": 3998 }, { "epoch": 1.36, "learning_rate": 4.839726297207359e-07, "logits/chosen": 0.03408960998058319, "logits/rejected": 0.08344262838363647, "logps/chosen": -190.99493408203125, "logps/rejected": -348.9643249511719, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.954721987247467, "rewards/margins": 17.668441772460938, "rewards/rejected": -18.6231632232666, "step": 3999 }, { "epoch": 1.37, "learning_rate": 4.834992742577334e-07, "logits/chosen": 0.1537000834941864, "logits/rejected": 0.17090940475463867, "logps/chosen": -157.88897705078125, "logps/rejected": -322.8152770996094, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.1178615093231201, "rewards/margins": 16.37566375732422, "rewards/rejected": -17.493526458740234, "step": 4000 }, { "epoch": 1.37, "learning_rate": 4.830260765785749e-07, "logits/chosen": 0.08101120591163635, "logits/rejected": 0.11891186982393265, "logps/chosen": -199.80258178710938, "logps/rejected": -257.2725524902344, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.3996431827545166, "rewards/margins": 12.595812797546387, "rewards/rejected": -12.995455741882324, "step": 4001 }, { "epoch": 1.37, "learning_rate": 4.825530368278159e-07, "logits/chosen": 0.0552302710711956, "logits/rejected": 0.08661597222089767, "logps/chosen": -187.34390258789062, "logps/rejected": -303.4490051269531, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.509468138217926, "rewards/margins": 14.690382957458496, "rewards/rejected": -15.199851036071777, "step": 4002 }, { "epoch": 1.37, "learning_rate": 4.820801551499628e-07, "logits/chosen": -0.05837269499897957, "logits/rejected": -0.04430806636810303, "logps/chosen": -204.83676147460938, "logps/rejected": -353.67364501953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.7695522308349609, "rewards/margins": 14.18078327178955, "rewards/rejected": -14.950334548950195, "step": 4003 }, { "epoch": 1.37, "learning_rate": 4.816074316894749e-07, "logits/chosen": 0.01681986264884472, "logits/rejected": 0.07531873136758804, "logps/chosen": -214.41624450683594, "logps/rejected": -300.5152587890625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 0.40800416469573975, "rewards/margins": 15.485072135925293, "rewards/rejected": -15.077069282531738, "step": 4004 }, { "epoch": 1.37, "learning_rate": 4.811348665907627e-07, "logits/chosen": 0.03679744154214859, "logits/rejected": 0.052982479333877563, "logps/chosen": -170.02748107910156, "logps/rejected": -313.43731689453125, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -2.3732492923736572, "rewards/margins": 12.971109390258789, "rewards/rejected": -15.344358444213867, "step": 4005 }, { "epoch": 1.37, "learning_rate": 4.806624599981886e-07, "logits/chosen": 0.009726051241159439, "logits/rejected": 0.04279659688472748, "logps/chosen": -226.06063842773438, "logps/rejected": -347.7171630859375, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -2.529430866241455, "rewards/margins": 15.083882331848145, "rewards/rejected": -17.613313674926758, "step": 4006 }, { "epoch": 1.37, "learning_rate": 4.801902120560656e-07, "logits/chosen": 0.02166035585105419, "logits/rejected": 0.02686605229973793, "logps/chosen": -179.24966430664062, "logps/rejected": -358.64788818359375, "loss": 0.0191, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4366941452026367, "rewards/margins": 14.47193717956543, "rewards/rejected": -15.908632278442383, "step": 4007 }, { "epoch": 1.37, "learning_rate": 4.797181229086593e-07, "logits/chosen": 0.01414380595088005, "logits/rejected": 0.06083670258522034, "logps/chosen": -232.95346069335938, "logps/rejected": -329.4305114746094, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.2568607330322266, "rewards/margins": 13.73299503326416, "rewards/rejected": -15.989855766296387, "step": 4008 }, { "epoch": 1.37, "learning_rate": 4.792461927001867e-07, "logits/chosen": -0.03596862033009529, "logits/rejected": 0.008701913058757782, "logps/chosen": -227.9806365966797, "logps/rejected": -323.04364013671875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.0737184286117554, "rewards/margins": 13.637121200561523, "rewards/rejected": -14.71083927154541, "step": 4009 }, { "epoch": 1.37, "learning_rate": 4.78774421574815e-07, "logits/chosen": -0.0606701485812664, "logits/rejected": -0.02978368103504181, "logps/chosen": -241.7222442626953, "logps/rejected": -373.2565612792969, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -1.7551718950271606, "rewards/margins": 15.900836944580078, "rewards/rejected": -17.656005859375, "step": 4010 }, { "epoch": 1.37, "learning_rate": 4.783028096766653e-07, "logits/chosen": -0.13463595509529114, "logits/rejected": -0.11024612933397293, "logps/chosen": -254.39447021484375, "logps/rejected": -386.79473876953125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.4680434465408325, "rewards/margins": 14.985254287719727, "rewards/rejected": -15.45329761505127, "step": 4011 }, { "epoch": 1.37, "learning_rate": 4.778313571498073e-07, "logits/chosen": -0.028857318684458733, "logits/rejected": -0.005522422958165407, "logps/chosen": -222.60882568359375, "logps/rejected": -367.3580017089844, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.625837802886963, "rewards/margins": 14.443832397460938, "rewards/rejected": -17.069671630859375, "step": 4012 }, { "epoch": 1.37, "learning_rate": 4.773600641382633e-07, "logits/chosen": -0.04267247021198273, "logits/rejected": -0.0018738056533038616, "logps/chosen": -212.43753051757812, "logps/rejected": -408.971923828125, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -2.0928115844726562, "rewards/margins": 20.73813819885254, "rewards/rejected": -22.830951690673828, "step": 4013 }, { "epoch": 1.37, "learning_rate": 4.768889307860075e-07, "logits/chosen": 0.07143480330705643, "logits/rejected": 0.08615361899137497, "logps/chosen": -220.7054901123047, "logps/rejected": -447.63043212890625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.7664151787757874, "rewards/margins": 21.43378448486328, "rewards/rejected": -22.2002010345459, "step": 4014 }, { "epoch": 1.37, "learning_rate": 4.764179572369641e-07, "logits/chosen": -0.03393685817718506, "logits/rejected": -0.016150563955307007, "logps/chosen": -194.01295471191406, "logps/rejected": -298.45416259765625, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.12948445975780487, "rewards/margins": 14.131415367126465, "rewards/rejected": -14.260900497436523, "step": 4015 }, { "epoch": 1.37, "learning_rate": 4.759471436350091e-07, "logits/chosen": 0.03795485571026802, "logits/rejected": 0.07470695674419403, "logps/chosen": -188.6952362060547, "logps/rejected": -310.0950927734375, "loss": 0.0679, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7207822799682617, "rewards/margins": 14.541732788085938, "rewards/rejected": -17.262514114379883, "step": 4016 }, { "epoch": 1.37, "learning_rate": 4.7547649012397004e-07, "logits/chosen": -0.10302270948886871, "logits/rejected": -0.06880580633878708, "logps/chosen": -272.2216796875, "logps/rejected": -432.03131103515625, "loss": 0.0465, "rewards/accuracies": 1.0, "rewards/chosen": -1.611567497253418, "rewards/margins": 17.376930236816406, "rewards/rejected": -18.988496780395508, "step": 4017 }, { "epoch": 1.37, "learning_rate": 4.750059968476242e-07, "logits/chosen": 0.16408860683441162, "logits/rejected": 0.19565388560295105, "logps/chosen": -225.4398193359375, "logps/rejected": -357.6751708984375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.1238458156585693, "rewards/margins": 17.11239242553711, "rewards/rejected": -18.236236572265625, "step": 4018 }, { "epoch": 1.37, "learning_rate": 4.7453566394970134e-07, "logits/chosen": -0.07288531213998795, "logits/rejected": -0.01043781265616417, "logps/chosen": -249.74322509765625, "logps/rejected": -411.9828186035156, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.1771702766418457, "rewards/margins": 20.142335891723633, "rewards/rejected": -21.31950569152832, "step": 4019 }, { "epoch": 1.37, "learning_rate": 4.7406549157388156e-07, "logits/chosen": 0.030790455639362335, "logits/rejected": 0.042805179953575134, "logps/chosen": -153.9449005126953, "logps/rejected": -274.08843994140625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -4.091701030731201, "rewards/margins": 11.29007625579834, "rewards/rejected": -15.3817777633667, "step": 4020 }, { "epoch": 1.37, "learning_rate": 4.7359547986379643e-07, "logits/chosen": -0.05416027829051018, "logits/rejected": -0.012583901174366474, "logps/chosen": -220.6417236328125, "logps/rejected": -351.1495361328125, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.9475978016853333, "rewards/margins": 15.996874809265137, "rewards/rejected": -16.944473266601562, "step": 4021 }, { "epoch": 1.37, "learning_rate": 4.731256289630272e-07, "logits/chosen": -0.05273059755563736, "logits/rejected": -0.013490185141563416, "logps/chosen": -218.6741180419922, "logps/rejected": -368.93115234375, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -2.761237859725952, "rewards/margins": 15.082257270812988, "rewards/rejected": -17.843496322631836, "step": 4022 }, { "epoch": 1.37, "learning_rate": 4.7265593901510735e-07, "logits/chosen": -0.009175106883049011, "logits/rejected": -0.005869944579899311, "logps/chosen": -203.04736328125, "logps/rejected": -400.5982971191406, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.9321813583374023, "rewards/margins": 18.836925506591797, "rewards/rejected": -20.769105911254883, "step": 4023 }, { "epoch": 1.37, "learning_rate": 4.7218641016352103e-07, "logits/chosen": -0.07136902958154678, "logits/rejected": -0.048567287623882294, "logps/chosen": -197.4814453125, "logps/rejected": -344.7147216796875, "loss": 0.0738, "rewards/accuracies": 1.0, "rewards/chosen": -2.932936429977417, "rewards/margins": 14.862181663513184, "rewards/rejected": -17.79511833190918, "step": 4024 }, { "epoch": 1.37, "learning_rate": 4.7171704255170163e-07, "logits/chosen": -0.01279948465526104, "logits/rejected": 0.03587555140256882, "logps/chosen": -202.94381713867188, "logps/rejected": -320.4440002441406, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -2.6624808311462402, "rewards/margins": 16.26999855041504, "rewards/rejected": -18.932479858398438, "step": 4025 }, { "epoch": 1.37, "learning_rate": 4.712478363230361e-07, "logits/chosen": 0.010241508483886719, "logits/rejected": 0.050553690642118454, "logps/chosen": -201.90106201171875, "logps/rejected": -304.9710388183594, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5827421545982361, "rewards/margins": 14.116076469421387, "rewards/rejected": -14.69881820678711, "step": 4026 }, { "epoch": 1.37, "learning_rate": 4.707787916208595e-07, "logits/chosen": 0.020857371389865875, "logits/rejected": 0.0371166430413723, "logps/chosen": -142.0360565185547, "logps/rejected": -286.18145751953125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.8824305534362793, "rewards/margins": 13.556137084960938, "rewards/rejected": -15.438568115234375, "step": 4027 }, { "epoch": 1.37, "learning_rate": 4.70309908588458e-07, "logits/chosen": -0.06724917888641357, "logits/rejected": -0.04112290218472481, "logps/chosen": -172.6056365966797, "logps/rejected": -307.7748107910156, "loss": 0.0483, "rewards/accuracies": 0.875, "rewards/chosen": -3.033395290374756, "rewards/margins": 13.31048583984375, "rewards/rejected": -16.343881607055664, "step": 4028 }, { "epoch": 1.38, "learning_rate": 4.698411873690703e-07, "logits/chosen": -0.09072483330965042, "logits/rejected": -0.09433206170797348, "logps/chosen": -202.96847534179688, "logps/rejected": -402.3513488769531, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.4340452551841736, "rewards/margins": 17.602096557617188, "rewards/rejected": -18.03614044189453, "step": 4029 }, { "epoch": 1.38, "learning_rate": 4.6937262810588307e-07, "logits/chosen": -0.06343471258878708, "logits/rejected": -0.017779316753149033, "logps/chosen": -150.3102569580078, "logps/rejected": -333.109130859375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.3973557949066162, "rewards/margins": 16.92966079711914, "rewards/rejected": -18.327014923095703, "step": 4030 }, { "epoch": 1.38, "learning_rate": 4.689042309420351e-07, "logits/chosen": -0.005227392073720694, "logits/rejected": 0.05941110476851463, "logps/chosen": -217.7210235595703, "logps/rejected": -297.9700927734375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.714763879776001, "rewards/margins": 15.052202224731445, "rewards/rejected": -16.766965866088867, "step": 4031 }, { "epoch": 1.38, "learning_rate": 4.684359960206158e-07, "logits/chosen": -0.041845377534627914, "logits/rejected": 0.00013552028394769877, "logps/chosen": -245.59988403320312, "logps/rejected": -392.6980895996094, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.445582151412964, "rewards/margins": 16.988466262817383, "rewards/rejected": -19.43404769897461, "step": 4032 }, { "epoch": 1.38, "learning_rate": 4.6796792348466353e-07, "logits/chosen": -0.17796677350997925, "logits/rejected": -0.13706444203853607, "logps/chosen": -297.631103515625, "logps/rejected": -420.3207092285156, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.12289716303348541, "rewards/margins": 16.207534790039062, "rewards/rejected": -16.330432891845703, "step": 4033 }, { "epoch": 1.38, "learning_rate": 4.675000134771684e-07, "logits/chosen": -0.003323513548821211, "logits/rejected": 0.031270645558834076, "logps/chosen": -101.05703735351562, "logps/rejected": -198.2556610107422, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.8741739988327026, "rewards/margins": 11.024036407470703, "rewards/rejected": -11.898208618164062, "step": 4034 }, { "epoch": 1.38, "learning_rate": 4.670322661410705e-07, "logits/chosen": -0.055089302361011505, "logits/rejected": -0.005057508125901222, "logps/chosen": -248.0640411376953, "logps/rejected": -473.0320129394531, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.0866518020629883, "rewards/margins": 23.120695114135742, "rewards/rejected": -25.20734977722168, "step": 4035 }, { "epoch": 1.38, "learning_rate": 4.6656468161926055e-07, "logits/chosen": -0.02532881312072277, "logits/rejected": -0.020008796826004982, "logps/chosen": -254.04803466796875, "logps/rejected": -447.0602722167969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.0251224040985107, "rewards/margins": 16.93260955810547, "rewards/rejected": -18.957731246948242, "step": 4036 }, { "epoch": 1.38, "learning_rate": 4.660972600545785e-07, "logits/chosen": -0.029832489788532257, "logits/rejected": 0.0034644207917153835, "logps/chosen": -225.38656616210938, "logps/rejected": -354.8149719238281, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.3689308166503906, "rewards/margins": 15.521780967712402, "rewards/rejected": -17.890710830688477, "step": 4037 }, { "epoch": 1.38, "learning_rate": 4.656300015898154e-07, "logits/chosen": 0.044629115611314774, "logits/rejected": 0.0631936639547348, "logps/chosen": -165.5621337890625, "logps/rejected": -328.3230285644531, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.669831395149231, "rewards/margins": 15.47973918914795, "rewards/rejected": -17.14957046508789, "step": 4038 }, { "epoch": 1.38, "learning_rate": 4.651629063677127e-07, "logits/chosen": 0.010797116905450821, "logits/rejected": 0.03306448087096214, "logps/chosen": -207.24661254882812, "logps/rejected": -337.30633544921875, "loss": 0.0766, "rewards/accuracies": 1.0, "rewards/chosen": -1.7243475914001465, "rewards/margins": 13.680694580078125, "rewards/rejected": -15.405041694641113, "step": 4039 }, { "epoch": 1.38, "learning_rate": 4.646959745309609e-07, "logits/chosen": 0.013414108194410801, "logits/rejected": 0.042770445346832275, "logps/chosen": -215.6936798095703, "logps/rejected": -382.4044494628906, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.5626815557479858, "rewards/margins": 20.575119018554688, "rewards/rejected": -22.137800216674805, "step": 4040 }, { "epoch": 1.38, "learning_rate": 4.642292062222015e-07, "logits/chosen": 0.05331529676914215, "logits/rejected": 0.10137521475553513, "logps/chosen": -171.92691040039062, "logps/rejected": -294.92059326171875, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -2.4728708267211914, "rewards/margins": 13.339798927307129, "rewards/rejected": -15.812667846679688, "step": 4041 }, { "epoch": 1.38, "learning_rate": 4.637626015840261e-07, "logits/chosen": 0.13357140123844147, "logits/rejected": 0.17192919552326202, "logps/chosen": -147.75949096679688, "logps/rejected": -306.18951416015625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.040684700012207, "rewards/margins": 16.86792755126953, "rewards/rejected": -18.908611297607422, "step": 4042 }, { "epoch": 1.38, "learning_rate": 4.6329616075897505e-07, "logits/chosen": 0.012706036679446697, "logits/rejected": 0.028811339288949966, "logps/chosen": -196.19998168945312, "logps/rejected": -322.3418884277344, "loss": 0.0387, "rewards/accuracies": 0.9375, "rewards/chosen": -2.697859525680542, "rewards/margins": 12.906959533691406, "rewards/rejected": -15.604820251464844, "step": 4043 }, { "epoch": 1.38, "learning_rate": 4.628298838895409e-07, "logits/chosen": 0.05678195506334305, "logits/rejected": 0.06915491074323654, "logps/chosen": -193.32078552246094, "logps/rejected": -332.585693359375, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.5169347524642944, "rewards/margins": 14.82509994506836, "rewards/rejected": -15.342033386230469, "step": 4044 }, { "epoch": 1.38, "learning_rate": 4.6236377111816415e-07, "logits/chosen": -0.06666299700737, "logits/rejected": -0.04722420871257782, "logps/chosen": -239.7940673828125, "logps/rejected": -406.51348876953125, "loss": 0.0949, "rewards/accuracies": 1.0, "rewards/chosen": -3.5155105590820312, "rewards/margins": 17.07822036743164, "rewards/rejected": -20.593730926513672, "step": 4045 }, { "epoch": 1.38, "learning_rate": 4.61897822587235e-07, "logits/chosen": -0.04827510565519333, "logits/rejected": 0.01462537795305252, "logps/chosen": -269.64398193359375, "logps/rejected": -390.6072692871094, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.5211749076843262, "rewards/margins": 16.468637466430664, "rewards/rejected": -17.98981285095215, "step": 4046 }, { "epoch": 1.38, "learning_rate": 4.6143203843909585e-07, "logits/chosen": 0.05454927310347557, "logits/rejected": 0.0762854740023613, "logps/chosen": -157.00343322753906, "logps/rejected": -253.65232849121094, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.1477961540222168, "rewards/margins": 12.167378425598145, "rewards/rejected": -13.315176010131836, "step": 4047 }, { "epoch": 1.38, "learning_rate": 4.6096641881603615e-07, "logits/chosen": -0.08207182586193085, "logits/rejected": -0.07166574150323868, "logps/chosen": -197.33395385742188, "logps/rejected": -362.86669921875, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -1.9495009183883667, "rewards/margins": 15.607791900634766, "rewards/rejected": -17.557292938232422, "step": 4048 }, { "epoch": 1.38, "learning_rate": 4.6050096386029666e-07, "logits/chosen": -0.024669811129570007, "logits/rejected": -0.021449103951454163, "logps/chosen": -176.1506805419922, "logps/rejected": -320.80950927734375, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.6231085062026978, "rewards/margins": 13.17689323425293, "rewards/rejected": -13.800002098083496, "step": 4049 }, { "epoch": 1.38, "learning_rate": 4.6003567371406784e-07, "logits/chosen": -0.08560194820165634, "logits/rejected": -0.05635340139269829, "logps/chosen": -233.85073852539062, "logps/rejected": -351.987060546875, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.0612807273864746, "rewards/margins": 15.694747924804688, "rewards/rejected": -16.756027221679688, "step": 4050 }, { "epoch": 1.38, "learning_rate": 4.5957054851948876e-07, "logits/chosen": -0.06307364255189896, "logits/rejected": -0.06371045857667923, "logps/chosen": -202.76754760742188, "logps/rejected": -373.92095947265625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.5043874979019165, "rewards/margins": 15.51397705078125, "rewards/rejected": -17.01836395263672, "step": 4051 }, { "epoch": 1.38, "learning_rate": 4.5910558841864886e-07, "logits/chosen": -0.08622459322214127, "logits/rejected": -0.01992175169289112, "logps/chosen": -259.63861083984375, "logps/rejected": -385.73077392578125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.4928151369094849, "rewards/margins": 19.26898765563965, "rewards/rejected": -20.76180076599121, "step": 4052 }, { "epoch": 1.38, "learning_rate": 4.5864079355358733e-07, "logits/chosen": -0.014665216207504272, "logits/rejected": 0.006346693728119135, "logps/chosen": -148.3218231201172, "logps/rejected": -304.0546569824219, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.081010341644287, "rewards/margins": 14.586906433105469, "rewards/rejected": -16.667917251586914, "step": 4053 }, { "epoch": 1.38, "learning_rate": 4.5817616406629266e-07, "logits/chosen": 0.013872576877474785, "logits/rejected": 0.0336996391415596, "logps/chosen": -223.2608642578125, "logps/rejected": -421.6099853515625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.916783094406128, "rewards/margins": 16.349498748779297, "rewards/rejected": -18.266279220581055, "step": 4054 }, { "epoch": 1.38, "learning_rate": 4.5771170009870233e-07, "logits/chosen": 0.09835096448659897, "logits/rejected": 0.11528096348047256, "logps/chosen": -167.34434509277344, "logps/rejected": -318.9131164550781, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.6127196550369263, "rewards/margins": 15.105955123901367, "rewards/rejected": -16.71867561340332, "step": 4055 }, { "epoch": 1.38, "learning_rate": 4.572474017927037e-07, "logits/chosen": -0.041018396615982056, "logits/rejected": -0.015265511348843575, "logps/chosen": -206.3190460205078, "logps/rejected": -388.55413818359375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.6742751598358154, "rewards/margins": 17.773456573486328, "rewards/rejected": -19.447731018066406, "step": 4056 }, { "epoch": 1.38, "learning_rate": 4.5678326929013424e-07, "logits/chosen": -0.07255242019891739, "logits/rejected": -0.046357735991477966, "logps/chosen": -257.0334167480469, "logps/rejected": -411.08306884765625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.3774845600128174, "rewards/margins": 17.03327178955078, "rewards/rejected": -18.410757064819336, "step": 4057 }, { "epoch": 1.38, "learning_rate": 4.5631930273277864e-07, "logits/chosen": -0.12290448695421219, "logits/rejected": -0.0975174829363823, "logps/chosen": -209.452392578125, "logps/rejected": -437.91229248046875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.5062897205352783, "rewards/margins": 20.113353729248047, "rewards/rejected": -22.61964225769043, "step": 4058 }, { "epoch": 1.39, "learning_rate": 4.558555022623738e-07, "logits/chosen": -0.02654549852013588, "logits/rejected": 0.00016287904873024672, "logps/chosen": -190.67349243164062, "logps/rejected": -336.7407531738281, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -3.9298672676086426, "rewards/margins": 13.004255294799805, "rewards/rejected": -16.93412208557129, "step": 4059 }, { "epoch": 1.39, "learning_rate": 4.553918680206037e-07, "logits/chosen": -0.01746930368244648, "logits/rejected": -0.016172336414456367, "logps/chosen": -199.7490692138672, "logps/rejected": -362.1811218261719, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -2.788539171218872, "rewards/margins": 16.71744728088379, "rewards/rejected": -19.505983352661133, "step": 4060 }, { "epoch": 1.39, "learning_rate": 4.549284001491014e-07, "logits/chosen": -0.10018701106309891, "logits/rejected": -0.09043817967176437, "logps/chosen": -238.83355712890625, "logps/rejected": -374.3035888671875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.24816209077835083, "rewards/margins": 16.164186477661133, "rewards/rejected": -15.916025161743164, "step": 4061 }, { "epoch": 1.39, "learning_rate": 4.544650987894514e-07, "logits/chosen": 0.11328141391277313, "logits/rejected": 0.10997651517391205, "logps/chosen": -207.3127899169922, "logps/rejected": -391.7012939453125, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -1.2592430114746094, "rewards/margins": 15.241661071777344, "rewards/rejected": -16.500904083251953, "step": 4062 }, { "epoch": 1.39, "learning_rate": 4.5400196408318493e-07, "logits/chosen": 0.017639268189668655, "logits/rejected": 0.06986306607723236, "logps/chosen": -245.32345581054688, "logps/rejected": -351.8782653808594, "loss": 0.0568, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6243996620178223, "rewards/margins": 16.42058753967285, "rewards/rejected": -19.044986724853516, "step": 4063 }, { "epoch": 1.39, "learning_rate": 4.535389961717834e-07, "logits/chosen": 0.026669075712561607, "logits/rejected": 0.05355852469801903, "logps/chosen": -208.84005737304688, "logps/rejected": -370.6912841796875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.9710993766784668, "rewards/margins": 17.534231185913086, "rewards/rejected": -18.505329132080078, "step": 4064 }, { "epoch": 1.39, "learning_rate": 4.530761951966776e-07, "logits/chosen": -0.09568852186203003, "logits/rejected": -0.11305776983499527, "logps/chosen": -194.94923400878906, "logps/rejected": -465.12396240234375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.629145622253418, "rewards/margins": 21.622920989990234, "rewards/rejected": -23.252065658569336, "step": 4065 }, { "epoch": 1.39, "learning_rate": 4.5261356129924613e-07, "logits/chosen": -0.10412218421697617, "logits/rejected": -0.07893718034029007, "logps/chosen": -215.7796630859375, "logps/rejected": -349.7401428222656, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -1.564697504043579, "rewards/margins": 14.05022144317627, "rewards/rejected": -15.614917755126953, "step": 4066 }, { "epoch": 1.39, "learning_rate": 4.521510946208175e-07, "logits/chosen": 0.020541610196232796, "logits/rejected": 0.04398711025714874, "logps/chosen": -219.60772705078125, "logps/rejected": -392.93670654296875, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -0.17144957184791565, "rewards/margins": 18.63119125366211, "rewards/rejected": -18.802642822265625, "step": 4067 }, { "epoch": 1.39, "learning_rate": 4.51688795302669e-07, "logits/chosen": -0.10479984432458878, "logits/rejected": -0.08496299386024475, "logps/chosen": -206.54896545410156, "logps/rejected": -333.9802551269531, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -0.9917333126068115, "rewards/margins": 14.1402006149292, "rewards/rejected": -15.13193416595459, "step": 4068 }, { "epoch": 1.39, "learning_rate": 4.512266634860271e-07, "logits/chosen": -0.007660967297852039, "logits/rejected": 0.03626580536365509, "logps/chosen": -189.29306030273438, "logps/rejected": -321.8530578613281, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7400495409965515, "rewards/margins": 17.29079246520996, "rewards/rejected": -18.030841827392578, "step": 4069 }, { "epoch": 1.39, "learning_rate": 4.5076469931206583e-07, "logits/chosen": 0.05449647828936577, "logits/rejected": 0.08201830089092255, "logps/chosen": -187.49142456054688, "logps/rejected": -342.2496337890625, "loss": 0.0358, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7466261386871338, "rewards/margins": 17.18242645263672, "rewards/rejected": -18.92905044555664, "step": 4070 }, { "epoch": 1.39, "learning_rate": 4.5030290292190933e-07, "logits/chosen": 0.06870616227388382, "logits/rejected": 0.09426841884851456, "logps/chosen": -227.3690643310547, "logps/rejected": -351.59588623046875, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": 0.3992937505245209, "rewards/margins": 16.84033203125, "rewards/rejected": -16.441038131713867, "step": 4071 }, { "epoch": 1.39, "learning_rate": 4.4984127445663046e-07, "logits/chosen": 0.051352791488170624, "logits/rejected": 0.08957496285438538, "logps/chosen": -186.027587890625, "logps/rejected": -306.1395568847656, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -4.157522201538086, "rewards/margins": 13.257896423339844, "rewards/rejected": -17.41541862487793, "step": 4072 }, { "epoch": 1.39, "learning_rate": 4.4937981405724903e-07, "logits/chosen": 0.034814704209566116, "logits/rejected": 0.06587938964366913, "logps/chosen": -236.3774871826172, "logps/rejected": -354.55535888671875, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": 0.000714033842086792, "rewards/margins": 14.03253173828125, "rewards/rejected": -14.031817436218262, "step": 4073 }, { "epoch": 1.39, "learning_rate": 4.489185218647364e-07, "logits/chosen": 0.11231143772602081, "logits/rejected": 0.14030954241752625, "logps/chosen": -209.75779724121094, "logps/rejected": -348.44482421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.4729825258255005, "rewards/margins": 15.257929801940918, "rewards/rejected": -16.730911254882812, "step": 4074 }, { "epoch": 1.39, "learning_rate": 4.484573980200104e-07, "logits/chosen": -0.006376573815941811, "logits/rejected": 0.010338428430259228, "logps/chosen": -217.64981079101562, "logps/rejected": -326.3454895019531, "loss": 0.0175, "rewards/accuracies": 0.9375, "rewards/chosen": -2.423304557800293, "rewards/margins": 11.637529373168945, "rewards/rejected": -14.060832023620605, "step": 4075 }, { "epoch": 1.39, "learning_rate": 4.47996442663937e-07, "logits/chosen": -0.06216893717646599, "logits/rejected": 0.0025751818902790546, "logps/chosen": -285.9512023925781, "logps/rejected": -331.4557189941406, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.5768178701400757, "rewards/margins": 11.588042259216309, "rewards/rejected": -13.1648588180542, "step": 4076 }, { "epoch": 1.39, "learning_rate": 4.4753565593733334e-07, "logits/chosen": 0.010284320451319218, "logits/rejected": 0.04963882640004158, "logps/chosen": -196.3470458984375, "logps/rejected": -367.1040954589844, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -1.676895260810852, "rewards/margins": 18.43846893310547, "rewards/rejected": -20.11536407470703, "step": 4077 }, { "epoch": 1.39, "learning_rate": 4.4707503798096257e-07, "logits/chosen": 0.030875472351908684, "logits/rejected": 0.06586718559265137, "logps/chosen": -129.7112274169922, "logps/rejected": -252.23670959472656, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -1.8407236337661743, "rewards/margins": 12.969269752502441, "rewards/rejected": -14.8099946975708, "step": 4078 }, { "epoch": 1.39, "learning_rate": 4.4661458893553637e-07, "logits/chosen": 0.021831266582012177, "logits/rejected": 0.04051053151488304, "logps/chosen": -144.13302612304688, "logps/rejected": -313.654541015625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.8924363851547241, "rewards/margins": 16.66750717163086, "rewards/rejected": -15.77507209777832, "step": 4079 }, { "epoch": 1.39, "learning_rate": 4.461543089417172e-07, "logits/chosen": -0.054188698530197144, "logits/rejected": -0.04123084619641304, "logps/chosen": -182.35362243652344, "logps/rejected": -299.1778259277344, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -1.1876200437545776, "rewards/margins": 13.522080421447754, "rewards/rejected": -14.709700584411621, "step": 4080 }, { "epoch": 1.39, "learning_rate": 4.456941981401131e-07, "logits/chosen": -0.0956675186753273, "logits/rejected": -0.07169174402952194, "logps/chosen": -272.91748046875, "logps/rejected": -393.114990234375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.9472494125366211, "rewards/margins": 17.022048950195312, "rewards/rejected": -17.96929931640625, "step": 4081 }, { "epoch": 1.39, "learning_rate": 4.4523425667128177e-07, "logits/chosen": 0.02250407636165619, "logits/rejected": 0.038144905120134354, "logps/chosen": -190.15829467773438, "logps/rejected": -346.4613342285156, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -2.3527634143829346, "rewards/margins": 16.523216247558594, "rewards/rejected": -18.875978469848633, "step": 4082 }, { "epoch": 1.39, "learning_rate": 4.447744846757295e-07, "logits/chosen": -0.0032339063473045826, "logits/rejected": -0.005722529254853725, "logps/chosen": -152.74685668945312, "logps/rejected": -356.5133361816406, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.625186562538147, "rewards/margins": 17.33932113647461, "rewards/rejected": -18.964508056640625, "step": 4083 }, { "epoch": 1.39, "learning_rate": 4.4431488229390944e-07, "logits/chosen": -0.0950942263007164, "logits/rejected": -0.05386989191174507, "logps/chosen": -260.7254333496094, "logps/rejected": -388.1478271484375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.22232066094875336, "rewards/margins": 17.458242416381836, "rewards/rejected": -17.680564880371094, "step": 4084 }, { "epoch": 1.39, "learning_rate": 4.4385544966622436e-07, "logits/chosen": -0.08940919488668442, "logits/rejected": -0.06388247013092041, "logps/chosen": -195.38710021972656, "logps/rejected": -339.42156982421875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.9859341382980347, "rewards/margins": 14.624706268310547, "rewards/rejected": -15.610640525817871, "step": 4085 }, { "epoch": 1.39, "learning_rate": 4.4339618693302436e-07, "logits/chosen": 0.0008259209571406245, "logits/rejected": 0.02712024189531803, "logps/chosen": -205.4402313232422, "logps/rejected": -397.808837890625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.5357755422592163, "rewards/margins": 17.999874114990234, "rewards/rejected": -19.53565216064453, "step": 4086 }, { "epoch": 1.39, "learning_rate": 4.4293709423460834e-07, "logits/chosen": -0.042503904551267624, "logits/rejected": -0.029540354385972023, "logps/chosen": -213.65072631835938, "logps/rejected": -388.1560363769531, "loss": 0.0253, "rewards/accuracies": 0.9375, "rewards/chosen": -2.227057456970215, "rewards/margins": 16.14432716369629, "rewards/rejected": -18.371383666992188, "step": 4087 }, { "epoch": 1.4, "learning_rate": 4.424781717112219e-07, "logits/chosen": -0.07429155707359314, "logits/rejected": -0.036579981446266174, "logps/chosen": -191.80368041992188, "logps/rejected": -355.2421569824219, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -2.5040550231933594, "rewards/margins": 16.74271583557129, "rewards/rejected": -19.246768951416016, "step": 4088 }, { "epoch": 1.4, "learning_rate": 4.420194195030602e-07, "logits/chosen": -0.021718231961131096, "logits/rejected": 0.0009914442198351026, "logps/chosen": -268.0975036621094, "logps/rejected": -440.49896240234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9384376406669617, "rewards/margins": 20.099592208862305, "rewards/rejected": -21.038028717041016, "step": 4089 }, { "epoch": 1.4, "learning_rate": 4.4156083775026585e-07, "logits/chosen": -0.0031860133167356253, "logits/rejected": 0.013221465051174164, "logps/chosen": -236.98374938964844, "logps/rejected": -368.5512390136719, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": -1.8797862529754639, "rewards/margins": 14.090351104736328, "rewards/rejected": -15.970136642456055, "step": 4090 }, { "epoch": 1.4, "learning_rate": 4.411024265929283e-07, "logits/chosen": -0.10115321725606918, "logits/rejected": -0.07229434698820114, "logps/chosen": -215.83448791503906, "logps/rejected": -384.2933044433594, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -2.3101751804351807, "rewards/margins": 16.45374298095703, "rewards/rejected": -18.763916015625, "step": 4091 }, { "epoch": 1.4, "learning_rate": 4.40644186171087e-07, "logits/chosen": 0.07971838861703873, "logits/rejected": 0.07731067389249802, "logps/chosen": -185.56842041015625, "logps/rejected": -349.40313720703125, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -2.965608835220337, "rewards/margins": 14.21629810333252, "rewards/rejected": -17.181907653808594, "step": 4092 }, { "epoch": 1.4, "learning_rate": 4.401861166247277e-07, "logits/chosen": -0.0384148508310318, "logits/rejected": -0.01795334368944168, "logps/chosen": -159.00680541992188, "logps/rejected": -299.9733581542969, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.4195435047149658, "rewards/margins": 13.169938087463379, "rewards/rejected": -14.589483261108398, "step": 4093 }, { "epoch": 1.4, "learning_rate": 4.3972821809378336e-07, "logits/chosen": -0.0127113563939929, "logits/rejected": 0.034784190356731415, "logps/chosen": -186.5973663330078, "logps/rejected": -316.7103271484375, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.75969398021698, "rewards/margins": 15.614346504211426, "rewards/rejected": -16.374040603637695, "step": 4094 }, { "epoch": 1.4, "learning_rate": 4.3927049071813726e-07, "logits/chosen": -0.11311449855566025, "logits/rejected": -0.08141420036554337, "logps/chosen": -204.1111602783203, "logps/rejected": -377.381591796875, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -1.1616134643554688, "rewards/margins": 17.93622589111328, "rewards/rejected": -19.097837448120117, "step": 4095 }, { "epoch": 1.4, "learning_rate": 4.388129346376177e-07, "logits/chosen": -0.07965318858623505, "logits/rejected": -0.0646844357252121, "logps/chosen": -241.86375427246094, "logps/rejected": -382.4038391113281, "loss": 0.0679, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7821407318115234, "rewards/margins": 14.345355033874512, "rewards/rejected": -16.12749481201172, "step": 4096 }, { "epoch": 1.4, "learning_rate": 4.3835554999200196e-07, "logits/chosen": 0.0016141472151502967, "logits/rejected": 0.029149439185857773, "logps/chosen": -204.76284790039062, "logps/rejected": -387.8768005371094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.932692527770996, "rewards/margins": 17.379690170288086, "rewards/rejected": -21.31238555908203, "step": 4097 }, { "epoch": 1.4, "learning_rate": 4.378983369210152e-07, "logits/chosen": 0.012102121487259865, "logits/rejected": 0.07734429836273193, "logps/chosen": -230.1703643798828, "logps/rejected": -335.8689270019531, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.5016356706619263, "rewards/margins": 16.720766067504883, "rewards/rejected": -18.222402572631836, "step": 4098 }, { "epoch": 1.4, "learning_rate": 4.3744129556432894e-07, "logits/chosen": 0.08271021395921707, "logits/rejected": 0.10503914952278137, "logps/chosen": -125.78326416015625, "logps/rejected": -326.1165466308594, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.6228322982788086, "rewards/margins": 17.029029846191406, "rewards/rejected": -19.6518611907959, "step": 4099 }, { "epoch": 1.4, "learning_rate": 4.369844260615635e-07, "logits/chosen": -0.16144545376300812, "logits/rejected": -0.13244086503982544, "logps/chosen": -211.65538024902344, "logps/rejected": -378.506591796875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -3.2589211463928223, "rewards/margins": 15.818639755249023, "rewards/rejected": -19.077560424804688, "step": 4100 }, { "epoch": 1.4, "learning_rate": 4.3652772855228615e-07, "logits/chosen": 0.0830201506614685, "logits/rejected": 0.11713223904371262, "logps/chosen": -212.37567138671875, "logps/rejected": -343.1712646484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.19346866011619568, "rewards/margins": 16.714414596557617, "rewards/rejected": -16.907882690429688, "step": 4101 }, { "epoch": 1.4, "learning_rate": 4.360712031760119e-07, "logits/chosen": 0.015702439472079277, "logits/rejected": 0.024155404418706894, "logps/chosen": -182.8029022216797, "logps/rejected": -425.2427978515625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.2340843677520752, "rewards/margins": 19.919891357421875, "rewards/rejected": -21.153976440429688, "step": 4102 }, { "epoch": 1.4, "learning_rate": 4.3561485007220234e-07, "logits/chosen": 0.04506637156009674, "logits/rejected": 0.07468057423830032, "logps/chosen": -175.97242736816406, "logps/rejected": -285.36627197265625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.9417916536331177, "rewards/margins": 13.988487243652344, "rewards/rejected": -15.930277824401855, "step": 4103 }, { "epoch": 1.4, "learning_rate": 4.3515866938026737e-07, "logits/chosen": 0.007827539928257465, "logits/rejected": 0.013835740275681019, "logps/chosen": -183.03785705566406, "logps/rejected": -399.03729248046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.2797554731369019, "rewards/margins": 18.822938919067383, "rewards/rejected": -20.10269546508789, "step": 4104 }, { "epoch": 1.4, "learning_rate": 4.3470266123956433e-07, "logits/chosen": -0.10328517854213715, "logits/rejected": -0.08032090216875076, "logps/chosen": -232.31781005859375, "logps/rejected": -415.3024597167969, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -3.509969711303711, "rewards/margins": 15.796448707580566, "rewards/rejected": -19.30641746520996, "step": 4105 }, { "epoch": 1.4, "learning_rate": 4.342468257893963e-07, "logits/chosen": -0.09345223754644394, "logits/rejected": -0.08109904825687408, "logps/chosen": -209.0762939453125, "logps/rejected": -376.0558776855469, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.9378026723861694, "rewards/margins": 14.998871803283691, "rewards/rejected": -16.936676025390625, "step": 4106 }, { "epoch": 1.4, "learning_rate": 4.337911631690162e-07, "logits/chosen": 0.0531369186937809, "logits/rejected": 0.10347914695739746, "logps/chosen": -132.4622802734375, "logps/rejected": -319.3292541503906, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.6112078428268433, "rewards/margins": 16.463970184326172, "rewards/rejected": -18.075176239013672, "step": 4107 }, { "epoch": 1.4, "learning_rate": 4.333356735176218e-07, "logits/chosen": -0.03984267637133598, "logits/rejected": 0.010706105269491673, "logps/chosen": -264.1613464355469, "logps/rejected": -323.5574035644531, "loss": 0.0288, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5618680715560913, "rewards/margins": 14.366926193237305, "rewards/rejected": -15.928794860839844, "step": 4108 }, { "epoch": 1.4, "learning_rate": 4.328803569743583e-07, "logits/chosen": 0.0991966649889946, "logits/rejected": 0.12733197212219238, "logps/chosen": -201.8123321533203, "logps/rejected": -306.5245056152344, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.9299345016479492, "rewards/margins": 15.229668617248535, "rewards/rejected": -17.159603118896484, "step": 4109 }, { "epoch": 1.4, "learning_rate": 4.324252136783201e-07, "logits/chosen": 0.03400491178035736, "logits/rejected": 0.06427153199911118, "logps/chosen": -194.7328338623047, "logps/rejected": -334.74139404296875, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -2.917665481567383, "rewards/margins": 14.9369478225708, "rewards/rejected": -17.8546142578125, "step": 4110 }, { "epoch": 1.4, "learning_rate": 4.31970243768546e-07, "logits/chosen": 0.14323385059833527, "logits/rejected": 0.16170424222946167, "logps/chosen": -125.1041488647461, "logps/rejected": -249.48297119140625, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -3.1903278827667236, "rewards/margins": 12.598135948181152, "rewards/rejected": -15.788464546203613, "step": 4111 }, { "epoch": 1.4, "learning_rate": 4.3151544738402345e-07, "logits/chosen": -0.04310264438390732, "logits/rejected": -0.016523363068699837, "logps/chosen": -256.35736083984375, "logps/rejected": -413.3529052734375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8186661005020142, "rewards/margins": 16.857763290405273, "rewards/rejected": -17.676427841186523, "step": 4112 }, { "epoch": 1.4, "learning_rate": 4.3106082466368676e-07, "logits/chosen": 0.0486762560904026, "logits/rejected": 0.043970219790935516, "logps/chosen": -223.85940551757812, "logps/rejected": -418.59930419921875, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -1.1690080165863037, "rewards/margins": 18.27078628540039, "rewards/rejected": -19.439794540405273, "step": 4113 }, { "epoch": 1.4, "learning_rate": 4.3060637574641633e-07, "logits/chosen": 0.06991979479789734, "logits/rejected": 0.10506102442741394, "logps/chosen": -122.38978576660156, "logps/rejected": -200.4901885986328, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -1.3806848526000977, "rewards/margins": 10.542381286621094, "rewards/rejected": -11.923068046569824, "step": 4114 }, { "epoch": 1.4, "learning_rate": 4.3015210077104035e-07, "logits/chosen": -0.008349444717168808, "logits/rejected": 0.049114130437374115, "logps/chosen": -176.65147399902344, "logps/rejected": -372.19195556640625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.31409287452697754, "rewards/margins": 19.36836051940918, "rewards/rejected": -19.68245506286621, "step": 4115 }, { "epoch": 1.4, "learning_rate": 4.296979998763338e-07, "logits/chosen": -0.062473513185977936, "logits/rejected": -0.03777269273996353, "logps/chosen": -246.1356964111328, "logps/rejected": -316.1687316894531, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.980377197265625, "rewards/margins": 13.061899185180664, "rewards/rejected": -14.042274475097656, "step": 4116 }, { "epoch": 1.41, "learning_rate": 4.2924407320101765e-07, "logits/chosen": -0.017853576689958572, "logits/rejected": 0.0436212457716465, "logps/chosen": -234.15528869628906, "logps/rejected": -354.984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5981072783470154, "rewards/margins": 17.387025833129883, "rewards/rejected": -17.985132217407227, "step": 4117 }, { "epoch": 1.41, "learning_rate": 4.287903208837604e-07, "logits/chosen": -0.004192440304905176, "logits/rejected": 0.03132178634405136, "logps/chosen": -226.39541625976562, "logps/rejected": -327.8284606933594, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 0.06535254418849945, "rewards/margins": 15.653998374938965, "rewards/rejected": -15.588644027709961, "step": 4118 }, { "epoch": 1.41, "learning_rate": 4.2833674306317736e-07, "logits/chosen": -0.06312093883752823, "logits/rejected": -0.02214050479233265, "logps/chosen": -197.5845184326172, "logps/rejected": -337.998046875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.6392176747322083, "rewards/margins": 15.176894187927246, "rewards/rejected": -15.81611156463623, "step": 4119 }, { "epoch": 1.41, "learning_rate": 4.278833398778305e-07, "logits/chosen": -0.05905793234705925, "logits/rejected": -0.023601947352290154, "logps/chosen": -256.0539245605469, "logps/rejected": -445.3591613769531, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.0723787248134613, "rewards/margins": 21.395841598510742, "rewards/rejected": -21.468219757080078, "step": 4120 }, { "epoch": 1.41, "learning_rate": 4.274301114662277e-07, "logits/chosen": -0.047918640077114105, "logits/rejected": -0.006375740747898817, "logps/chosen": -176.72201538085938, "logps/rejected": -329.8042907714844, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.634273648262024, "rewards/margins": 17.694372177124023, "rewards/rejected": -19.328645706176758, "step": 4121 }, { "epoch": 1.41, "learning_rate": 4.2697705796682405e-07, "logits/chosen": 0.02917778491973877, "logits/rejected": 0.0720633938908577, "logps/chosen": -205.77145385742188, "logps/rejected": -289.1895446777344, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -2.12831449508667, "rewards/margins": 13.423090934753418, "rewards/rejected": -15.551405906677246, "step": 4122 }, { "epoch": 1.41, "learning_rate": 4.265241795180218e-07, "logits/chosen": 0.008813612163066864, "logits/rejected": 0.03618000075221062, "logps/chosen": -205.96664428710938, "logps/rejected": -361.3487243652344, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.567179560661316, "rewards/margins": 16.28215789794922, "rewards/rejected": -17.84933853149414, "step": 4123 }, { "epoch": 1.41, "learning_rate": 4.2607147625816766e-07, "logits/chosen": -0.035220831632614136, "logits/rejected": -0.011038231663405895, "logps/chosen": -231.12062072753906, "logps/rejected": -398.8937683105469, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.65983247756958, "rewards/margins": 17.3320369720459, "rewards/rejected": -18.99186897277832, "step": 4124 }, { "epoch": 1.41, "learning_rate": 4.25618948325558e-07, "logits/chosen": -0.05867545306682587, "logits/rejected": -0.04865572229027748, "logps/chosen": -204.02294921875, "logps/rejected": -332.18328857421875, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.824016809463501, "rewards/margins": 14.801813125610352, "rewards/rejected": -16.625829696655273, "step": 4125 }, { "epoch": 1.41, "learning_rate": 4.25166595858433e-07, "logits/chosen": 0.05208912491798401, "logits/rejected": 0.07449754327535629, "logps/chosen": -148.6550750732422, "logps/rejected": -241.255126953125, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.03822906315326691, "rewards/margins": 12.470892906188965, "rewards/rejected": -12.50912094116211, "step": 4126 }, { "epoch": 1.41, "learning_rate": 4.247144189949793e-07, "logits/chosen": 0.08954945206642151, "logits/rejected": 0.10301682353019714, "logps/chosen": -176.32763671875, "logps/rejected": -317.4470520019531, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.386176824569702, "rewards/margins": 14.537994384765625, "rewards/rejected": -16.924171447753906, "step": 4127 }, { "epoch": 1.41, "learning_rate": 4.2426241787333215e-07, "logits/chosen": -0.01065190602093935, "logits/rejected": 0.03034438006579876, "logps/chosen": -252.12213134765625, "logps/rejected": -372.03326416015625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -2.072593927383423, "rewards/margins": 15.725485801696777, "rewards/rejected": -17.798078536987305, "step": 4128 }, { "epoch": 1.41, "learning_rate": 4.238105926315707e-07, "logits/chosen": -0.06693051755428314, "logits/rejected": -0.027575699612498283, "logps/chosen": -220.24546813964844, "logps/rejected": -322.2423095703125, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -2.4853596687316895, "rewards/margins": 14.084967613220215, "rewards/rejected": -16.570327758789062, "step": 4129 }, { "epoch": 1.41, "learning_rate": 4.233589434077217e-07, "logits/chosen": -0.04360463470220566, "logits/rejected": 0.023106705397367477, "logps/chosen": -174.169677734375, "logps/rejected": -257.11663818359375, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.0480338335037231, "rewards/margins": 14.608988761901855, "rewards/rejected": -15.657023429870605, "step": 4130 }, { "epoch": 1.41, "learning_rate": 4.2290747033975794e-07, "logits/chosen": 0.0705680176615715, "logits/rejected": 0.0737706869840622, "logps/chosen": -223.28594970703125, "logps/rejected": -441.69647216796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.0046393871307373, "rewards/margins": 19.52682113647461, "rewards/rejected": -21.531457901000977, "step": 4131 }, { "epoch": 1.41, "learning_rate": 4.224561735655976e-07, "logits/chosen": -0.02212117239832878, "logits/rejected": 0.02122906781733036, "logps/chosen": -213.6579132080078, "logps/rejected": -374.8617248535156, "loss": 0.0401, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5713365077972412, "rewards/margins": 17.993213653564453, "rewards/rejected": -19.564550399780273, "step": 4132 }, { "epoch": 1.41, "learning_rate": 4.220050532231059e-07, "logits/chosen": -0.06548404693603516, "logits/rejected": -0.04294728487730026, "logps/chosen": -269.1220397949219, "logps/rejected": -481.66448974609375, "loss": 0.0204, "rewards/accuracies": 0.9375, "rewards/chosen": -2.8531599044799805, "rewards/margins": 19.90188980102539, "rewards/rejected": -22.755048751831055, "step": 4133 }, { "epoch": 1.41, "learning_rate": 4.21554109450094e-07, "logits/chosen": -0.020023247227072716, "logits/rejected": -0.01941300369799137, "logps/chosen": -216.28048706054688, "logps/rejected": -423.9326171875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.2864058017730713, "rewards/margins": 17.815181732177734, "rewards/rejected": -19.101587295532227, "step": 4134 }, { "epoch": 1.41, "learning_rate": 4.211033423843192e-07, "logits/chosen": 0.049960676580667496, "logits/rejected": 0.059209056198596954, "logps/chosen": -174.21388244628906, "logps/rejected": -339.4910583496094, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -0.9491302371025085, "rewards/margins": 16.371307373046875, "rewards/rejected": -17.320436477661133, "step": 4135 }, { "epoch": 1.41, "learning_rate": 4.2065275216348416e-07, "logits/chosen": 0.014641639776527882, "logits/rejected": 0.044255055487155914, "logps/chosen": -184.3896484375, "logps/rejected": -332.9718933105469, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.4235575199127197, "rewards/margins": 18.053266525268555, "rewards/rejected": -18.476821899414062, "step": 4136 }, { "epoch": 1.41, "learning_rate": 4.2020233892523804e-07, "logits/chosen": 0.05984935909509659, "logits/rejected": 0.09006526321172714, "logps/chosen": -154.883056640625, "logps/rejected": -301.7525634765625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.9843250513076782, "rewards/margins": 14.432567596435547, "rewards/rejected": -15.416891098022461, "step": 4137 }, { "epoch": 1.41, "learning_rate": 4.1975210280717643e-07, "logits/chosen": -0.006085601635277271, "logits/rejected": 0.05348238721489906, "logps/chosen": -150.10214233398438, "logps/rejected": -256.4464416503906, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -3.0701801776885986, "rewards/margins": 13.645452499389648, "rewards/rejected": -16.715633392333984, "step": 4138 }, { "epoch": 1.41, "learning_rate": 4.19302043946839e-07, "logits/chosen": -0.1321694254875183, "logits/rejected": -0.103946253657341, "logps/chosen": -229.3197784423828, "logps/rejected": -391.9477844238281, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -2.2974252700805664, "rewards/margins": 15.517181396484375, "rewards/rejected": -17.814607620239258, "step": 4139 }, { "epoch": 1.41, "learning_rate": 4.188521624817142e-07, "logits/chosen": 0.03406032547354698, "logits/rejected": 0.05929465591907501, "logps/chosen": -190.5140838623047, "logps/rejected": -335.12261962890625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.1699051409959793, "rewards/margins": 16.72395133972168, "rewards/rejected": -16.89385414123535, "step": 4140 }, { "epoch": 1.41, "learning_rate": 4.184024585492336e-07, "logits/chosen": -0.10753556340932846, "logits/rejected": -0.0859757587313652, "logps/chosen": -182.40206909179688, "logps/rejected": -377.1309509277344, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.0512069463729858, "rewards/margins": 17.451419830322266, "rewards/rejected": -18.502628326416016, "step": 4141 }, { "epoch": 1.41, "learning_rate": 4.1795293228677497e-07, "logits/chosen": 0.09912329912185669, "logits/rejected": 0.12793073058128357, "logps/chosen": -170.8184051513672, "logps/rejected": -315.1657409667969, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -0.8110387921333313, "rewards/margins": 16.40799903869629, "rewards/rejected": -17.219038009643555, "step": 4142 }, { "epoch": 1.41, "learning_rate": 4.1750358383166396e-07, "logits/chosen": 0.029085706919431686, "logits/rejected": 0.03637239336967468, "logps/chosen": -216.05142211914062, "logps/rejected": -376.2464904785156, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.27005714178085327, "rewards/margins": 17.751829147338867, "rewards/rejected": -18.02188491821289, "step": 4143 }, { "epoch": 1.41, "learning_rate": 4.1705441332116897e-07, "logits/chosen": -0.07167117297649384, "logits/rejected": -0.06729182600975037, "logps/chosen": -288.536376953125, "logps/rejected": -500.3779296875, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 0.1142396628856659, "rewards/margins": 21.222414016723633, "rewards/rejected": -21.108173370361328, "step": 4144 }, { "epoch": 1.41, "learning_rate": 4.1660542089250594e-07, "logits/chosen": -0.14789091050624847, "logits/rejected": -0.10979033261537552, "logps/chosen": -171.5755615234375, "logps/rejected": -312.5867919921875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.4761766195297241, "rewards/margins": 14.233770370483398, "rewards/rejected": -14.709946632385254, "step": 4145 }, { "epoch": 1.42, "learning_rate": 4.1615660668283626e-07, "logits/chosen": -0.09936583042144775, "logits/rejected": -0.053306616842746735, "logps/chosen": -249.13839721679688, "logps/rejected": -342.88873291015625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -2.5963149070739746, "rewards/margins": 13.15539264678955, "rewards/rejected": -15.751708984375, "step": 4146 }, { "epoch": 1.42, "learning_rate": 4.157079708292656e-07, "logits/chosen": -0.12185945361852646, "logits/rejected": -0.09230147302150726, "logps/chosen": -234.1125030517578, "logps/rejected": -453.77294921875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.2304601669311523, "rewards/margins": 20.474008560180664, "rewards/rejected": -21.7044677734375, "step": 4147 }, { "epoch": 1.42, "learning_rate": 4.1525951346884635e-07, "logits/chosen": -0.06372177600860596, "logits/rejected": -0.044422656297683716, "logps/chosen": -209.47064208984375, "logps/rejected": -345.1332092285156, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.22679083049297333, "rewards/margins": 12.137735366821289, "rewards/rejected": -12.36452579498291, "step": 4148 }, { "epoch": 1.42, "learning_rate": 4.1481123473857617e-07, "logits/chosen": -0.03543087840080261, "logits/rejected": 0.030840767547488213, "logps/chosen": -221.91270446777344, "logps/rejected": -294.40673828125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.1298277378082275, "rewards/margins": 13.846698760986328, "rewards/rejected": -14.976526260375977, "step": 4149 }, { "epoch": 1.42, "learning_rate": 4.143631347753984e-07, "logits/chosen": 0.09328324347734451, "logits/rejected": 0.10424639284610748, "logps/chosen": -187.56690979003906, "logps/rejected": -306.385986328125, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -2.1208903789520264, "rewards/margins": 10.354185104370117, "rewards/rejected": -12.475076675415039, "step": 4150 }, { "epoch": 1.42, "learning_rate": 4.1391521371620065e-07, "logits/chosen": -0.009397588670253754, "logits/rejected": -0.0006032912060618401, "logps/chosen": -146.67201232910156, "logps/rejected": -353.07391357421875, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.869070053100586, "rewards/margins": 16.1048641204834, "rewards/rejected": -17.973934173583984, "step": 4151 }, { "epoch": 1.42, "learning_rate": 4.134674716978169e-07, "logits/chosen": 0.04577388986945152, "logits/rejected": 0.05600346624851227, "logps/chosen": -180.1846923828125, "logps/rejected": -313.9524230957031, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.9133354425430298, "rewards/margins": 14.306053161621094, "rewards/rejected": -16.219388961791992, "step": 4152 }, { "epoch": 1.42, "learning_rate": 4.130199088570266e-07, "logits/chosen": 0.08521684259176254, "logits/rejected": 0.08011111617088318, "logps/chosen": -149.2251739501953, "logps/rejected": -328.27496337890625, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.9105610251426697, "rewards/margins": 15.061853408813477, "rewards/rejected": -15.972412109375, "step": 4153 }, { "epoch": 1.42, "learning_rate": 4.1257252533055333e-07, "logits/chosen": -0.02077486366033554, "logits/rejected": 0.006000319961458445, "logps/chosen": -208.0556640625, "logps/rejected": -343.0411682128906, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0459243059158325, "rewards/margins": 16.817676544189453, "rewards/rejected": -17.86359977722168, "step": 4154 }, { "epoch": 1.42, "learning_rate": 4.121253212550668e-07, "logits/chosen": 0.004466314800083637, "logits/rejected": 0.022072702646255493, "logps/chosen": -160.2006072998047, "logps/rejected": -298.7634582519531, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.548339366912842, "rewards/margins": 12.018607139587402, "rewards/rejected": -14.566946983337402, "step": 4155 }, { "epoch": 1.42, "learning_rate": 4.1167829676718226e-07, "logits/chosen": -0.09382471442222595, "logits/rejected": -0.07474694401025772, "logps/chosen": -226.89736938476562, "logps/rejected": -309.76416015625, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -2.102184534072876, "rewards/margins": 10.82271671295166, "rewards/rejected": -12.924901962280273, "step": 4156 }, { "epoch": 1.42, "learning_rate": 4.112314520034583e-07, "logits/chosen": 0.006404360756278038, "logits/rejected": 0.013530971482396126, "logps/chosen": -191.59271240234375, "logps/rejected": -366.47088623046875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.122939109802246, "rewards/margins": 17.15964126586914, "rewards/rejected": -18.28257942199707, "step": 4157 }, { "epoch": 1.42, "learning_rate": 4.1078478710040123e-07, "logits/chosen": -0.04618131369352341, "logits/rejected": -0.010230330750346184, "logps/chosen": -211.1617431640625, "logps/rejected": -303.61175537109375, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.636868953704834, "rewards/margins": 11.103687286376953, "rewards/rejected": -12.740556716918945, "step": 4158 }, { "epoch": 1.42, "learning_rate": 4.1033830219445996e-07, "logits/chosen": 0.014061536639928818, "logits/rejected": 0.02847483940422535, "logps/chosen": -190.15353393554688, "logps/rejected": -313.66650390625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.0882271528244019, "rewards/margins": 14.759565353393555, "rewards/rejected": -15.84779167175293, "step": 4159 }, { "epoch": 1.42, "learning_rate": 4.0989199742202995e-07, "logits/chosen": -0.06614742428064346, "logits/rejected": -0.06453320384025574, "logps/chosen": -181.95144653320312, "logps/rejected": -347.5255126953125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.4729249477386475, "rewards/margins": 16.44545555114746, "rewards/rejected": -17.918378829956055, "step": 4160 }, { "epoch": 1.42, "learning_rate": 4.094458729194514e-07, "logits/chosen": -0.023651475086808205, "logits/rejected": -0.007763031870126724, "logps/chosen": -187.80905151367188, "logps/rejected": -391.3815002441406, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -2.686897039413452, "rewards/margins": 18.23465919494629, "rewards/rejected": -20.921558380126953, "step": 4161 }, { "epoch": 1.42, "learning_rate": 4.0899992882300847e-07, "logits/chosen": -0.0479227639734745, "logits/rejected": -0.017805928364396095, "logps/chosen": -158.15769958496094, "logps/rejected": -261.3211975097656, "loss": 0.0163, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9802310466766357, "rewards/margins": 11.127625465393066, "rewards/rejected": -13.107857704162598, "step": 4162 }, { "epoch": 1.42, "learning_rate": 4.085541652689313e-07, "logits/chosen": 0.0020315912552177906, "logits/rejected": 0.02031509019434452, "logps/chosen": -221.543701171875, "logps/rejected": -369.850830078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.3823779821395874, "rewards/margins": 16.568601608276367, "rewards/rejected": -16.95098114013672, "step": 4163 }, { "epoch": 1.42, "learning_rate": 4.0810858239339483e-07, "logits/chosen": -0.038524311035871506, "logits/rejected": 0.0023898757062852383, "logps/chosen": -250.6942596435547, "logps/rejected": -359.33880615234375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.5890498757362366, "rewards/margins": 16.887859344482422, "rewards/rejected": -16.298809051513672, "step": 4164 }, { "epoch": 1.42, "learning_rate": 4.076631803325179e-07, "logits/chosen": 0.04447843134403229, "logits/rejected": 0.04900537058711052, "logps/chosen": -196.28494262695312, "logps/rejected": -384.4997253417969, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.0665669441223145, "rewards/margins": 19.208820343017578, "rewards/rejected": -21.275386810302734, "step": 4165 }, { "epoch": 1.42, "learning_rate": 4.072179592223649e-07, "logits/chosen": -0.01773042045533657, "logits/rejected": 0.023185478523373604, "logps/chosen": -245.0286102294922, "logps/rejected": -325.2480773925781, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.40888512134552, "rewards/margins": 12.645088195800781, "rewards/rejected": -14.053974151611328, "step": 4166 }, { "epoch": 1.42, "learning_rate": 4.0677291919894473e-07, "logits/chosen": -0.01138768158853054, "logits/rejected": 0.007618342991918325, "logps/chosen": -196.6868133544922, "logps/rejected": -363.4848327636719, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -2.4458940029144287, "rewards/margins": 15.467169761657715, "rewards/rejected": -17.913063049316406, "step": 4167 }, { "epoch": 1.42, "learning_rate": 4.0632806039821143e-07, "logits/chosen": -0.02672531083226204, "logits/rejected": -0.024626249447464943, "logps/chosen": -173.0308837890625, "logps/rejected": -367.00445556640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.5056638717651367, "rewards/margins": 17.21942710876465, "rewards/rejected": -19.72509002685547, "step": 4168 }, { "epoch": 1.42, "learning_rate": 4.058833829560625e-07, "logits/chosen": 0.03129088506102562, "logits/rejected": 0.04385172575712204, "logps/chosen": -165.01490783691406, "logps/rejected": -286.1839904785156, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.084503412246704, "rewards/margins": 13.760424613952637, "rewards/rejected": -15.844927787780762, "step": 4169 }, { "epoch": 1.42, "learning_rate": 4.0543888700834115e-07, "logits/chosen": 0.021969160065054893, "logits/rejected": 0.05652957037091255, "logps/chosen": -170.91941833496094, "logps/rejected": -313.7063293457031, "loss": 0.0265, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0212059020996094, "rewards/margins": 14.175039291381836, "rewards/rejected": -16.196243286132812, "step": 4170 }, { "epoch": 1.42, "learning_rate": 4.0499457269083493e-07, "logits/chosen": -0.010870628990232944, "logits/rejected": 0.003047733101993799, "logps/chosen": -180.40740966796875, "logps/rejected": -289.70233154296875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.3363640308380127, "rewards/margins": 11.477338790893555, "rewards/rejected": -12.813702583312988, "step": 4171 }, { "epoch": 1.42, "learning_rate": 4.045504401392749e-07, "logits/chosen": 0.01133049838244915, "logits/rejected": 0.07378923147916794, "logps/chosen": -247.08607482910156, "logps/rejected": -313.4985656738281, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": -0.21093156933784485, "rewards/margins": 15.777246475219727, "rewards/rejected": -15.988178253173828, "step": 4172 }, { "epoch": 1.42, "learning_rate": 4.0410648948933856e-07, "logits/chosen": -0.036187607795000076, "logits/rejected": -0.0067547340877354145, "logps/chosen": -238.17970275878906, "logps/rejected": -374.9486083984375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.909498691558838, "rewards/margins": 16.91185760498047, "rewards/rejected": -18.82135772705078, "step": 4173 }, { "epoch": 1.42, "learning_rate": 4.036627208766463e-07, "logits/chosen": 0.02145860344171524, "logits/rejected": 0.0837370827794075, "logps/chosen": -210.7877197265625, "logps/rejected": -296.8675842285156, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -1.0128191709518433, "rewards/margins": 14.446981430053711, "rewards/rejected": -15.459800720214844, "step": 4174 }, { "epoch": 1.42, "learning_rate": 4.032191344367625e-07, "logits/chosen": -0.05980779230594635, "logits/rejected": -0.04156801104545593, "logps/chosen": -197.35592651367188, "logps/rejected": -387.2252197265625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.167440414428711, "rewards/margins": 18.992931365966797, "rewards/rejected": -20.16037368774414, "step": 4175 }, { "epoch": 1.43, "learning_rate": 4.0277573030519797e-07, "logits/chosen": -0.02314392663538456, "logits/rejected": 0.011342115700244904, "logps/chosen": -209.12103271484375, "logps/rejected": -395.67901611328125, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -1.0261938571929932, "rewards/margins": 18.061552047729492, "rewards/rejected": -19.087745666503906, "step": 4176 }, { "epoch": 1.43, "learning_rate": 4.023325086174056e-07, "logits/chosen": -0.003279195399954915, "logits/rejected": 0.03475847467780113, "logps/chosen": -178.15826416015625, "logps/rejected": -274.5827941894531, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.5245397090911865, "rewards/margins": 13.320530891418457, "rewards/rejected": -14.845070838928223, "step": 4177 }, { "epoch": 1.43, "learning_rate": 4.01889469508784e-07, "logits/chosen": -0.020658325403928757, "logits/rejected": -0.02799358405172825, "logps/chosen": -223.26284790039062, "logps/rejected": -425.7689208984375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.922100305557251, "rewards/margins": 17.405967712402344, "rewards/rejected": -19.328065872192383, "step": 4178 }, { "epoch": 1.43, "learning_rate": 4.0144661311467564e-07, "logits/chosen": -0.06512242555618286, "logits/rejected": -0.017734650522470474, "logps/chosen": -217.3626251220703, "logps/rejected": -330.7609558105469, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -1.036505103111267, "rewards/margins": 14.892688751220703, "rewards/rejected": -15.929195404052734, "step": 4179 }, { "epoch": 1.43, "learning_rate": 4.0100393957036636e-07, "logits/chosen": -0.07355337589979172, "logits/rejected": -0.04441022872924805, "logps/chosen": -261.4203186035156, "logps/rejected": -368.8995666503906, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -3.0412049293518066, "rewards/margins": 13.294940948486328, "rewards/rejected": -16.336145401000977, "step": 4180 }, { "epoch": 1.43, "learning_rate": 4.0056144901108723e-07, "logits/chosen": -0.08696087449789047, "logits/rejected": -0.04837290570139885, "logps/chosen": -203.1338348388672, "logps/rejected": -403.00250244140625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.0980844497680664, "rewards/margins": 19.86845588684082, "rewards/rejected": -21.966541290283203, "step": 4181 }, { "epoch": 1.43, "learning_rate": 4.0011914157201296e-07, "logits/chosen": -0.10363743454217911, "logits/rejected": -0.05435194820165634, "logps/chosen": -223.90469360351562, "logps/rejected": -344.98565673828125, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -1.5130459070205688, "rewards/margins": 15.517511367797852, "rewards/rejected": -17.03055763244629, "step": 4182 }, { "epoch": 1.43, "learning_rate": 3.996770173882629e-07, "logits/chosen": 0.02479105442762375, "logits/rejected": 0.0638001561164856, "logps/chosen": -183.98731994628906, "logps/rejected": -296.8548583984375, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.8924963474273682, "rewards/margins": 15.024571418762207, "rewards/rejected": -15.91706657409668, "step": 4183 }, { "epoch": 1.43, "learning_rate": 3.99235076594899e-07, "logits/chosen": -0.006737419869750738, "logits/rejected": 0.008457697927951813, "logps/chosen": -224.94287109375, "logps/rejected": -340.04754638671875, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -0.9939292073249817, "rewards/margins": 13.880585670471191, "rewards/rejected": -14.87451457977295, "step": 4184 }, { "epoch": 1.43, "learning_rate": 3.9879331932692826e-07, "logits/chosen": -0.08326048403978348, "logits/rejected": -0.0658564642071724, "logps/chosen": -190.67550659179688, "logps/rejected": -353.25689697265625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 0.18735657632350922, "rewards/margins": 15.95608139038086, "rewards/rejected": -15.768725395202637, "step": 4185 }, { "epoch": 1.43, "learning_rate": 3.9835174571930217e-07, "logits/chosen": -0.006098495330661535, "logits/rejected": 0.00446719815954566, "logps/chosen": -157.85763549804688, "logps/rejected": -321.7091064453125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.3894942998886108, "rewards/margins": 15.712061882019043, "rewards/rejected": -17.1015567779541, "step": 4186 }, { "epoch": 1.43, "learning_rate": 3.979103559069141e-07, "logits/chosen": 0.01615132763981819, "logits/rejected": 0.05039064958691597, "logps/chosen": -180.05462646484375, "logps/rejected": -333.85870361328125, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -2.126145839691162, "rewards/margins": 16.44725799560547, "rewards/rejected": -18.57340431213379, "step": 4187 }, { "epoch": 1.43, "learning_rate": 3.9746915002460404e-07, "logits/chosen": 0.03344133123755455, "logits/rejected": 0.07438033819198608, "logps/chosen": -195.9428253173828, "logps/rejected": -348.47808837890625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.1758301258087158, "rewards/margins": 18.35086441040039, "rewards/rejected": -19.52669334411621, "step": 4188 }, { "epoch": 1.43, "learning_rate": 3.970281282071534e-07, "logits/chosen": 0.03559369221329689, "logits/rejected": 0.06972271203994751, "logps/chosen": -198.54759216308594, "logps/rejected": -339.06982421875, "loss": 0.0187, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0676400661468506, "rewards/margins": 15.6072998046875, "rewards/rejected": -17.67494010925293, "step": 4189 }, { "epoch": 1.43, "learning_rate": 3.9658729058928775e-07, "logits/chosen": -0.03237501531839371, "logits/rejected": -0.02564375288784504, "logps/chosen": -244.70140075683594, "logps/rejected": -401.74261474609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2504891157150269, "rewards/margins": 14.981456756591797, "rewards/rejected": -16.231945037841797, "step": 4190 }, { "epoch": 1.43, "learning_rate": 3.961466373056783e-07, "logits/chosen": -0.009945965372025967, "logits/rejected": 0.015624073334038258, "logps/chosen": -213.75799560546875, "logps/rejected": -383.14642333984375, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": -0.11755222082138062, "rewards/margins": 19.727458953857422, "rewards/rejected": -19.845008850097656, "step": 4191 }, { "epoch": 1.43, "learning_rate": 3.957061684909374e-07, "logits/chosen": -0.12637083232402802, "logits/rejected": -0.09347859025001526, "logps/chosen": -202.0721435546875, "logps/rejected": -362.6324768066406, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.5713436603546143, "rewards/margins": 14.947925567626953, "rewards/rejected": -16.519269943237305, "step": 4192 }, { "epoch": 1.43, "learning_rate": 3.952658842796226e-07, "logits/chosen": -0.054767999798059464, "logits/rejected": -0.0217535849660635, "logps/chosen": -221.98928833007812, "logps/rejected": -320.741943359375, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -2.6680686473846436, "rewards/margins": 14.020345687866211, "rewards/rejected": -16.68841552734375, "step": 4193 }, { "epoch": 1.43, "learning_rate": 3.948257848062351e-07, "logits/chosen": -0.011898254044353962, "logits/rejected": 0.023551449179649353, "logps/chosen": -186.8629150390625, "logps/rejected": -391.6650695800781, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.6968793869018555, "rewards/margins": 21.045766830444336, "rewards/rejected": -22.742647171020508, "step": 4194 }, { "epoch": 1.43, "learning_rate": 3.9438587020521816e-07, "logits/chosen": 0.05642681196331978, "logits/rejected": 0.092611163854599, "logps/chosen": -210.15777587890625, "logps/rejected": -332.3763122558594, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.872929573059082, "rewards/margins": 14.822173118591309, "rewards/rejected": -16.69510269165039, "step": 4195 }, { "epoch": 1.43, "learning_rate": 3.9394614061096045e-07, "logits/chosen": -0.04191474989056587, "logits/rejected": 0.0019247939344495535, "logps/chosen": -193.6126708984375, "logps/rejected": -333.6989440917969, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9337043166160583, "rewards/margins": 16.333431243896484, "rewards/rejected": -17.267135620117188, "step": 4196 }, { "epoch": 1.43, "learning_rate": 3.9350659615779326e-07, "logits/chosen": 0.06195037066936493, "logits/rejected": 0.06044204905629158, "logps/chosen": -223.83621215820312, "logps/rejected": -411.2901916503906, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -0.8476182818412781, "rewards/margins": 17.556245803833008, "rewards/rejected": -18.40386390686035, "step": 4197 }, { "epoch": 1.43, "learning_rate": 3.930672369799909e-07, "logits/chosen": -0.06285440921783447, "logits/rejected": -0.01647298038005829, "logps/chosen": -216.59451293945312, "logps/rejected": -393.6981201171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.032987117767334, "rewards/margins": 19.265792846679688, "rewards/rejected": -21.298782348632812, "step": 4198 }, { "epoch": 1.43, "learning_rate": 3.926280632117717e-07, "logits/chosen": -0.11334114521741867, "logits/rejected": -0.10369455814361572, "logps/chosen": -195.60092163085938, "logps/rejected": -419.1609191894531, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1177810430526733, "rewards/margins": 21.092695236206055, "rewards/rejected": -22.21047592163086, "step": 4199 }, { "epoch": 1.43, "learning_rate": 3.9218907498729726e-07, "logits/chosen": -0.027720047160983086, "logits/rejected": 0.023540060967206955, "logps/chosen": -189.8086395263672, "logps/rejected": -272.8860778808594, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.0038390159606934, "rewards/margins": 12.040349960327148, "rewards/rejected": -14.044189453125, "step": 4200 }, { "epoch": 1.43, "learning_rate": 3.9175027244067295e-07, "logits/chosen": -0.05357847735285759, "logits/rejected": -0.02359001524746418, "logps/chosen": -205.22238159179688, "logps/rejected": -357.1331787109375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.0914459228515625, "rewards/margins": 15.54226303100586, "rewards/rejected": -16.633708953857422, "step": 4201 }, { "epoch": 1.43, "learning_rate": 3.9131165570594584e-07, "logits/chosen": 0.04699631407856941, "logits/rejected": 0.07849107682704926, "logps/chosen": -204.45880126953125, "logps/rejected": -294.7079772949219, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.6727695465087891, "rewards/margins": 13.792994499206543, "rewards/rejected": -14.465764045715332, "step": 4202 }, { "epoch": 1.43, "learning_rate": 3.9087322491710793e-07, "logits/chosen": -0.024750003591179848, "logits/rejected": -0.001031131367199123, "logps/chosen": -170.07632446289062, "logps/rejected": -318.77294921875, "loss": 0.0172, "rewards/accuracies": 0.9375, "rewards/chosen": -2.720118522644043, "rewards/margins": 13.736578941345215, "rewards/rejected": -16.456697463989258, "step": 4203 }, { "epoch": 1.43, "learning_rate": 3.9043498020809397e-07, "logits/chosen": -0.11313164979219437, "logits/rejected": -0.09590020775794983, "logps/chosen": -189.86398315429688, "logps/rejected": -376.147216796875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.1130305528640747, "rewards/margins": 18.306848526000977, "rewards/rejected": -19.419879913330078, "step": 4204 }, { "epoch": 1.44, "learning_rate": 3.8999692171278074e-07, "logits/chosen": -0.04320977255702019, "logits/rejected": -0.033049874007701874, "logps/chosen": -163.74301147460938, "logps/rejected": -374.00555419921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.1924080848693848, "rewards/margins": 17.42249298095703, "rewards/rejected": -20.61490249633789, "step": 4205 }, { "epoch": 1.44, "learning_rate": 3.895590495649903e-07, "logits/chosen": 0.00013870967086404562, "logits/rejected": 0.00918930396437645, "logps/chosen": -183.10499572753906, "logps/rejected": -349.3490295410156, "loss": 0.0509, "rewards/accuracies": 0.875, "rewards/chosen": -3.636197566986084, "rewards/margins": 13.460742950439453, "rewards/rejected": -17.096940994262695, "step": 4206 }, { "epoch": 1.44, "learning_rate": 3.891213638984857e-07, "logits/chosen": -0.08402857184410095, "logits/rejected": -0.06705177575349808, "logps/chosen": -219.65109252929688, "logps/rejected": -404.680908203125, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -1.7166743278503418, "rewards/margins": 18.364452362060547, "rewards/rejected": -20.081125259399414, "step": 4207 }, { "epoch": 1.44, "learning_rate": 3.8868386484697413e-07, "logits/chosen": 0.057547371834516525, "logits/rejected": 0.08497805148363113, "logps/chosen": -155.23670959472656, "logps/rejected": -225.53073120117188, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.920461893081665, "rewards/margins": 10.830486297607422, "rewards/rejected": -12.750947952270508, "step": 4208 }, { "epoch": 1.44, "learning_rate": 3.882465525441058e-07, "logits/chosen": -0.06361647695302963, "logits/rejected": -0.04821087419986725, "logps/chosen": -238.12103271484375, "logps/rejected": -380.8115539550781, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.026576042175293, "rewards/margins": 14.75329875946045, "rewards/rejected": -15.779876708984375, "step": 4209 }, { "epoch": 1.44, "learning_rate": 3.8780942712347296e-07, "logits/chosen": -0.16723619401454926, "logits/rejected": -0.16551819443702698, "logps/chosen": -233.87338256835938, "logps/rejected": -439.3558349609375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.6144729852676392, "rewards/margins": 19.2586612701416, "rewards/rejected": -19.873136520385742, "step": 4210 }, { "epoch": 1.44, "learning_rate": 3.873724887186116e-07, "logits/chosen": -0.04272225871682167, "logits/rejected": -0.02343662455677986, "logps/chosen": -224.70542907714844, "logps/rejected": -304.5709228515625, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.6643152832984924, "rewards/margins": 14.060139656066895, "rewards/rejected": -14.724455833435059, "step": 4211 }, { "epoch": 1.44, "learning_rate": 3.86935737463001e-07, "logits/chosen": 0.04241492599248886, "logits/rejected": 0.07162819057703018, "logps/chosen": -171.42909240722656, "logps/rejected": -299.06146240234375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.2001688480377197, "rewards/margins": 12.915106773376465, "rewards/rejected": -15.115274429321289, "step": 4212 }, { "epoch": 1.44, "learning_rate": 3.8649917349006176e-07, "logits/chosen": 0.011358996853232384, "logits/rejected": 0.03666776791214943, "logps/chosen": -243.05491638183594, "logps/rejected": -402.0093994140625, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -1.9398353099822998, "rewards/margins": 17.5125675201416, "rewards/rejected": -19.452404022216797, "step": 4213 }, { "epoch": 1.44, "learning_rate": 3.8606279693315845e-07, "logits/chosen": -0.03563753888010979, "logits/rejected": -0.019530678167939186, "logps/chosen": -175.89479064941406, "logps/rejected": -408.7016906738281, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.7488148212432861, "rewards/margins": 19.57317352294922, "rewards/rejected": -21.321989059448242, "step": 4214 }, { "epoch": 1.44, "learning_rate": 3.856266079255982e-07, "logits/chosen": -0.10103195160627365, "logits/rejected": -0.07834777235984802, "logps/chosen": -178.6410369873047, "logps/rejected": -280.426025390625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.0224320888519287, "rewards/margins": 12.47788143157959, "rewards/rejected": -14.500314712524414, "step": 4215 }, { "epoch": 1.44, "learning_rate": 3.8519060660063106e-07, "logits/chosen": -0.041712213307619095, "logits/rejected": -0.006509703584015369, "logps/chosen": -232.2358856201172, "logps/rejected": -351.0400695800781, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8166530132293701, "rewards/margins": 16.292274475097656, "rewards/rejected": -17.10892677307129, "step": 4216 }, { "epoch": 1.44, "learning_rate": 3.8475479309144866e-07, "logits/chosen": -0.005274644587188959, "logits/rejected": 0.019257504492998123, "logps/chosen": -170.71075439453125, "logps/rejected": -337.3369140625, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -0.7471543550491333, "rewards/margins": 17.522762298583984, "rewards/rejected": -18.269916534423828, "step": 4217 }, { "epoch": 1.44, "learning_rate": 3.843191675311864e-07, "logits/chosen": 0.00926986988633871, "logits/rejected": 0.021081862971186638, "logps/chosen": -152.4735565185547, "logps/rejected": -280.86663818359375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.076991319656372, "rewards/margins": 13.010111808776855, "rewards/rejected": -14.087102890014648, "step": 4218 }, { "epoch": 1.44, "learning_rate": 3.838837300529223e-07, "logits/chosen": -0.013575690798461437, "logits/rejected": 0.023444764316082, "logps/chosen": -203.48513793945312, "logps/rejected": -391.64324951171875, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -1.644679069519043, "rewards/margins": 18.759044647216797, "rewards/rejected": -20.403722763061523, "step": 4219 }, { "epoch": 1.44, "learning_rate": 3.8344848078967527e-07, "logits/chosen": -0.045636218041181564, "logits/rejected": -0.002265418181195855, "logps/chosen": -210.9344940185547, "logps/rejected": -319.6855163574219, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -2.4409375190734863, "rewards/margins": 12.767860412597656, "rewards/rejected": -15.208797454833984, "step": 4220 }, { "epoch": 1.44, "learning_rate": 3.830134198744095e-07, "logits/chosen": -0.034781720489263535, "logits/rejected": 0.010609989054501057, "logps/chosen": -225.56546020507812, "logps/rejected": -386.2271728515625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.6442687511444092, "rewards/margins": 17.363191604614258, "rewards/rejected": -18.007461547851562, "step": 4221 }, { "epoch": 1.44, "learning_rate": 3.8257854744002906e-07, "logits/chosen": -0.02279403991997242, "logits/rejected": 0.0022073634900152683, "logps/chosen": -166.101318359375, "logps/rejected": -380.63818359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.026998281478882, "rewards/margins": 18.792415618896484, "rewards/rejected": -20.819416046142578, "step": 4222 }, { "epoch": 1.44, "learning_rate": 3.8214386361938177e-07, "logits/chosen": -0.00724566588178277, "logits/rejected": 0.03371892496943474, "logps/chosen": -242.17091369628906, "logps/rejected": -361.9986877441406, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.3444746434688568, "rewards/margins": 15.99767780303955, "rewards/rejected": -16.342153549194336, "step": 4223 }, { "epoch": 1.44, "learning_rate": 3.817093685452578e-07, "logits/chosen": -0.023386817425489426, "logits/rejected": 0.0011957279639318585, "logps/chosen": -208.82406616210938, "logps/rejected": -349.63800048828125, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -2.13982892036438, "rewards/margins": 16.98536491394043, "rewards/rejected": -19.125192642211914, "step": 4224 }, { "epoch": 1.44, "learning_rate": 3.8127506235038885e-07, "logits/chosen": 0.09527799487113953, "logits/rejected": 0.13678894937038422, "logps/chosen": -189.12982177734375, "logps/rejected": -285.7738037109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.802827000617981, "rewards/margins": 15.342550277709961, "rewards/rejected": -16.145376205444336, "step": 4225 }, { "epoch": 1.44, "learning_rate": 3.808409451674498e-07, "logits/chosen": -0.08840467035770416, "logits/rejected": -0.04982425644993782, "logps/chosen": -227.16090393066406, "logps/rejected": -339.5008850097656, "loss": 0.0122, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1980125904083252, "rewards/margins": 16.7583065032959, "rewards/rejected": -17.956317901611328, "step": 4226 }, { "epoch": 1.44, "learning_rate": 3.804070171290578e-07, "logits/chosen": -0.022867659106850624, "logits/rejected": 0.01749328151345253, "logps/chosen": -202.103271484375, "logps/rejected": -398.2099609375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.9254832863807678, "rewards/margins": 20.919300079345703, "rewards/rejected": -21.844783782958984, "step": 4227 }, { "epoch": 1.44, "learning_rate": 3.79973278367771e-07, "logits/chosen": -0.03172614052891731, "logits/rejected": -0.009426610544323921, "logps/chosen": -167.28773498535156, "logps/rejected": -248.46524047851562, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -2.3514325618743896, "rewards/margins": 10.593441009521484, "rewards/rejected": -12.944873809814453, "step": 4228 }, { "epoch": 1.44, "learning_rate": 3.7953972901609124e-07, "logits/chosen": 0.056695662438869476, "logits/rejected": 0.11631498485803604, "logps/chosen": -207.12942504882812, "logps/rejected": -236.4584197998047, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -0.9970195293426514, "rewards/margins": 12.44167709350586, "rewards/rejected": -13.43869686126709, "step": 4229 }, { "epoch": 1.44, "learning_rate": 3.7910636920646197e-07, "logits/chosen": -0.14766794443130493, "logits/rejected": -0.12883518636226654, "logps/chosen": -228.96060180664062, "logps/rejected": -385.79815673828125, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -0.6832831501960754, "rewards/margins": 15.100590705871582, "rewards/rejected": -15.78387451171875, "step": 4230 }, { "epoch": 1.44, "learning_rate": 3.7867319907126803e-07, "logits/chosen": -0.027942031621932983, "logits/rejected": -0.0046631209552288055, "logps/chosen": -214.77301025390625, "logps/rejected": -322.6080627441406, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.697066068649292, "rewards/margins": 12.22495174407959, "rewards/rejected": -13.922018051147461, "step": 4231 }, { "epoch": 1.44, "learning_rate": 3.782402187428374e-07, "logits/chosen": 0.005976693704724312, "logits/rejected": 0.012432762421667576, "logps/chosen": -242.9864501953125, "logps/rejected": -374.2803955078125, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.058471858501434326, "rewards/margins": 12.238439559936523, "rewards/rejected": -12.296911239624023, "step": 4232 }, { "epoch": 1.44, "learning_rate": 3.778074283534394e-07, "logits/chosen": 0.065853051841259, "logits/rejected": 0.08669169992208481, "logps/chosen": -198.48062133789062, "logps/rejected": -291.3291320800781, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -1.1387460231781006, "rewards/margins": 12.896527290344238, "rewards/rejected": -14.035273551940918, "step": 4233 }, { "epoch": 1.45, "learning_rate": 3.7737482803528595e-07, "logits/chosen": -0.005908377002924681, "logits/rejected": 0.042081523686647415, "logps/chosen": -243.2903594970703, "logps/rejected": -347.1382751464844, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -0.24405065178871155, "rewards/margins": 16.968828201293945, "rewards/rejected": -17.212879180908203, "step": 4234 }, { "epoch": 1.45, "learning_rate": 3.769424179205297e-07, "logits/chosen": -0.12743404507637024, "logits/rejected": -0.1046648770570755, "logps/chosen": -230.69992065429688, "logps/rejected": -424.89886474609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.1864757537841797, "rewards/margins": 17.723451614379883, "rewards/rejected": -19.909927368164062, "step": 4235 }, { "epoch": 1.45, "learning_rate": 3.765101981412665e-07, "logits/chosen": -0.12384019792079926, "logits/rejected": -0.10306272655725479, "logps/chosen": -231.62860107421875, "logps/rejected": -444.0261535644531, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.069145917892456, "rewards/margins": 20.924888610839844, "rewards/rejected": -22.994033813476562, "step": 4236 }, { "epoch": 1.45, "learning_rate": 3.7607816882953334e-07, "logits/chosen": 0.0028843332547694445, "logits/rejected": 0.020722197368741035, "logps/chosen": -198.70848083496094, "logps/rejected": -373.2131042480469, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.9357742071151733, "rewards/margins": 17.06053924560547, "rewards/rejected": -18.996313095092773, "step": 4237 }, { "epoch": 1.45, "learning_rate": 3.756463301173094e-07, "logits/chosen": 0.11765634268522263, "logits/rejected": 0.11482115089893341, "logps/chosen": -181.8152618408203, "logps/rejected": -366.1754150390625, "loss": 0.0724, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4713857173919678, "rewards/margins": 16.455310821533203, "rewards/rejected": -18.926698684692383, "step": 4238 }, { "epoch": 1.45, "learning_rate": 3.7521468213651564e-07, "logits/chosen": -0.009212701581418514, "logits/rejected": 0.02867283672094345, "logps/chosen": -218.0838623046875, "logps/rejected": -302.13165283203125, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -2.4689974784851074, "rewards/margins": 15.742385864257812, "rewards/rejected": -18.211381912231445, "step": 4239 }, { "epoch": 1.45, "learning_rate": 3.7478322501901385e-07, "logits/chosen": -0.058216944336891174, "logits/rejected": -0.03756759315729141, "logps/chosen": -216.89486694335938, "logps/rejected": -323.099853515625, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.911278486251831, "rewards/margins": 13.5047607421875, "rewards/rejected": -14.416040420532227, "step": 4240 }, { "epoch": 1.45, "learning_rate": 3.7435195889660875e-07, "logits/chosen": 0.0027408702298998833, "logits/rejected": 0.037245333194732666, "logps/chosen": -204.13670349121094, "logps/rejected": -367.1159362792969, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.0582869052886963, "rewards/margins": 15.054511070251465, "rewards/rejected": -17.1127986907959, "step": 4241 }, { "epoch": 1.45, "learning_rate": 3.7392088390104626e-07, "logits/chosen": 0.009786727838218212, "logits/rejected": 0.04480089992284775, "logps/chosen": -154.2119598388672, "logps/rejected": -248.61013793945312, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.2918466329574585, "rewards/margins": 13.202800750732422, "rewards/rejected": -14.494645118713379, "step": 4242 }, { "epoch": 1.45, "learning_rate": 3.734900001640134e-07, "logits/chosen": -0.04023665189743042, "logits/rejected": 0.006712930742651224, "logps/chosen": -239.12698364257812, "logps/rejected": -346.6790771484375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.5493593215942383, "rewards/margins": 15.487953186035156, "rewards/rejected": -16.037311553955078, "step": 4243 }, { "epoch": 1.45, "learning_rate": 3.730593078171396e-07, "logits/chosen": -0.089379221200943, "logits/rejected": -0.03599495440721512, "logps/chosen": -266.79071044921875, "logps/rejected": -458.072265625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.5415186882019043, "rewards/margins": 21.807384490966797, "rewards/rejected": -23.348901748657227, "step": 4244 }, { "epoch": 1.45, "learning_rate": 3.726288069919954e-07, "logits/chosen": 0.015180502086877823, "logits/rejected": 0.04608301818370819, "logps/chosen": -231.4970703125, "logps/rejected": -400.93585205078125, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.2138731479644775, "rewards/margins": 16.43766212463379, "rewards/rejected": -17.651535034179688, "step": 4245 }, { "epoch": 1.45, "learning_rate": 3.721984978200925e-07, "logits/chosen": -0.008245473727583885, "logits/rejected": -0.009014535695314407, "logps/chosen": -161.51071166992188, "logps/rejected": -303.9796447753906, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.580418825149536, "rewards/margins": 11.299603462219238, "rewards/rejected": -13.880020141601562, "step": 4246 }, { "epoch": 1.45, "learning_rate": 3.717683804328847e-07, "logits/chosen": -0.021382419392466545, "logits/rejected": -0.0028944620862603188, "logps/chosen": -181.50498962402344, "logps/rejected": -366.1284484863281, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.6095502376556396, "rewards/margins": 16.108861923217773, "rewards/rejected": -17.71841049194336, "step": 4247 }, { "epoch": 1.45, "learning_rate": 3.7133845496176683e-07, "logits/chosen": 0.057613078504800797, "logits/rejected": 0.06625040620565414, "logps/chosen": -167.61001586914062, "logps/rejected": -353.322265625, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -1.9852737188339233, "rewards/margins": 16.878955841064453, "rewards/rejected": -18.86423110961914, "step": 4248 }, { "epoch": 1.45, "learning_rate": 3.709087215380757e-07, "logits/chosen": -0.047589316964149475, "logits/rejected": -0.028938353061676025, "logps/chosen": -223.86627197265625, "logps/rejected": -467.09893798828125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.4064786434173584, "rewards/margins": 20.843162536621094, "rewards/rejected": -23.2496395111084, "step": 4249 }, { "epoch": 1.45, "learning_rate": 3.704791802930881e-07, "logits/chosen": 0.005020338576287031, "logits/rejected": -0.005042038392275572, "logps/chosen": -176.43328857421875, "logps/rejected": -365.5908203125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.487475872039795, "rewards/margins": 13.472542762756348, "rewards/rejected": -15.960018157958984, "step": 4250 }, { "epoch": 1.45, "learning_rate": 3.700498313580233e-07, "logits/chosen": 0.0683574452996254, "logits/rejected": 0.07745442539453506, "logps/chosen": -188.38792419433594, "logps/rejected": -402.7378234863281, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.9772533178329468, "rewards/margins": 19.72830581665039, "rewards/rejected": -20.70555877685547, "step": 4251 }, { "epoch": 1.45, "learning_rate": 3.696206748640416e-07, "logits/chosen": 0.0041666338220238686, "logits/rejected": 0.03654920682311058, "logps/chosen": -244.34315490722656, "logps/rejected": -451.78515625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -2.372896432876587, "rewards/margins": 22.317373275756836, "rewards/rejected": -24.690271377563477, "step": 4252 }, { "epoch": 1.45, "learning_rate": 3.691917109422442e-07, "logits/chosen": -0.007970264181494713, "logits/rejected": -0.000530744029674679, "logps/chosen": -228.59498596191406, "logps/rejected": -394.98944091796875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 0.08255291730165482, "rewards/margins": 18.042091369628906, "rewards/rejected": -17.959537506103516, "step": 4253 }, { "epoch": 1.45, "learning_rate": 3.6876293972367413e-07, "logits/chosen": -0.05332066863775253, "logits/rejected": -0.024912528693675995, "logps/chosen": -256.6091003417969, "logps/rejected": -393.7578430175781, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.1268374919891357, "rewards/margins": 17.253713607788086, "rewards/rejected": -18.380552291870117, "step": 4254 }, { "epoch": 1.45, "learning_rate": 3.683343613393144e-07, "logits/chosen": -0.031041333451867104, "logits/rejected": 0.0018988698720932007, "logps/chosen": -197.15838623046875, "logps/rejected": -365.60516357421875, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.5671249628067017, "rewards/margins": 17.21870231628418, "rewards/rejected": -18.785825729370117, "step": 4255 }, { "epoch": 1.45, "learning_rate": 3.679059759200901e-07, "logits/chosen": -0.021633008494973183, "logits/rejected": -0.02164207212626934, "logps/chosen": -147.32699584960938, "logps/rejected": -368.4207763671875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.247025728225708, "rewards/margins": 17.970720291137695, "rewards/rejected": -19.217744827270508, "step": 4256 }, { "epoch": 1.45, "learning_rate": 3.6747778359686744e-07, "logits/chosen": 0.0196559838950634, "logits/rejected": 0.05645250529050827, "logps/chosen": -199.94549560546875, "logps/rejected": -298.5698547363281, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.1889822483062744, "rewards/margins": 13.246567726135254, "rewards/rejected": -14.435550689697266, "step": 4257 }, { "epoch": 1.45, "learning_rate": 3.6704978450045243e-07, "logits/chosen": -0.037152599543333054, "logits/rejected": 0.006627483293414116, "logps/chosen": -219.95675659179688, "logps/rejected": -337.7279052734375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.11459112167358398, "rewards/margins": 16.306354522705078, "rewards/rejected": -16.19176483154297, "step": 4258 }, { "epoch": 1.45, "learning_rate": 3.666219787615935e-07, "logits/chosen": -0.015446866862475872, "logits/rejected": -0.010619192384183407, "logps/chosen": -159.55322265625, "logps/rejected": -337.5633544921875, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -2.582277774810791, "rewards/margins": 15.40095043182373, "rewards/rejected": -17.98322868347168, "step": 4259 }, { "epoch": 1.45, "learning_rate": 3.661943665109796e-07, "logits/chosen": 0.03551032766699791, "logits/rejected": 0.05385424196720123, "logps/chosen": -143.55661010742188, "logps/rejected": -323.0099182128906, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -1.530988097190857, "rewards/margins": 16.440410614013672, "rewards/rejected": -17.971399307250977, "step": 4260 }, { "epoch": 1.45, "learning_rate": 3.6576694787923954e-07, "logits/chosen": 0.059882160276174545, "logits/rejected": 0.07255014777183533, "logps/chosen": -178.69273376464844, "logps/rejected": -320.857421875, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.7182135581970215, "rewards/margins": 14.458044052124023, "rewards/rejected": -16.176258087158203, "step": 4261 }, { "epoch": 1.45, "learning_rate": 3.653397229969444e-07, "logits/chosen": -0.06898240745067596, "logits/rejected": -0.029108908027410507, "logps/chosen": -183.45297241210938, "logps/rejected": -368.0339050292969, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.222429633140564, "rewards/margins": 20.890939712524414, "rewards/rejected": -22.113372802734375, "step": 4262 }, { "epoch": 1.45, "learning_rate": 3.649126919946053e-07, "logits/chosen": -0.026936635375022888, "logits/rejected": 0.003903949400410056, "logps/chosen": -202.53916931152344, "logps/rejected": -371.5957946777344, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.648055911064148, "rewards/margins": 17.816747665405273, "rewards/rejected": -19.464801788330078, "step": 4263 }, { "epoch": 1.46, "learning_rate": 3.644858550026748e-07, "logits/chosen": 0.11436834931373596, "logits/rejected": 0.11904454976320267, "logps/chosen": -106.1519775390625, "logps/rejected": -207.4963836669922, "loss": 0.0177, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3753160238265991, "rewards/margins": 10.237922668457031, "rewards/rejected": -10.613239288330078, "step": 4264 }, { "epoch": 1.46, "learning_rate": 3.640592121515449e-07, "logits/chosen": 0.0464959554374218, "logits/rejected": 0.05636525899171829, "logps/chosen": -240.6416473388672, "logps/rejected": -402.96484375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.831541657447815, "rewards/margins": 17.58760643005371, "rewards/rejected": -19.419147491455078, "step": 4265 }, { "epoch": 1.46, "learning_rate": 3.636327635715496e-07, "logits/chosen": -0.009787661954760551, "logits/rejected": 0.017288217321038246, "logps/chosen": -256.4912109375, "logps/rejected": -442.9214172363281, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.337851881980896, "rewards/margins": 18.288352966308594, "rewards/rejected": -19.626205444335938, "step": 4266 }, { "epoch": 1.46, "learning_rate": 3.6320650939296296e-07, "logits/chosen": -0.0032497141510248184, "logits/rejected": 0.01565287820994854, "logps/chosen": -125.81542205810547, "logps/rejected": -276.9338684082031, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.8981990814208984, "rewards/margins": 14.861739158630371, "rewards/rejected": -16.759937286376953, "step": 4267 }, { "epoch": 1.46, "learning_rate": 3.627804497460001e-07, "logits/chosen": 0.08367542922496796, "logits/rejected": 0.10720764845609665, "logps/chosen": -204.21180725097656, "logps/rejected": -343.292236328125, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -2.6651320457458496, "rewards/margins": 14.19681453704834, "rewards/rejected": -16.86194610595703, "step": 4268 }, { "epoch": 1.46, "learning_rate": 3.623545847608156e-07, "logits/chosen": -0.023247726261615753, "logits/rejected": -0.0003280306118540466, "logps/chosen": -211.2735595703125, "logps/rejected": -369.7007141113281, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.5724010467529297, "rewards/margins": 15.540270805358887, "rewards/rejected": -17.1126708984375, "step": 4269 }, { "epoch": 1.46, "learning_rate": 3.6192891456750587e-07, "logits/chosen": 0.06464245915412903, "logits/rejected": 0.09738686680793762, "logps/chosen": -200.98141479492188, "logps/rejected": -347.72216796875, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -1.7474255561828613, "rewards/margins": 17.487632751464844, "rewards/rejected": -19.23505973815918, "step": 4270 }, { "epoch": 1.46, "learning_rate": 3.6150343929610703e-07, "logits/chosen": -0.08135498315095901, "logits/rejected": -0.06664346158504486, "logps/chosen": -196.4805450439453, "logps/rejected": -413.8728332519531, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.5331372022628784, "rewards/margins": 19.062217712402344, "rewards/rejected": -20.595354080200195, "step": 4271 }, { "epoch": 1.46, "learning_rate": 3.6107815907659655e-07, "logits/chosen": -0.08101742714643478, "logits/rejected": -0.05617978796362877, "logps/chosen": -258.6863098144531, "logps/rejected": -448.0783996582031, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.6127788424491882, "rewards/margins": 21.35350227355957, "rewards/rejected": -21.966278076171875, "step": 4272 }, { "epoch": 1.46, "learning_rate": 3.606530740388907e-07, "logits/chosen": -0.027568675577640533, "logits/rejected": 0.008070257492363453, "logps/chosen": -243.45513916015625, "logps/rejected": -411.151611328125, "loss": 0.0691, "rewards/accuracies": 1.0, "rewards/chosen": -2.8563055992126465, "rewards/margins": 17.852760314941406, "rewards/rejected": -20.709068298339844, "step": 4273 }, { "epoch": 1.46, "learning_rate": 3.602281843128475e-07, "logits/chosen": 0.10969704389572144, "logits/rejected": 0.12603500485420227, "logps/chosen": -112.96627044677734, "logps/rejected": -262.4232177734375, "loss": 0.0463, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3429702520370483, "rewards/margins": 14.270163536071777, "rewards/rejected": -15.61313533782959, "step": 4274 }, { "epoch": 1.46, "learning_rate": 3.5980349002826537e-07, "logits/chosen": -0.06360351294279099, "logits/rejected": -0.027842869982123375, "logps/chosen": -258.45703125, "logps/rejected": -369.14208984375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.854401707649231, "rewards/margins": 14.094182968139648, "rewards/rejected": -14.94858455657959, "step": 4275 }, { "epoch": 1.46, "learning_rate": 3.593789913148818e-07, "logits/chosen": 0.0057622650638222694, "logits/rejected": 0.02594098076224327, "logps/chosen": -120.86358642578125, "logps/rejected": -298.3687744140625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.3296327590942383, "rewards/margins": 16.44347381591797, "rewards/rejected": -17.77310562133789, "step": 4276 }, { "epoch": 1.46, "learning_rate": 3.5895468830237565e-07, "logits/chosen": -0.09912094473838806, "logits/rejected": -0.06802897900342941, "logps/chosen": -281.05267333984375, "logps/rejected": -445.58795166015625, "loss": 0.0333, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3411221504211426, "rewards/margins": 16.60000228881836, "rewards/rejected": -17.941123962402344, "step": 4277 }, { "epoch": 1.46, "learning_rate": 3.5853058112036593e-07, "logits/chosen": -0.016626978293061256, "logits/rejected": 0.01937497965991497, "logps/chosen": -196.90518188476562, "logps/rejected": -364.5732421875, "loss": 0.0719, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9563971161842346, "rewards/margins": 15.688261032104492, "rewards/rejected": -16.644657135009766, "step": 4278 }, { "epoch": 1.46, "learning_rate": 3.5810666989841075e-07, "logits/chosen": 0.0629122257232666, "logits/rejected": 0.08391190320253372, "logps/chosen": -182.12777709960938, "logps/rejected": -367.8677978515625, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -1.0247951745986938, "rewards/margins": 18.9067440032959, "rewards/rejected": -19.93153953552246, "step": 4279 }, { "epoch": 1.46, "learning_rate": 3.576829547660097e-07, "logits/chosen": 0.027802884578704834, "logits/rejected": 0.05264313146471977, "logps/chosen": -229.46751403808594, "logps/rejected": -426.4868469238281, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.24697643518447876, "rewards/margins": 17.3759765625, "rewards/rejected": -17.12900161743164, "step": 4280 }, { "epoch": 1.46, "learning_rate": 3.572594358526019e-07, "logits/chosen": -0.009664923883974552, "logits/rejected": 0.013040806166827679, "logps/chosen": -189.3959197998047, "logps/rejected": -341.0926208496094, "loss": 0.0749, "rewards/accuracies": 1.0, "rewards/chosen": -0.20625002682209015, "rewards/margins": 17.62652015686035, "rewards/rejected": -17.8327693939209, "step": 4281 }, { "epoch": 1.46, "learning_rate": 3.5683611328756625e-07, "logits/chosen": 0.0386248454451561, "logits/rejected": 0.06433376669883728, "logps/chosen": -193.24803161621094, "logps/rejected": -310.7197265625, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.42423558235168457, "rewards/margins": 13.32165241241455, "rewards/rejected": -13.745888710021973, "step": 4282 }, { "epoch": 1.46, "learning_rate": 3.564129872002225e-07, "logits/chosen": 0.01219641137868166, "logits/rejected": 0.06295179575681686, "logps/chosen": -184.341796875, "logps/rejected": -323.8269348144531, "loss": 0.1063, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8784319162368774, "rewards/margins": 16.971115112304688, "rewards/rejected": -18.849546432495117, "step": 4283 }, { "epoch": 1.46, "learning_rate": 3.5599005771982913e-07, "logits/chosen": -0.014575829729437828, "logits/rejected": 0.0237252376973629, "logps/chosen": -248.57223510742188, "logps/rejected": -374.8541259765625, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.7810357809066772, "rewards/margins": 15.569068908691406, "rewards/rejected": -16.3501033782959, "step": 4284 }, { "epoch": 1.46, "learning_rate": 3.5556732497558574e-07, "logits/chosen": 0.013509761542081833, "logits/rejected": 0.044286441057920456, "logps/chosen": -197.39938354492188, "logps/rejected": -386.7728271484375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.9081376791000366, "rewards/margins": 18.966598510742188, "rewards/rejected": -20.874736785888672, "step": 4285 }, { "epoch": 1.46, "learning_rate": 3.5514478909663105e-07, "logits/chosen": -0.0011109348852187395, "logits/rejected": 0.02555273473262787, "logps/chosen": -216.41952514648438, "logps/rejected": -416.63970947265625, "loss": 0.0271, "rewards/accuracies": 0.9375, "rewards/chosen": -1.890842080116272, "rewards/margins": 19.0362606048584, "rewards/rejected": -20.927104949951172, "step": 4286 }, { "epoch": 1.46, "learning_rate": 3.547224502120445e-07, "logits/chosen": -0.09499114006757736, "logits/rejected": -0.04862942919135094, "logps/chosen": -203.37567138671875, "logps/rejected": -356.9125671386719, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.39923894405365, "rewards/margins": 17.466514587402344, "rewards/rejected": -18.865753173828125, "step": 4287 }, { "epoch": 1.46, "learning_rate": 3.5430030845084403e-07, "logits/chosen": -0.06636267900466919, "logits/rejected": -0.04429589584469795, "logps/chosen": -153.99435424804688, "logps/rejected": -292.27117919921875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.6329859495162964, "rewards/margins": 15.761557579040527, "rewards/rejected": -16.39454460144043, "step": 4288 }, { "epoch": 1.46, "learning_rate": 3.5387836394198844e-07, "logits/chosen": 0.16446198523044586, "logits/rejected": 0.18901877105236053, "logps/chosen": -201.4203643798828, "logps/rejected": -282.531005859375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.7766894698143005, "rewards/margins": 13.419343948364258, "rewards/rejected": -14.196033477783203, "step": 4289 }, { "epoch": 1.46, "learning_rate": 3.5345661681437633e-07, "logits/chosen": 0.07221127301454544, "logits/rejected": 0.09843753278255463, "logps/chosen": -195.1924285888672, "logps/rejected": -342.78912353515625, "loss": 0.0421, "rewards/accuracies": 0.9375, "rewards/chosen": -3.1182327270507812, "rewards/margins": 15.748222351074219, "rewards/rejected": -18.866455078125, "step": 4290 }, { "epoch": 1.46, "learning_rate": 3.5303506719684505e-07, "logits/chosen": 0.013806094415485859, "logits/rejected": 0.033069390803575516, "logps/chosen": -188.83078002929688, "logps/rejected": -367.2939758300781, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.190578430891037, "rewards/margins": 16.820547103881836, "rewards/rejected": -17.011125564575195, "step": 4291 }, { "epoch": 1.46, "learning_rate": 3.526137152181724e-07, "logits/chosen": 0.022481251507997513, "logits/rejected": 0.029341939836740494, "logps/chosen": -186.978271484375, "logps/rejected": -364.09490966796875, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -2.9703221321105957, "rewards/margins": 15.702227592468262, "rewards/rejected": -18.672550201416016, "step": 4292 }, { "epoch": 1.47, "learning_rate": 3.5219256100707585e-07, "logits/chosen": 0.07559617608785629, "logits/rejected": 0.07620733976364136, "logps/chosen": -128.63229370117188, "logps/rejected": -320.6584777832031, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.073307991027832, "rewards/margins": 16.80066680908203, "rewards/rejected": -17.87397575378418, "step": 4293 }, { "epoch": 1.47, "learning_rate": 3.5177160469221176e-07, "logits/chosen": -0.046862076967954636, "logits/rejected": -0.024620993062853813, "logps/chosen": -205.43423461914062, "logps/rejected": -373.1189880371094, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8191696405410767, "rewards/margins": 18.4407901763916, "rewards/rejected": -19.259960174560547, "step": 4294 }, { "epoch": 1.47, "learning_rate": 3.513508464021766e-07, "logits/chosen": 0.02953708916902542, "logits/rejected": 0.03299665078520775, "logps/chosen": -210.22242736816406, "logps/rejected": -346.69818115234375, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -1.8351304531097412, "rewards/margins": 15.712419509887695, "rewards/rejected": -17.547550201416016, "step": 4295 }, { "epoch": 1.47, "learning_rate": 3.5093028626550635e-07, "logits/chosen": 0.018539484590291977, "logits/rejected": 0.08295358717441559, "logps/chosen": -204.4585723876953, "logps/rejected": -238.56686401367188, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.5610234141349792, "rewards/margins": 10.397751808166504, "rewards/rejected": -10.958775520324707, "step": 4296 }, { "epoch": 1.47, "learning_rate": 3.5050992441067627e-07, "logits/chosen": -0.050727445632219315, "logits/rejected": 0.006240044720470905, "logps/chosen": -215.31069946289062, "logps/rejected": -274.9985656738281, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -2.343029499053955, "rewards/margins": 11.48643684387207, "rewards/rejected": -13.829466819763184, "step": 4297 }, { "epoch": 1.47, "learning_rate": 3.5008976096610154e-07, "logits/chosen": 0.020076101645827293, "logits/rejected": 0.04116865247488022, "logps/chosen": -161.76666259765625, "logps/rejected": -303.67828369140625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.5345551371574402, "rewards/margins": 13.65843677520752, "rewards/rejected": -14.1929931640625, "step": 4298 }, { "epoch": 1.47, "learning_rate": 3.4966979606013546e-07, "logits/chosen": -0.09286555647850037, "logits/rejected": -0.07622408121824265, "logps/chosen": -251.89903259277344, "logps/rejected": -513.6183471679688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.8024156093597412, "rewards/margins": 22.551788330078125, "rewards/rejected": -24.354202270507812, "step": 4299 }, { "epoch": 1.47, "learning_rate": 3.49250029821072e-07, "logits/chosen": 0.00026022648671641946, "logits/rejected": 0.00392291322350502, "logps/chosen": -189.93077087402344, "logps/rejected": -401.5236511230469, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5640283823013306, "rewards/margins": 19.729108810424805, "rewards/rejected": -21.293136596679688, "step": 4300 }, { "epoch": 1.47, "learning_rate": 3.4883046237714387e-07, "logits/chosen": -0.1573219746351242, "logits/rejected": -0.13055434823036194, "logps/chosen": -229.27330017089844, "logps/rejected": -389.7613830566406, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.9015579223632812, "rewards/margins": 16.81891441345215, "rewards/rejected": -18.72047233581543, "step": 4301 }, { "epoch": 1.47, "learning_rate": 3.4841109385652334e-07, "logits/chosen": -0.040176670998334885, "logits/rejected": -0.01815887726843357, "logps/chosen": -253.59402465820312, "logps/rejected": -437.2276916503906, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.4792768955230713, "rewards/margins": 16.021894454956055, "rewards/rejected": -19.501169204711914, "step": 4302 }, { "epoch": 1.47, "learning_rate": 3.479919243873214e-07, "logits/chosen": -0.010879495181143284, "logits/rejected": 0.035019733011722565, "logps/chosen": -154.744384765625, "logps/rejected": -307.30523681640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.510636806488037, "rewards/margins": 15.516044616699219, "rewards/rejected": -18.02667999267578, "step": 4303 }, { "epoch": 1.47, "learning_rate": 3.4757295409758847e-07, "logits/chosen": -0.0006310457829385996, "logits/rejected": 0.011641710996627808, "logps/chosen": -187.6142578125, "logps/rejected": -342.8918762207031, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -1.3147194385528564, "rewards/margins": 13.877470016479492, "rewards/rejected": -15.19218921661377, "step": 4304 }, { "epoch": 1.47, "learning_rate": 3.4715418311531476e-07, "logits/chosen": -0.08312668651342392, "logits/rejected": -0.07428137958049774, "logps/chosen": -167.50485229492188, "logps/rejected": -333.1837158203125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.84983491897583, "rewards/margins": 15.098546981811523, "rewards/rejected": -17.948381423950195, "step": 4305 }, { "epoch": 1.47, "learning_rate": 3.4673561156842836e-07, "logits/chosen": -0.03116750717163086, "logits/rejected": -0.018470151349902153, "logps/chosen": -207.66622924804688, "logps/rejected": -398.7978515625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.9012367725372314, "rewards/margins": 18.071117401123047, "rewards/rejected": -19.972352981567383, "step": 4306 }, { "epoch": 1.47, "learning_rate": 3.463172395847973e-07, "logits/chosen": -0.0455310083925724, "logits/rejected": -0.008634241297841072, "logps/chosen": -161.70973205566406, "logps/rejected": -338.0144348144531, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.9737619161605835, "rewards/margins": 18.333932876586914, "rewards/rejected": -20.307697296142578, "step": 4307 }, { "epoch": 1.47, "learning_rate": 3.4589906729222896e-07, "logits/chosen": 0.02881801500916481, "logits/rejected": 0.08389584720134735, "logps/chosen": -179.5977020263672, "logps/rejected": -297.53668212890625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.9091180562973022, "rewards/margins": 14.403671264648438, "rewards/rejected": -16.312789916992188, "step": 4308 }, { "epoch": 1.47, "learning_rate": 3.4548109481846834e-07, "logits/chosen": -0.11012174189090729, "logits/rejected": -0.10490558296442032, "logps/chosen": -265.8672180175781, "logps/rejected": -486.34405517578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.996333122253418, "rewards/margins": 18.464523315429688, "rewards/rejected": -21.46085548400879, "step": 4309 }, { "epoch": 1.47, "learning_rate": 3.4506332229120074e-07, "logits/chosen": 0.02329671010375023, "logits/rejected": 0.023010695353150368, "logps/chosen": -173.4016876220703, "logps/rejected": -351.5385437011719, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -2.4104421138763428, "rewards/margins": 15.421314239501953, "rewards/rejected": -17.831754684448242, "step": 4310 }, { "epoch": 1.47, "learning_rate": 3.4464574983805015e-07, "logits/chosen": 0.054005563259124756, "logits/rejected": 0.07048928737640381, "logps/chosen": -180.14865112304688, "logps/rejected": -356.9393615722656, "loss": 0.0476, "rewards/accuracies": 1.0, "rewards/chosen": -1.8912767171859741, "rewards/margins": 16.610681533813477, "rewards/rejected": -18.501956939697266, "step": 4311 }, { "epoch": 1.47, "learning_rate": 3.442283775865783e-07, "logits/chosen": -0.055338405072689056, "logits/rejected": -0.02034205012023449, "logps/chosen": -219.26229858398438, "logps/rejected": -337.25360107421875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.3904244899749756, "rewards/margins": 13.668648719787598, "rewards/rejected": -15.059074401855469, "step": 4312 }, { "epoch": 1.47, "learning_rate": 3.438112056642879e-07, "logits/chosen": 0.04675773158669472, "logits/rejected": 0.07846078276634216, "logps/chosen": -224.94418334960938, "logps/rejected": -400.22772216796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3594839572906494, "rewards/margins": 19.694286346435547, "rewards/rejected": -21.05376625061035, "step": 4313 }, { "epoch": 1.47, "learning_rate": 3.4339423419861823e-07, "logits/chosen": 0.10289523005485535, "logits/rejected": 0.12576545774936676, "logps/chosen": -171.75198364257812, "logps/rejected": -323.110107421875, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -0.5949198007583618, "rewards/margins": 15.0654296875, "rewards/rejected": -15.66034984588623, "step": 4314 }, { "epoch": 1.47, "learning_rate": 3.4297746331694864e-07, "logits/chosen": 0.008487535640597343, "logits/rejected": 0.06345517933368683, "logps/chosen": -233.77017211914062, "logps/rejected": -271.54010009765625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.7704205513000488, "rewards/margins": 12.232733726501465, "rewards/rejected": -14.003154754638672, "step": 4315 }, { "epoch": 1.47, "learning_rate": 3.425608931465972e-07, "logits/chosen": 0.04530376195907593, "logits/rejected": 0.09105224907398224, "logps/chosen": -162.07838439941406, "logps/rejected": -270.83258056640625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.8430726528167725, "rewards/margins": 13.977102279663086, "rewards/rejected": -14.820175170898438, "step": 4316 }, { "epoch": 1.47, "learning_rate": 3.421445238148197e-07, "logits/chosen": -0.01892096735537052, "logits/rejected": 0.0057688611559569836, "logps/chosen": -220.1259307861328, "logps/rejected": -388.3943786621094, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -1.2250947952270508, "rewards/margins": 17.494203567504883, "rewards/rejected": -18.71929931640625, "step": 4317 }, { "epoch": 1.47, "learning_rate": 3.4172835544881173e-07, "logits/chosen": -0.033606503158807755, "logits/rejected": -0.0007015031878836453, "logps/chosen": -258.5490417480469, "logps/rejected": -394.1745300292969, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.36555564403533936, "rewards/margins": 17.446331024169922, "rewards/rejected": -17.811885833740234, "step": 4318 }, { "epoch": 1.47, "learning_rate": 3.413123881757066e-07, "logits/chosen": -0.07956627756357193, "logits/rejected": -0.02547648921608925, "logps/chosen": -178.46189880371094, "logps/rejected": -344.813720703125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.2246674299240112, "rewards/margins": 15.676026344299316, "rewards/rejected": -16.900691986083984, "step": 4319 }, { "epoch": 1.47, "learning_rate": 3.408966221225773e-07, "logits/chosen": -0.0714937373995781, "logits/rejected": -0.05498658865690231, "logps/chosen": -190.3224639892578, "logps/rejected": -308.86358642578125, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -2.431361198425293, "rewards/margins": 12.797216415405273, "rewards/rejected": -15.22857666015625, "step": 4320 }, { "epoch": 1.47, "learning_rate": 3.4048105741643375e-07, "logits/chosen": 0.024714717641472816, "logits/rejected": 0.0666051134467125, "logps/chosen": -255.6373291015625, "logps/rejected": -381.9921875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.9991040825843811, "rewards/margins": 17.236501693725586, "rewards/rejected": -18.235607147216797, "step": 4321 }, { "epoch": 1.48, "learning_rate": 3.400656941842255e-07, "logits/chosen": -0.1031295657157898, "logits/rejected": -0.06335459649562836, "logps/chosen": -237.75296020507812, "logps/rejected": -371.55291748046875, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -2.450453042984009, "rewards/margins": 14.45238208770752, "rewards/rejected": -16.902833938598633, "step": 4322 }, { "epoch": 1.48, "learning_rate": 3.396505325528408e-07, "logits/chosen": 0.08177082985639572, "logits/rejected": 0.11583948135375977, "logps/chosen": -170.39503479003906, "logps/rejected": -271.0329895019531, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.2817097902297974, "rewards/margins": 15.611351013183594, "rewards/rejected": -16.893062591552734, "step": 4323 }, { "epoch": 1.48, "learning_rate": 3.3923557264910505e-07, "logits/chosen": 0.02746783010661602, "logits/rejected": 0.05328948423266411, "logps/chosen": -195.44363403320312, "logps/rejected": -356.818603515625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.2847663164138794, "rewards/margins": 16.426549911499023, "rewards/rejected": -17.711315155029297, "step": 4324 }, { "epoch": 1.48, "learning_rate": 3.388208145997831e-07, "logits/chosen": -0.04491819441318512, "logits/rejected": -0.005462833680212498, "logps/chosen": -261.69671630859375, "logps/rejected": -490.35137939453125, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -2.8511388301849365, "rewards/margins": 21.319561004638672, "rewards/rejected": -24.17070198059082, "step": 4325 }, { "epoch": 1.48, "learning_rate": 3.3840625853157813e-07, "logits/chosen": 0.0532471239566803, "logits/rejected": 0.06445740163326263, "logps/chosen": -184.4385528564453, "logps/rejected": -331.50592041015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3498930931091309, "rewards/margins": 15.516043663024902, "rewards/rejected": -16.865938186645508, "step": 4326 }, { "epoch": 1.48, "learning_rate": 3.379919045711304e-07, "logits/chosen": -0.0475357249379158, "logits/rejected": -0.022837871685624123, "logps/chosen": -264.51300048828125, "logps/rejected": -380.84124755859375, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.6350781917572021, "rewards/margins": 13.186697959899902, "rewards/rejected": -14.821775436401367, "step": 4327 }, { "epoch": 1.48, "learning_rate": 3.3757775284502043e-07, "logits/chosen": 0.024773165583610535, "logits/rejected": 0.04427914321422577, "logps/chosen": -179.77264404296875, "logps/rejected": -322.54205322265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.8501806259155273, "rewards/margins": 16.00388526916504, "rewards/rejected": -17.854066848754883, "step": 4328 }, { "epoch": 1.48, "learning_rate": 3.3716380347976514e-07, "logits/chosen": 0.04289593547582626, "logits/rejected": 0.04393923282623291, "logps/chosen": -151.77032470703125, "logps/rejected": -320.3656005859375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.8791465759277344, "rewards/margins": 14.278657913208008, "rewards/rejected": -17.157806396484375, "step": 4329 }, { "epoch": 1.48, "learning_rate": 3.3675005660182044e-07, "logits/chosen": -0.006615992169827223, "logits/rejected": 0.01799856126308441, "logps/chosen": -186.9290313720703, "logps/rejected": -343.5873107910156, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.2771291732788086, "rewards/margins": 14.722600936889648, "rewards/rejected": -16.99972915649414, "step": 4330 }, { "epoch": 1.48, "learning_rate": 3.3633651233758086e-07, "logits/chosen": 0.1458345651626587, "logits/rejected": 0.15697242319583893, "logps/chosen": -181.85284423828125, "logps/rejected": -358.5345764160156, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.3894145488739014, "rewards/margins": 15.340771675109863, "rewards/rejected": -17.730186462402344, "step": 4331 }, { "epoch": 1.48, "learning_rate": 3.359231708133775e-07, "logits/chosen": 0.022972168400883675, "logits/rejected": 0.015297889709472656, "logps/chosen": -237.97032165527344, "logps/rejected": -484.3893737792969, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.19511342048645, "rewards/margins": 20.799903869628906, "rewards/rejected": -22.995018005371094, "step": 4332 }, { "epoch": 1.48, "learning_rate": 3.3551003215548113e-07, "logits/chosen": 0.10685174912214279, "logits/rejected": 0.1400584578514099, "logps/chosen": -146.02862548828125, "logps/rejected": -344.46221923828125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.6259748935699463, "rewards/margins": 19.28130531311035, "rewards/rejected": -19.90727996826172, "step": 4333 }, { "epoch": 1.48, "learning_rate": 3.3509709649009975e-07, "logits/chosen": 0.019408009946346283, "logits/rejected": 0.04286540299654007, "logps/chosen": -188.4259033203125, "logps/rejected": -371.537841796875, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -0.9036704301834106, "rewards/margins": 18.595531463623047, "rewards/rejected": -19.499204635620117, "step": 4334 }, { "epoch": 1.48, "learning_rate": 3.3468436394337985e-07, "logits/chosen": 0.011861545965075493, "logits/rejected": 0.05067894607782364, "logps/chosen": -170.45816040039062, "logps/rejected": -300.6982727050781, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -1.9081966876983643, "rewards/margins": 14.681425094604492, "rewards/rejected": -16.589622497558594, "step": 4335 }, { "epoch": 1.48, "learning_rate": 3.3427183464140487e-07, "logits/chosen": 0.059618331491947174, "logits/rejected": 0.07736340165138245, "logps/chosen": -189.57949829101562, "logps/rejected": -346.80181884765625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.6827658414840698, "rewards/margins": 15.920713424682617, "rewards/rejected": -17.603479385375977, "step": 4336 }, { "epoch": 1.48, "learning_rate": 3.3385950871019717e-07, "logits/chosen": 0.07465988397598267, "logits/rejected": 0.07436936348676682, "logps/chosen": -219.79434204101562, "logps/rejected": -362.2137451171875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.5540742874145508, "rewards/margins": 14.93508243560791, "rewards/rejected": -16.48915672302246, "step": 4337 }, { "epoch": 1.48, "learning_rate": 3.33447386275717e-07, "logits/chosen": -0.04963836818933487, "logits/rejected": -0.02190297842025757, "logps/chosen": -222.9297332763672, "logps/rejected": -371.1272277832031, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.299842119216919, "rewards/margins": 17.077342987060547, "rewards/rejected": -18.377182006835938, "step": 4338 }, { "epoch": 1.48, "learning_rate": 3.330354674638612e-07, "logits/chosen": 0.061547745019197464, "logits/rejected": 0.08070602267980576, "logps/chosen": -216.4593963623047, "logps/rejected": -343.6905212402344, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -3.4220643043518066, "rewards/margins": 13.716679573059082, "rewards/rejected": -17.138742446899414, "step": 4339 }, { "epoch": 1.48, "learning_rate": 3.3262375240046593e-07, "logits/chosen": 0.09346688538789749, "logits/rejected": 0.10710876435041428, "logps/chosen": -180.5714111328125, "logps/rejected": -344.39691162109375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.6927789449691772, "rewards/margins": 15.006001472473145, "rewards/rejected": -16.698780059814453, "step": 4340 }, { "epoch": 1.48, "learning_rate": 3.3221224121130466e-07, "logits/chosen": -0.023433107882738113, "logits/rejected": 0.003783287014812231, "logps/chosen": -208.70465087890625, "logps/rejected": -387.6407775878906, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.084754228591919, "rewards/margins": 18.809722900390625, "rewards/rejected": -19.894479751586914, "step": 4341 }, { "epoch": 1.48, "learning_rate": 3.3180093402208733e-07, "logits/chosen": -0.07394158840179443, "logits/rejected": -0.09416434913873672, "logps/chosen": -196.56378173828125, "logps/rejected": -461.5335998535156, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -2.7503724098205566, "rewards/margins": 20.992658615112305, "rewards/rejected": -23.743030548095703, "step": 4342 }, { "epoch": 1.48, "learning_rate": 3.3138983095846386e-07, "logits/chosen": 0.004396082367748022, "logits/rejected": 0.05180734023451805, "logps/chosen": -199.88441467285156, "logps/rejected": -346.8774108886719, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.490086078643799, "rewards/margins": 15.163204193115234, "rewards/rejected": -17.653289794921875, "step": 4343 }, { "epoch": 1.48, "learning_rate": 3.309789321460201e-07, "logits/chosen": 0.0028897346928715706, "logits/rejected": 0.026070255786180496, "logps/chosen": -211.95077514648438, "logps/rejected": -398.5183410644531, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.9905786514282227, "rewards/margins": 18.23573112487793, "rewards/rejected": -21.226308822631836, "step": 4344 }, { "epoch": 1.48, "learning_rate": 3.305682377102792e-07, "logits/chosen": 0.12719807028770447, "logits/rejected": 0.13407833874225616, "logps/chosen": -159.05899047851562, "logps/rejected": -346.3960266113281, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -1.0068563222885132, "rewards/margins": 15.488046646118164, "rewards/rejected": -16.494903564453125, "step": 4345 }, { "epoch": 1.48, "learning_rate": 3.3015774777670403e-07, "logits/chosen": -0.00917215645313263, "logits/rejected": 0.023194003850221634, "logps/chosen": -262.1029357910156, "logps/rejected": -410.4757385253906, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3480197787284851, "rewards/margins": 17.28655433654785, "rewards/rejected": -17.634578704833984, "step": 4346 }, { "epoch": 1.48, "learning_rate": 3.297474624706925e-07, "logits/chosen": -0.037123922258615494, "logits/rejected": -0.01995665952563286, "logps/chosen": -228.0020751953125, "logps/rejected": -376.0021057128906, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -2.741023540496826, "rewards/margins": 15.688370704650879, "rewards/rejected": -18.429393768310547, "step": 4347 }, { "epoch": 1.48, "learning_rate": 3.293373819175815e-07, "logits/chosen": 0.017591815441846848, "logits/rejected": 0.024890244007110596, "logps/chosen": -212.04486083984375, "logps/rejected": -450.2959289550781, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.8320895433425903, "rewards/margins": 20.53506851196289, "rewards/rejected": -22.367156982421875, "step": 4348 }, { "epoch": 1.48, "learning_rate": 3.289275062426453e-07, "logits/chosen": -0.029502058401703835, "logits/rejected": -0.012658007442951202, "logps/chosen": -209.1317138671875, "logps/rejected": -404.5144348144531, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -3.0842833518981934, "rewards/margins": 19.18617057800293, "rewards/rejected": -22.270456314086914, "step": 4349 }, { "epoch": 1.48, "learning_rate": 3.2851783557109467e-07, "logits/chosen": 0.15078610181808472, "logits/rejected": 0.15677998960018158, "logps/chosen": -228.25228881835938, "logps/rejected": -350.4071044921875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.8014508485794067, "rewards/margins": 14.150588035583496, "rewards/rejected": -14.952038764953613, "step": 4350 }, { "epoch": 1.48, "learning_rate": 3.2810837002807857e-07, "logits/chosen": 0.11068419367074966, "logits/rejected": 0.13130395114421844, "logps/chosen": -146.84275817871094, "logps/rejected": -262.41558837890625, "loss": 0.0469, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9735991954803467, "rewards/margins": 12.295263290405273, "rewards/rejected": -14.268863677978516, "step": 4351 }, { "epoch": 1.49, "learning_rate": 3.276991097386831e-07, "logits/chosen": 0.046299487352371216, "logits/rejected": 0.08309553563594818, "logps/chosen": -230.3987274169922, "logps/rejected": -327.6669616699219, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.6097629070281982, "rewards/margins": 12.989957809448242, "rewards/rejected": -14.599720001220703, "step": 4352 }, { "epoch": 1.49, "learning_rate": 3.2729005482793204e-07, "logits/chosen": 0.08826086670160294, "logits/rejected": 0.13473831117153168, "logps/chosen": -220.15679931640625, "logps/rejected": -253.81178283691406, "loss": 0.0195, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5518860816955566, "rewards/margins": 9.92520809173584, "rewards/rejected": -12.477094650268555, "step": 4353 }, { "epoch": 1.49, "learning_rate": 3.268812054207852e-07, "logits/chosen": 0.035760801285505295, "logits/rejected": 0.050094351172447205, "logps/chosen": -179.5376434326172, "logps/rejected": -326.8556823730469, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -1.2398324012756348, "rewards/margins": 15.129794120788574, "rewards/rejected": -16.369625091552734, "step": 4354 }, { "epoch": 1.49, "learning_rate": 3.26472561642141e-07, "logits/chosen": 0.10996432602405548, "logits/rejected": 0.15110240876674652, "logps/chosen": -191.37811279296875, "logps/rejected": -340.3448486328125, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -2.9290764331817627, "rewards/margins": 15.795269012451172, "rewards/rejected": -18.72434425354004, "step": 4355 }, { "epoch": 1.49, "learning_rate": 3.2606412361683456e-07, "logits/chosen": 0.05835554376244545, "logits/rejected": 0.10408399254083633, "logps/chosen": -224.84173583984375, "logps/rejected": -357.47772216796875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8057236671447754, "rewards/margins": 19.7523193359375, "rewards/rejected": -20.558042526245117, "step": 4356 }, { "epoch": 1.49, "learning_rate": 3.2565589146963725e-07, "logits/chosen": -0.004825172014534473, "logits/rejected": 0.03635075315833092, "logps/chosen": -212.6399688720703, "logps/rejected": -369.1756896972656, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.7857787609100342, "rewards/margins": 16.541393280029297, "rewards/rejected": -18.327171325683594, "step": 4357 }, { "epoch": 1.49, "learning_rate": 3.252478653252595e-07, "logits/chosen": 0.023007530719041824, "logits/rejected": 0.06197601184248924, "logps/chosen": -230.10765075683594, "logps/rejected": -375.8346862792969, "loss": 0.0467, "rewards/accuracies": 1.0, "rewards/chosen": -2.4339983463287354, "rewards/margins": 18.76791000366211, "rewards/rejected": -21.201908111572266, "step": 4358 }, { "epoch": 1.49, "learning_rate": 3.248400453083472e-07, "logits/chosen": 0.03305080533027649, "logits/rejected": 0.06168146803975105, "logps/chosen": -222.08172607421875, "logps/rejected": -357.14398193359375, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": -2.640753984451294, "rewards/margins": 16.485193252563477, "rewards/rejected": -19.125946044921875, "step": 4359 }, { "epoch": 1.49, "learning_rate": 3.24432431543483e-07, "logits/chosen": 0.08231374621391296, "logits/rejected": 0.08154682070016861, "logps/chosen": -186.28750610351562, "logps/rejected": -361.843994140625, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.6456552743911743, "rewards/margins": 15.20701789855957, "rewards/rejected": -16.852672576904297, "step": 4360 }, { "epoch": 1.49, "learning_rate": 3.2402502415518853e-07, "logits/chosen": 0.03851688280701637, "logits/rejected": 0.087241031229496, "logps/chosen": -189.7017822265625, "logps/rejected": -329.61370849609375, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -0.77333003282547, "rewards/margins": 17.020065307617188, "rewards/rejected": -17.79339599609375, "step": 4361 }, { "epoch": 1.49, "learning_rate": 3.236178232679202e-07, "logits/chosen": 0.020810090005397797, "logits/rejected": 0.04961962625384331, "logps/chosen": -189.50559997558594, "logps/rejected": -355.3583679199219, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.41352254152297974, "rewards/margins": 18.712379455566406, "rewards/rejected": -19.12590217590332, "step": 4362 }, { "epoch": 1.49, "learning_rate": 3.232108290060727e-07, "logits/chosen": -0.039531733840703964, "logits/rejected": -0.02427005209028721, "logps/chosen": -226.29678344726562, "logps/rejected": -363.9898681640625, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -1.1235326528549194, "rewards/margins": 14.501570701599121, "rewards/rejected": -15.625102043151855, "step": 4363 }, { "epoch": 1.49, "learning_rate": 3.2280404149397723e-07, "logits/chosen": 0.02497211843729019, "logits/rejected": 0.03929158300161362, "logps/chosen": -181.42689514160156, "logps/rejected": -316.1735534667969, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8734816908836365, "rewards/margins": 15.296987533569336, "rewards/rejected": -16.170469284057617, "step": 4364 }, { "epoch": 1.49, "learning_rate": 3.2239746085590125e-07, "logits/chosen": -0.06349219381809235, "logits/rejected": -0.043592195957899094, "logps/chosen": -231.53636169433594, "logps/rejected": -411.42071533203125, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.6847161054611206, "rewards/margins": 17.493907928466797, "rewards/rejected": -18.17862319946289, "step": 4365 }, { "epoch": 1.49, "learning_rate": 3.2199108721604985e-07, "logits/chosen": 0.028129665181040764, "logits/rejected": 0.0368146151304245, "logps/chosen": -267.2594909667969, "logps/rejected": -449.8271789550781, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.794367551803589, "rewards/margins": 16.186037063598633, "rewards/rejected": -19.980405807495117, "step": 4366 }, { "epoch": 1.49, "learning_rate": 3.215849206985647e-07, "logits/chosen": 0.0506366528570652, "logits/rejected": 0.06325429677963257, "logps/chosen": -219.58848571777344, "logps/rejected": -396.6739501953125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.4886112213134766, "rewards/margins": 16.528194427490234, "rewards/rejected": -19.01680564880371, "step": 4367 }, { "epoch": 1.49, "learning_rate": 3.211789614275241e-07, "logits/chosen": 0.030704904347658157, "logits/rejected": 0.06283333897590637, "logps/chosen": -204.95285034179688, "logps/rejected": -334.6749572753906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.9941282272338867, "rewards/margins": 16.3445987701416, "rewards/rejected": -19.338726043701172, "step": 4368 }, { "epoch": 1.49, "learning_rate": 3.2077320952694266e-07, "logits/chosen": -0.029088320210576057, "logits/rejected": -0.020696358755230904, "logps/chosen": -268.2171325683594, "logps/rejected": -415.18865966796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.4231215715408325, "rewards/margins": 15.092378616333008, "rewards/rejected": -16.515501022338867, "step": 4369 }, { "epoch": 1.49, "learning_rate": 3.203676651207722e-07, "logits/chosen": 0.03923241049051285, "logits/rejected": 0.061412472277879715, "logps/chosen": -207.84555053710938, "logps/rejected": -348.0697326660156, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": -2.9614975452423096, "rewards/margins": 14.87785530090332, "rewards/rejected": -17.839353561401367, "step": 4370 }, { "epoch": 1.49, "learning_rate": 3.1996232833290104e-07, "logits/chosen": 0.026433689519762993, "logits/rejected": 0.06489694863557816, "logps/chosen": -209.7329864501953, "logps/rejected": -367.0749206542969, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.6248575448989868, "rewards/margins": 18.199085235595703, "rewards/rejected": -19.823944091796875, "step": 4371 }, { "epoch": 1.49, "learning_rate": 3.195571992871534e-07, "logits/chosen": -0.08637376874685287, "logits/rejected": -0.06880131363868713, "logps/chosen": -267.4007873535156, "logps/rejected": -412.2017517089844, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.5526604056358337, "rewards/margins": 16.846439361572266, "rewards/rejected": -17.399099349975586, "step": 4372 }, { "epoch": 1.49, "learning_rate": 3.191522781072916e-07, "logits/chosen": -0.02575048804283142, "logits/rejected": 0.03068329207599163, "logps/chosen": -227.55099487304688, "logps/rejected": -342.09185791015625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.7382731437683105, "rewards/margins": 16.15713119506836, "rewards/rejected": -18.895402908325195, "step": 4373 }, { "epoch": 1.49, "learning_rate": 3.1874756491701303e-07, "logits/chosen": 0.02960686758160591, "logits/rejected": 0.03216787427663803, "logps/chosen": -220.24169921875, "logps/rejected": -391.85369873046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.1481564044952393, "rewards/margins": 14.981996536254883, "rewards/rejected": -17.13015365600586, "step": 4374 }, { "epoch": 1.49, "learning_rate": 3.183430598399511e-07, "logits/chosen": -0.009545340202748775, "logits/rejected": 0.02994990535080433, "logps/chosen": -207.49114990234375, "logps/rejected": -368.01007080078125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.44320452213287354, "rewards/margins": 18.019969940185547, "rewards/rejected": -18.463172912597656, "step": 4375 }, { "epoch": 1.49, "learning_rate": 3.1793876299967814e-07, "logits/chosen": 0.01930204965174198, "logits/rejected": 0.053469717502593994, "logps/chosen": -202.93048095703125, "logps/rejected": -382.657470703125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -2.3791797161102295, "rewards/margins": 18.40373992919922, "rewards/rejected": -20.78291893005371, "step": 4376 }, { "epoch": 1.49, "learning_rate": 3.1753467451969995e-07, "logits/chosen": 0.061659835278987885, "logits/rejected": 0.10625068098306656, "logps/chosen": -240.4759521484375, "logps/rejected": -381.534423828125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.723421812057495, "rewards/margins": 16.988828659057617, "rewards/rejected": -19.712251663208008, "step": 4377 }, { "epoch": 1.49, "learning_rate": 3.171307945234606e-07, "logits/chosen": -0.17781595885753632, "logits/rejected": -0.15359388291835785, "logps/chosen": -176.01315307617188, "logps/rejected": -379.73236083984375, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -3.210977792739868, "rewards/margins": 18.8250789642334, "rewards/rejected": -22.03605842590332, "step": 4378 }, { "epoch": 1.49, "learning_rate": 3.167271231343399e-07, "logits/chosen": 0.025068597868084908, "logits/rejected": 0.06512250751256943, "logps/chosen": -193.9127197265625, "logps/rejected": -334.11944580078125, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 0.5186598300933838, "rewards/margins": 18.879608154296875, "rewards/rejected": -18.360950469970703, "step": 4379 }, { "epoch": 1.49, "learning_rate": 3.1632366047565353e-07, "logits/chosen": -0.022405438125133514, "logits/rejected": -0.018901515752077103, "logps/chosen": -216.9024658203125, "logps/rejected": -426.8997802734375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.071171522140503, "rewards/margins": 18.258569717407227, "rewards/rejected": -19.329740524291992, "step": 4380 }, { "epoch": 1.5, "learning_rate": 3.159204066706539e-07, "logits/chosen": 0.015964556485414505, "logits/rejected": 0.05063318461179733, "logps/chosen": -168.78285217285156, "logps/rejected": -344.32666015625, "loss": 0.0877, "rewards/accuracies": 1.0, "rewards/chosen": -1.1272335052490234, "rewards/margins": 17.054468154907227, "rewards/rejected": -18.18170166015625, "step": 4381 }, { "epoch": 1.5, "learning_rate": 3.1551736184252977e-07, "logits/chosen": -0.10436557978391647, "logits/rejected": -0.07763846218585968, "logps/chosen": -207.28372192382812, "logps/rejected": -365.1382141113281, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.488970160484314, "rewards/margins": 16.163015365600586, "rewards/rejected": -17.65198516845703, "step": 4382 }, { "epoch": 1.5, "learning_rate": 3.1511452611440527e-07, "logits/chosen": -0.0028436477296054363, "logits/rejected": 0.030135750770568848, "logps/chosen": -291.7171325683594, "logps/rejected": -399.29608154296875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.34610557556152344, "rewards/margins": 16.63007164001465, "rewards/rejected": -16.976177215576172, "step": 4383 }, { "epoch": 1.5, "learning_rate": 3.147118996093414e-07, "logits/chosen": 0.00947022344917059, "logits/rejected": 0.026200825348496437, "logps/chosen": -236.99295043945312, "logps/rejected": -420.8314208984375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.630598545074463, "rewards/margins": 17.916534423828125, "rewards/rejected": -20.547134399414062, "step": 4384 }, { "epoch": 1.5, "learning_rate": 3.1430948245033506e-07, "logits/chosen": 0.007266980595886707, "logits/rejected": 0.08220524340867996, "logps/chosen": -245.11911010742188, "logps/rejected": -341.8081359863281, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.5664315223693848, "rewards/margins": 16.056398391723633, "rewards/rejected": -17.62282943725586, "step": 4385 }, { "epoch": 1.5, "learning_rate": 3.139072747603194e-07, "logits/chosen": 0.010105063207447529, "logits/rejected": 0.028169963508844376, "logps/chosen": -273.7413330078125, "logps/rejected": -456.167724609375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.520709753036499, "rewards/margins": 17.54916000366211, "rewards/rejected": -20.069868087768555, "step": 4386 }, { "epoch": 1.5, "learning_rate": 3.1350527666216263e-07, "logits/chosen": 0.14495913684368134, "logits/rejected": 0.15901724994182587, "logps/chosen": -181.63327026367188, "logps/rejected": -356.3822937011719, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -1.9720405340194702, "rewards/margins": 16.65891456604004, "rewards/rejected": -18.63095474243164, "step": 4387 }, { "epoch": 1.5, "learning_rate": 3.1310348827867005e-07, "logits/chosen": -0.10788783431053162, "logits/rejected": -0.09566053748130798, "logps/chosen": -221.7308807373047, "logps/rejected": -401.4144592285156, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.725021243095398, "rewards/margins": 14.694558143615723, "rewards/rejected": -15.41957950592041, "step": 4388 }, { "epoch": 1.5, "learning_rate": 3.1270190973258284e-07, "logits/chosen": 0.0907621681690216, "logits/rejected": 0.12481584399938583, "logps/chosen": -201.4434051513672, "logps/rejected": -341.3499755859375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2745792865753174, "rewards/margins": 15.317316055297852, "rewards/rejected": -16.591896057128906, "step": 4389 }, { "epoch": 1.5, "learning_rate": 3.123005411465766e-07, "logits/chosen": -0.05339006707072258, "logits/rejected": -0.04988336190581322, "logps/chosen": -265.98626708984375, "logps/rejected": -385.7613525390625, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -1.2087634801864624, "rewards/margins": 13.403783798217773, "rewards/rejected": -14.612546920776367, "step": 4390 }, { "epoch": 1.5, "learning_rate": 3.118993826432651e-07, "logits/chosen": 0.11453446000814438, "logits/rejected": 0.14702168107032776, "logps/chosen": -208.63609313964844, "logps/rejected": -344.2193603515625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.4785375595092773, "rewards/margins": 14.609548568725586, "rewards/rejected": -17.088085174560547, "step": 4391 }, { "epoch": 1.5, "learning_rate": 3.114984343451963e-07, "logits/chosen": 0.12415239214897156, "logits/rejected": 0.16183917224407196, "logps/chosen": -184.05661010742188, "logps/rejected": -369.9964599609375, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -1.9000606536865234, "rewards/margins": 18.49320411682129, "rewards/rejected": -20.393264770507812, "step": 4392 }, { "epoch": 1.5, "learning_rate": 3.110976963748536e-07, "logits/chosen": 0.019047632813453674, "logits/rejected": 0.0398724228143692, "logps/chosen": -196.09812927246094, "logps/rejected": -363.8411560058594, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -3.454355001449585, "rewards/margins": 15.542716979980469, "rewards/rejected": -18.997072219848633, "step": 4393 }, { "epoch": 1.5, "learning_rate": 3.1069716885465825e-07, "logits/chosen": 0.06341458112001419, "logits/rejected": 0.0881524533033371, "logps/chosen": -168.32266235351562, "logps/rejected": -306.04925537109375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.3659311532974243, "rewards/margins": 16.27793312072754, "rewards/rejected": -17.64386558532715, "step": 4394 }, { "epoch": 1.5, "learning_rate": 3.1029685190696495e-07, "logits/chosen": 0.020826993510127068, "logits/rejected": 0.07280756533145905, "logps/chosen": -195.78163146972656, "logps/rejected": -362.32537841796875, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -2.5301506519317627, "rewards/margins": 17.69802474975586, "rewards/rejected": -20.228174209594727, "step": 4395 }, { "epoch": 1.5, "learning_rate": 3.098967456540652e-07, "logits/chosen": -0.0855996310710907, "logits/rejected": -0.033429473638534546, "logps/chosen": -239.0665740966797, "logps/rejected": -445.3653259277344, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.3149139881134033, "rewards/margins": 21.3874568939209, "rewards/rejected": -21.702369689941406, "step": 4396 }, { "epoch": 1.5, "learning_rate": 3.094968502181864e-07, "logits/chosen": 0.009190498851239681, "logits/rejected": 0.05668855458498001, "logps/chosen": -260.3000183105469, "logps/rejected": -319.2230529785156, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -2.0435924530029297, "rewards/margins": 12.870965957641602, "rewards/rejected": -14.914558410644531, "step": 4397 }, { "epoch": 1.5, "learning_rate": 3.0909716572149035e-07, "logits/chosen": 0.05123617872595787, "logits/rejected": 0.07331855595111847, "logps/chosen": -198.79701232910156, "logps/rejected": -345.16900634765625, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -2.0522561073303223, "rewards/margins": 15.647686004638672, "rewards/rejected": -17.69994354248047, "step": 4398 }, { "epoch": 1.5, "learning_rate": 3.086976922860756e-07, "logits/chosen": 0.03884297236800194, "logits/rejected": 0.052495844662189484, "logps/chosen": -181.13677978515625, "logps/rejected": -309.3338623046875, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -2.978314161300659, "rewards/margins": 12.731573104858398, "rewards/rejected": -15.709888458251953, "step": 4399 }, { "epoch": 1.5, "learning_rate": 3.082984300339756e-07, "logits/chosen": 0.05582261085510254, "logits/rejected": 0.09294106066226959, "logps/chosen": -243.6866455078125, "logps/rejected": -363.88299560546875, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.2475770711898804, "rewards/margins": 16.766071319580078, "rewards/rejected": -18.013648986816406, "step": 4400 }, { "epoch": 1.5, "learning_rate": 3.078993790871598e-07, "logits/chosen": 0.10155589878559113, "logits/rejected": 0.12414388358592987, "logps/chosen": -145.73512268066406, "logps/rejected": -265.88726806640625, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.9359225034713745, "rewards/margins": 13.537559509277344, "rewards/rejected": -15.473482131958008, "step": 4401 }, { "epoch": 1.5, "learning_rate": 3.075005395675322e-07, "logits/chosen": 0.05910877138376236, "logits/rejected": 0.09663108736276627, "logps/chosen": -198.36952209472656, "logps/rejected": -379.7991638183594, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -1.2283918857574463, "rewards/margins": 18.950117111206055, "rewards/rejected": -20.178508758544922, "step": 4402 }, { "epoch": 1.5, "learning_rate": 3.071019115969331e-07, "logits/chosen": -0.04984736442565918, "logits/rejected": -0.029368501156568527, "logps/chosen": -193.206298828125, "logps/rejected": -335.06878662109375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.395674705505371, "rewards/margins": 14.2828369140625, "rewards/rejected": -15.678510665893555, "step": 4403 }, { "epoch": 1.5, "learning_rate": 3.0670349529713814e-07, "logits/chosen": -0.0024674287997186184, "logits/rejected": 0.04807952791452408, "logps/chosen": -184.66000366210938, "logps/rejected": -294.3439636230469, "loss": 0.0343, "rewards/accuracies": 0.9375, "rewards/chosen": -2.32595157623291, "rewards/margins": 12.630908012390137, "rewards/rejected": -14.956859588623047, "step": 4404 }, { "epoch": 1.5, "learning_rate": 3.06305290789857e-07, "logits/chosen": -0.010676036588847637, "logits/rejected": -0.013361466117203236, "logps/chosen": -210.98133850097656, "logps/rejected": -378.24078369140625, "loss": 0.0855, "rewards/accuracies": 1.0, "rewards/chosen": -0.9327392578125, "rewards/margins": 14.606517791748047, "rewards/rejected": -15.539257049560547, "step": 4405 }, { "epoch": 1.5, "learning_rate": 3.05907298196737e-07, "logits/chosen": 0.06737245619297028, "logits/rejected": 0.09578125178813934, "logps/chosen": -177.70242309570312, "logps/rejected": -344.76043701171875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.9510307312011719, "rewards/margins": 17.672502517700195, "rewards/rejected": -19.62353515625, "step": 4406 }, { "epoch": 1.5, "learning_rate": 3.055095176393586e-07, "logits/chosen": 0.03461984544992447, "logits/rejected": 0.0713416114449501, "logps/chosen": -204.81089782714844, "logps/rejected": -389.0757141113281, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8536511063575745, "rewards/margins": 21.300151824951172, "rewards/rejected": -22.15380096435547, "step": 4407 }, { "epoch": 1.5, "learning_rate": 3.051119492392379e-07, "logits/chosen": 0.06109805032610893, "logits/rejected": 0.07408875972032547, "logps/chosen": -197.58328247070312, "logps/rejected": -341.08184814453125, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -2.9643712043762207, "rewards/margins": 14.041495323181152, "rewards/rejected": -17.00586700439453, "step": 4408 }, { "epoch": 1.5, "learning_rate": 3.0471459311782754e-07, "logits/chosen": -0.03295816853642464, "logits/rejected": -0.012696613557636738, "logps/chosen": -245.47640991210938, "logps/rejected": -418.9481201171875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.2287436723709106, "rewards/margins": 17.64110565185547, "rewards/rejected": -18.869848251342773, "step": 4409 }, { "epoch": 1.51, "learning_rate": 3.043174493965136e-07, "logits/chosen": 0.013673997484147549, "logits/rejected": 0.05395479500293732, "logps/chosen": -258.09039306640625, "logps/rejected": -423.7416076660156, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.0509250164031982, "rewards/margins": 17.629396438598633, "rewards/rejected": -19.680320739746094, "step": 4410 }, { "epoch": 1.51, "learning_rate": 3.039205181966182e-07, "logits/chosen": 0.06259959191083908, "logits/rejected": 0.09210248291492462, "logps/chosen": -172.81851196289062, "logps/rejected": -290.01763916015625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.9722685813903809, "rewards/margins": 12.864203453063965, "rewards/rejected": -13.836472511291504, "step": 4411 }, { "epoch": 1.51, "learning_rate": 3.035237996393988e-07, "logits/chosen": -0.0772177055478096, "logits/rejected": -0.04561130329966545, "logps/chosen": -241.74447631835938, "logps/rejected": -371.03326416015625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.7870415449142456, "rewards/margins": 13.927226066589355, "rewards/rejected": -15.71426773071289, "step": 4412 }, { "epoch": 1.51, "learning_rate": 3.0312729384604665e-07, "logits/chosen": -0.05861281603574753, "logits/rejected": -0.03176199272274971, "logps/chosen": -224.12460327148438, "logps/rejected": -394.4514465332031, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -1.5370328426361084, "rewards/margins": 18.205669403076172, "rewards/rejected": -19.74270248413086, "step": 4413 }, { "epoch": 1.51, "learning_rate": 3.027310009376891e-07, "logits/chosen": 0.15516723692417145, "logits/rejected": 0.1497984081506729, "logps/chosen": -172.70611572265625, "logps/rejected": -335.0188293457031, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -2.6753320693969727, "rewards/margins": 13.809154510498047, "rewards/rejected": -16.484487533569336, "step": 4414 }, { "epoch": 1.51, "learning_rate": 3.023349210353883e-07, "logits/chosen": 0.09975741803646088, "logits/rejected": 0.12526468932628632, "logps/chosen": -169.22796630859375, "logps/rejected": -293.97003173828125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.9728734493255615, "rewards/margins": 14.233963966369629, "rewards/rejected": -17.206836700439453, "step": 4415 }, { "epoch": 1.51, "learning_rate": 3.019390542601414e-07, "logits/chosen": -0.0022732459474354982, "logits/rejected": -0.004747386090457439, "logps/chosen": -118.389892578125, "logps/rejected": -279.08148193359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.232492446899414, "rewards/margins": 12.619851112365723, "rewards/rejected": -14.85234260559082, "step": 4416 }, { "epoch": 1.51, "learning_rate": 3.015434007328798e-07, "logits/chosen": 0.04937727004289627, "logits/rejected": 0.07621389627456665, "logps/chosen": -226.60818481445312, "logps/rejected": -336.7806091308594, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.7057977914810181, "rewards/margins": 15.056217193603516, "rewards/rejected": -15.762014389038086, "step": 4417 }, { "epoch": 1.51, "learning_rate": 3.011479605744702e-07, "logits/chosen": 0.11435429751873016, "logits/rejected": 0.11159884929656982, "logps/chosen": -160.54896545410156, "logps/rejected": -329.060302734375, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.7702357769012451, "rewards/margins": 13.127532005310059, "rewards/rejected": -13.897769927978516, "step": 4418 }, { "epoch": 1.51, "learning_rate": 3.0075273390571465e-07, "logits/chosen": 0.0026632605586200953, "logits/rejected": 0.012508528307080269, "logps/chosen": -222.6725616455078, "logps/rejected": -398.0294494628906, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.6086995601654053, "rewards/margins": 18.320438385009766, "rewards/rejected": -20.92913818359375, "step": 4419 }, { "epoch": 1.51, "learning_rate": 3.003577208473488e-07, "logits/chosen": 0.09890161454677582, "logits/rejected": 0.1217237114906311, "logps/chosen": -171.4306182861328, "logps/rejected": -340.7838134765625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.1555532217025757, "rewards/margins": 17.006031036376953, "rewards/rejected": -18.161582946777344, "step": 4420 }, { "epoch": 1.51, "learning_rate": 2.999629215200439e-07, "logits/chosen": -0.058943308889865875, "logits/rejected": -0.029612716287374496, "logps/chosen": -252.36837768554688, "logps/rejected": -357.872802734375, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.3897216320037842, "rewards/margins": 15.40587043762207, "rewards/rejected": -16.795591354370117, "step": 4421 }, { "epoch": 1.51, "learning_rate": 2.995683360444061e-07, "logits/chosen": -0.0839359313249588, "logits/rejected": -0.045875709503889084, "logps/chosen": -204.9422607421875, "logps/rejected": -311.83270263671875, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.4521197080612183, "rewards/margins": 12.528888702392578, "rewards/rejected": -13.981008529663086, "step": 4422 }, { "epoch": 1.51, "learning_rate": 2.991739645409749e-07, "logits/chosen": -0.0739387571811676, "logits/rejected": -0.04781606048345566, "logps/chosen": -277.541748046875, "logps/rejected": -375.77459716796875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.633896827697754, "rewards/margins": 15.221880912780762, "rewards/rejected": -16.85577964782715, "step": 4423 }, { "epoch": 1.51, "learning_rate": 2.987798071302264e-07, "logits/chosen": 0.017848161980509758, "logits/rejected": 0.06723958998918533, "logps/chosen": -251.25804138183594, "logps/rejected": -355.56634521484375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.9977980852127075, "rewards/margins": 13.889617919921875, "rewards/rejected": -15.88741397857666, "step": 4424 }, { "epoch": 1.51, "learning_rate": 2.983858639325698e-07, "logits/chosen": 0.008755686692893505, "logits/rejected": 0.05108032003045082, "logps/chosen": -184.06529235839844, "logps/rejected": -294.8797607421875, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.6095895767211914, "rewards/margins": 14.915617942810059, "rewards/rejected": -16.52520751953125, "step": 4425 }, { "epoch": 1.51, "learning_rate": 2.9799213506834873e-07, "logits/chosen": 0.0004793024272657931, "logits/rejected": 0.029828637838363647, "logps/chosen": -237.73513793945312, "logps/rejected": -390.47369384765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9036670923233032, "rewards/margins": 15.660957336425781, "rewards/rejected": -16.564624786376953, "step": 4426 }, { "epoch": 1.51, "learning_rate": 2.9759862065784315e-07, "logits/chosen": 0.13620266318321228, "logits/rejected": 0.17094597220420837, "logps/chosen": -162.24974060058594, "logps/rejected": -337.8426208496094, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.243691921234131, "rewards/margins": 17.363048553466797, "rewards/rejected": -19.606740951538086, "step": 4427 }, { "epoch": 1.51, "learning_rate": 2.9720532082126516e-07, "logits/chosen": 0.033421676605939865, "logits/rejected": 0.06761167198419571, "logps/chosen": -194.81723022460938, "logps/rejected": -359.1263732910156, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.184945821762085, "rewards/margins": 17.035839080810547, "rewards/rejected": -18.220788955688477, "step": 4428 }, { "epoch": 1.51, "learning_rate": 2.96812235678763e-07, "logits/chosen": 0.08773387968540192, "logits/rejected": 0.09540438652038574, "logps/chosen": -139.48345947265625, "logps/rejected": -248.02719116210938, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.1549475193023682, "rewards/margins": 11.180215835571289, "rewards/rejected": -12.335163116455078, "step": 4429 }, { "epoch": 1.51, "learning_rate": 2.964193653504189e-07, "logits/chosen": 0.21578121185302734, "logits/rejected": 0.23124416172504425, "logps/chosen": -129.24530029296875, "logps/rejected": -247.57882690429688, "loss": 0.0411, "rewards/accuracies": 0.9375, "rewards/chosen": -1.291776418685913, "rewards/margins": 11.43124771118164, "rewards/rejected": -12.723024368286133, "step": 4430 }, { "epoch": 1.51, "learning_rate": 2.960267099562487e-07, "logits/chosen": 0.0896163061261177, "logits/rejected": 0.09865211695432663, "logps/chosen": -96.79281616210938, "logps/rejected": -255.55657958984375, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -0.7019318342208862, "rewards/margins": 14.402973175048828, "rewards/rejected": -15.104904174804688, "step": 4431 }, { "epoch": 1.51, "learning_rate": 2.9563426961620364e-07, "logits/chosen": -0.04042383283376694, "logits/rejected": -0.017516905441880226, "logps/chosen": -231.26190185546875, "logps/rejected": -446.4617614746094, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.4909179210662842, "rewards/margins": 19.040058135986328, "rewards/rejected": -20.530975341796875, "step": 4432 }, { "epoch": 1.51, "learning_rate": 2.952420444501686e-07, "logits/chosen": 0.036858368664979935, "logits/rejected": 0.04635070636868477, "logps/chosen": -209.69625854492188, "logps/rejected": -413.783935546875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.8679897785186768, "rewards/margins": 18.73342514038086, "rewards/rejected": -20.60141372680664, "step": 4433 }, { "epoch": 1.51, "learning_rate": 2.9485003457796365e-07, "logits/chosen": 0.143127903342247, "logits/rejected": 0.16677933931350708, "logps/chosen": -166.71482849121094, "logps/rejected": -311.1341552734375, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": -2.717500925064087, "rewards/margins": 14.79780387878418, "rewards/rejected": -17.515304565429688, "step": 4434 }, { "epoch": 1.51, "learning_rate": 2.944582401193414e-07, "logits/chosen": -0.05959908291697502, "logits/rejected": -0.02712157368659973, "logps/chosen": -281.6046447753906, "logps/rejected": -409.7314758300781, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.6565060615539551, "rewards/margins": 17.503572463989258, "rewards/rejected": -18.160078048706055, "step": 4435 }, { "epoch": 1.51, "learning_rate": 2.9406666119399013e-07, "logits/chosen": 0.0635310709476471, "logits/rejected": 0.08151053637266159, "logps/chosen": -169.11724853515625, "logps/rejected": -366.3287353515625, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -2.53498911857605, "rewards/margins": 17.74028778076172, "rewards/rejected": -20.27527618408203, "step": 4436 }, { "epoch": 1.51, "learning_rate": 2.936752979215321e-07, "logits/chosen": -0.01886734738945961, "logits/rejected": 0.0054448749870061874, "logps/chosen": -244.23231506347656, "logps/rejected": -376.1762390136719, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -2.5835375785827637, "rewards/margins": 15.662379264831543, "rewards/rejected": -18.24591827392578, "step": 4437 }, { "epoch": 1.51, "learning_rate": 2.9328415042152246e-07, "logits/chosen": -0.09401209652423859, "logits/rejected": -0.04691458120942116, "logps/chosen": -233.44569396972656, "logps/rejected": -366.38372802734375, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.7418484687805176, "rewards/margins": 18.960628509521484, "rewards/rejected": -19.702476501464844, "step": 4438 }, { "epoch": 1.52, "learning_rate": 2.9289321881345254e-07, "logits/chosen": 0.06317735463380814, "logits/rejected": 0.05693596974015236, "logps/chosen": -207.73875427246094, "logps/rejected": -404.81427001953125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.3201310634613037, "rewards/margins": 17.234682083129883, "rewards/rejected": -18.5548152923584, "step": 4439 }, { "epoch": 1.52, "learning_rate": 2.9250250321674597e-07, "logits/chosen": 0.04835100844502449, "logits/rejected": 0.045245211571455, "logps/chosen": -203.49107360839844, "logps/rejected": -399.0494689941406, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -2.163529396057129, "rewards/margins": 18.014034271240234, "rewards/rejected": -20.17756462097168, "step": 4440 }, { "epoch": 1.52, "learning_rate": 2.921120037507604e-07, "logits/chosen": 0.03533770889043808, "logits/rejected": 0.03605028614401817, "logps/chosen": -218.7498779296875, "logps/rejected": -359.698486328125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.4504494667053223, "rewards/margins": 13.95750617980957, "rewards/rejected": -16.407955169677734, "step": 4441 }, { "epoch": 1.52, "learning_rate": 2.917217205347893e-07, "logits/chosen": -0.09449813514947891, "logits/rejected": -0.048739004880189896, "logps/chosen": -234.75799560546875, "logps/rejected": -383.33831787109375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.285207748413086, "rewards/margins": 16.761276245117188, "rewards/rejected": -20.046485900878906, "step": 4442 }, { "epoch": 1.52, "learning_rate": 2.913316536880577e-07, "logits/chosen": 0.04533865675330162, "logits/rejected": 0.034010227769613266, "logps/chosen": -147.85482788085938, "logps/rejected": -386.4061279296875, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -2.2851815223693848, "rewards/margins": 20.05844497680664, "rewards/rejected": -22.3436279296875, "step": 4443 }, { "epoch": 1.52, "learning_rate": 2.9094180332972617e-07, "logits/chosen": -0.07707381248474121, "logits/rejected": -0.04280254989862442, "logps/chosen": -288.5745849609375, "logps/rejected": -444.1606750488281, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -1.2443815469741821, "rewards/margins": 18.654010772705078, "rewards/rejected": -19.898391723632812, "step": 4444 }, { "epoch": 1.52, "learning_rate": 2.9055216957888873e-07, "logits/chosen": -0.05901385098695755, "logits/rejected": -0.03822119161486626, "logps/chosen": -160.18870544433594, "logps/rejected": -346.3428649902344, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.4248104691505432, "rewards/margins": 16.872177124023438, "rewards/rejected": -17.296987533569336, "step": 4445 }, { "epoch": 1.52, "learning_rate": 2.9016275255457256e-07, "logits/chosen": 0.05507480725646019, "logits/rejected": 0.08859165757894516, "logps/chosen": -164.8011016845703, "logps/rejected": -339.1390380859375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.382650375366211, "rewards/margins": 17.056365966796875, "rewards/rejected": -18.439016342163086, "step": 4446 }, { "epoch": 1.52, "learning_rate": 2.8977355237573963e-07, "logits/chosen": 0.10523141175508499, "logits/rejected": 0.1283089965581894, "logps/chosen": -157.9005126953125, "logps/rejected": -282.3906555175781, "loss": 0.0224, "rewards/accuracies": 0.9375, "rewards/chosen": -1.412775993347168, "rewards/margins": 14.068625450134277, "rewards/rejected": -15.481401443481445, "step": 4447 }, { "epoch": 1.52, "learning_rate": 2.8938456916128504e-07, "logits/chosen": 0.07383261620998383, "logits/rejected": 0.07770305871963501, "logps/chosen": -195.38467407226562, "logps/rejected": -406.7305908203125, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.8476859331130981, "rewards/margins": 18.960912704467773, "rewards/rejected": -20.8085994720459, "step": 4448 }, { "epoch": 1.52, "learning_rate": 2.8899580303003825e-07, "logits/chosen": 0.04241826385259628, "logits/rejected": 0.07608509063720703, "logps/chosen": -206.51856994628906, "logps/rejected": -376.01947021484375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.4919776916503906, "rewards/margins": 18.33988380432129, "rewards/rejected": -19.83186149597168, "step": 4449 }, { "epoch": 1.52, "learning_rate": 2.8860725410076117e-07, "logits/chosen": 0.0031024564523249865, "logits/rejected": 0.04916869103908539, "logps/chosen": -222.95997619628906, "logps/rejected": -440.71142578125, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -2.2740280628204346, "rewards/margins": 18.889266967773438, "rewards/rejected": -21.163293838500977, "step": 4450 }, { "epoch": 1.52, "learning_rate": 2.882189224921505e-07, "logits/chosen": -0.020141208544373512, "logits/rejected": -0.015833469107747078, "logps/chosen": -236.32672119140625, "logps/rejected": -428.168212890625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.413756847381592, "rewards/margins": 16.85057258605957, "rewards/rejected": -19.264328002929688, "step": 4451 }, { "epoch": 1.52, "learning_rate": 2.8783080832283657e-07, "logits/chosen": -0.08898389339447021, "logits/rejected": -0.06714686751365662, "logps/chosen": -222.10641479492188, "logps/rejected": -427.47198486328125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.9213037490844727, "rewards/margins": 19.543846130371094, "rewards/rejected": -21.465150833129883, "step": 4452 }, { "epoch": 1.52, "learning_rate": 2.874429117113819e-07, "logits/chosen": 0.07382519543170929, "logits/rejected": 0.10243045538663864, "logps/chosen": -188.35751342773438, "logps/rejected": -301.0985412597656, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.995507538318634, "rewards/margins": 15.840841293334961, "rewards/rejected": -16.836349487304688, "step": 4453 }, { "epoch": 1.52, "learning_rate": 2.8705523277628483e-07, "logits/chosen": -0.031330954283475876, "logits/rejected": 0.023925840854644775, "logps/chosen": -175.11526489257812, "logps/rejected": -374.3099365234375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.8759933114051819, "rewards/margins": 20.372278213500977, "rewards/rejected": -21.248271942138672, "step": 4454 }, { "epoch": 1.52, "learning_rate": 2.8666777163597534e-07, "logits/chosen": -0.031228717416524887, "logits/rejected": -0.0005533545627258718, "logps/chosen": -210.05067443847656, "logps/rejected": -385.7589111328125, "loss": 0.0659, "rewards/accuracies": 1.0, "rewards/chosen": -2.2108991146087646, "rewards/margins": 18.491466522216797, "rewards/rejected": -20.70236587524414, "step": 4455 }, { "epoch": 1.52, "learning_rate": 2.862805284088168e-07, "logits/chosen": 0.04645277187228203, "logits/rejected": 0.05306532606482506, "logps/chosen": -200.83116149902344, "logps/rejected": -405.8982849121094, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.8557426929473877, "rewards/margins": 18.558469772338867, "rewards/rejected": -21.414213180541992, "step": 4456 }, { "epoch": 1.52, "learning_rate": 2.858935032131078e-07, "logits/chosen": 0.08771877735853195, "logits/rejected": 0.11187692731618881, "logps/chosen": -184.3781280517578, "logps/rejected": -313.3587646484375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.3678102493286133, "rewards/margins": 14.223406791687012, "rewards/rejected": -16.591217041015625, "step": 4457 }, { "epoch": 1.52, "learning_rate": 2.8550669616707847e-07, "logits/chosen": 0.05873432755470276, "logits/rejected": 0.09558360278606415, "logps/chosen": -185.63095092773438, "logps/rejected": -297.2400817871094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.2361249923706055, "rewards/margins": 16.41701889038086, "rewards/rejected": -17.65314483642578, "step": 4458 }, { "epoch": 1.52, "learning_rate": 2.8512010738889323e-07, "logits/chosen": -0.04613880068063736, "logits/rejected": -0.01883751153945923, "logps/chosen": -215.17657470703125, "logps/rejected": -347.8094787597656, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.7345653772354126, "rewards/margins": 17.153343200683594, "rewards/rejected": -18.887908935546875, "step": 4459 }, { "epoch": 1.52, "learning_rate": 2.847337369966499e-07, "logits/chosen": 0.006671112030744553, "logits/rejected": 0.039476264268159866, "logps/chosen": -164.61866760253906, "logps/rejected": -321.075439453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.221181869506836, "rewards/margins": 15.989972114562988, "rewards/rejected": -18.21115493774414, "step": 4460 }, { "epoch": 1.52, "learning_rate": 2.8434758510837877e-07, "logits/chosen": 0.00651088822633028, "logits/rejected": 0.02889995276927948, "logps/chosen": -171.1531524658203, "logps/rejected": -277.0455322265625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.8135018944740295, "rewards/margins": 10.596155166625977, "rewards/rejected": -11.409656524658203, "step": 4461 }, { "epoch": 1.52, "learning_rate": 2.8396165184204415e-07, "logits/chosen": -0.024472109973430634, "logits/rejected": 0.01124905701726675, "logps/chosen": -242.92892456054688, "logps/rejected": -344.9659423828125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.6223101615905762, "rewards/margins": 16.139467239379883, "rewards/rejected": -17.761777877807617, "step": 4462 }, { "epoch": 1.52, "learning_rate": 2.835759373155435e-07, "logits/chosen": 0.08075892925262451, "logits/rejected": 0.08490718901157379, "logps/chosen": -109.31065368652344, "logps/rejected": -341.82830810546875, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -1.5349348783493042, "rewards/margins": 18.33214569091797, "rewards/rejected": -19.86707878112793, "step": 4463 }, { "epoch": 1.52, "learning_rate": 2.83190441646707e-07, "logits/chosen": -0.06351850181818008, "logits/rejected": -0.039559945464134216, "logps/chosen": -219.69354248046875, "logps/rejected": -445.9971008300781, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.5978385210037231, "rewards/margins": 20.573888778686523, "rewards/rejected": -22.171728134155273, "step": 4464 }, { "epoch": 1.52, "learning_rate": 2.8280516495329843e-07, "logits/chosen": 0.06548183411359787, "logits/rejected": 0.07099302113056183, "logps/chosen": -179.4274139404297, "logps/rejected": -344.26165771484375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -2.597701072692871, "rewards/margins": 14.823867797851562, "rewards/rejected": -17.42156982421875, "step": 4465 }, { "epoch": 1.52, "learning_rate": 2.8242010735301435e-07, "logits/chosen": -0.06009636074304581, "logits/rejected": -0.029756156727671623, "logps/chosen": -218.01611328125, "logps/rejected": -417.6410217285156, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -3.3245723247528076, "rewards/margins": 18.415050506591797, "rewards/rejected": -21.7396240234375, "step": 4466 }, { "epoch": 1.52, "learning_rate": 2.8203526896348493e-07, "logits/chosen": 0.047798674553632736, "logits/rejected": 0.09285738319158554, "logps/chosen": -198.9239959716797, "logps/rejected": -359.50653076171875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.052632927894592285, "rewards/margins": 20.203250885009766, "rewards/rejected": -20.150617599487305, "step": 4467 }, { "epoch": 1.52, "learning_rate": 2.816506499022725e-07, "logits/chosen": -0.03541864454746246, "logits/rejected": -0.028124742209911346, "logps/chosen": -218.83615112304688, "logps/rejected": -402.82769775390625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.7533666491508484, "rewards/margins": 17.455360412597656, "rewards/rejected": -18.208728790283203, "step": 4468 }, { "epoch": 1.53, "learning_rate": 2.812662502868731e-07, "logits/chosen": 0.08306866139173508, "logits/rejected": 0.09371405839920044, "logps/chosen": -193.11367797851562, "logps/rejected": -335.9728698730469, "loss": 0.0162, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8244866132736206, "rewards/margins": 14.59593391418457, "rewards/rejected": -16.420421600341797, "step": 4469 }, { "epoch": 1.53, "learning_rate": 2.808820702347158e-07, "logits/chosen": 0.156871035695076, "logits/rejected": 0.17434771358966827, "logps/chosen": -169.35446166992188, "logps/rejected": -354.8345031738281, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.4609599113464355, "rewards/margins": 14.527076721191406, "rewards/rejected": -15.988038063049316, "step": 4470 }, { "epoch": 1.53, "learning_rate": 2.804981098631612e-07, "logits/chosen": 0.006380854174494743, "logits/rejected": 0.04292047768831253, "logps/chosen": -238.08563232421875, "logps/rejected": -341.8504638671875, "loss": 0.0662, "rewards/accuracies": 1.0, "rewards/chosen": -0.6135713458061218, "rewards/margins": 13.96832275390625, "rewards/rejected": -14.581893920898438, "step": 4471 }, { "epoch": 1.53, "learning_rate": 2.801143692895055e-07, "logits/chosen": -0.11611383408308029, "logits/rejected": -0.05984778329730034, "logps/chosen": -277.9203186035156, "logps/rejected": -414.03851318359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.2573306560516357, "rewards/margins": 19.571083068847656, "rewards/rejected": -20.828414916992188, "step": 4472 }, { "epoch": 1.53, "learning_rate": 2.797308486309753e-07, "logits/chosen": -0.06158897280693054, "logits/rejected": -0.034709297120571136, "logps/chosen": -258.76995849609375, "logps/rejected": -420.2793884277344, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.050747863948345184, "rewards/margins": 19.916292190551758, "rewards/rejected": -19.967041015625, "step": 4473 }, { "epoch": 1.53, "learning_rate": 2.793475480047303e-07, "logits/chosen": -0.051216211169958115, "logits/rejected": -0.0216104444116354, "logps/chosen": -174.4559326171875, "logps/rejected": -337.4826965332031, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -2.2694334983825684, "rewards/margins": 14.499423027038574, "rewards/rejected": -16.768856048583984, "step": 4474 }, { "epoch": 1.53, "learning_rate": 2.7896446752786475e-07, "logits/chosen": 0.06068332493305206, "logits/rejected": 0.08706415444612503, "logps/chosen": -179.18846130371094, "logps/rejected": -330.8844909667969, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -3.225816011428833, "rewards/margins": 14.494388580322266, "rewards/rejected": -17.720205307006836, "step": 4475 }, { "epoch": 1.53, "learning_rate": 2.785816073174035e-07, "logits/chosen": -0.14203685522079468, "logits/rejected": -0.13518747687339783, "logps/chosen": -231.8448486328125, "logps/rejected": -397.40362548828125, "loss": 0.0452, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7903172969818115, "rewards/margins": 16.031709671020508, "rewards/rejected": -17.8220272064209, "step": 4476 }, { "epoch": 1.53, "learning_rate": 2.781989674903056e-07, "logits/chosen": -0.06707335263490677, "logits/rejected": -0.030493367463350296, "logps/chosen": -212.97030639648438, "logps/rejected": -377.391845703125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -2.1831777095794678, "rewards/margins": 17.321651458740234, "rewards/rejected": -19.50482940673828, "step": 4477 }, { "epoch": 1.53, "learning_rate": 2.778165481634622e-07, "logits/chosen": 0.016526279971003532, "logits/rejected": 0.05538255348801613, "logps/chosen": -214.98074340820312, "logps/rejected": -406.0625305175781, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.6183977127075195, "rewards/margins": 18.43025016784668, "rewards/rejected": -20.048648834228516, "step": 4478 }, { "epoch": 1.53, "learning_rate": 2.774343494536968e-07, "logits/chosen": 0.0938183069229126, "logits/rejected": 0.09537967294454575, "logps/chosen": -189.75726318359375, "logps/rejected": -397.903076171875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.1890132427215576, "rewards/margins": 17.790451049804688, "rewards/rejected": -19.97946548461914, "step": 4479 }, { "epoch": 1.53, "learning_rate": 2.7705237147776595e-07, "logits/chosen": 0.045727845281362534, "logits/rejected": 0.07381787896156311, "logps/chosen": -191.15293884277344, "logps/rejected": -291.0589599609375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.8414742946624756, "rewards/margins": 13.290081977844238, "rewards/rejected": -14.131558418273926, "step": 4480 }, { "epoch": 1.53, "learning_rate": 2.766706143523587e-07, "logits/chosen": 0.0631766989827156, "logits/rejected": 0.09060593694448471, "logps/chosen": -129.28065490722656, "logps/rejected": -285.848388671875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.111265182495117, "rewards/margins": 14.644214630126953, "rewards/rejected": -16.75547981262207, "step": 4481 }, { "epoch": 1.53, "learning_rate": 2.7628907819409695e-07, "logits/chosen": -0.07449971139431, "logits/rejected": -0.058369703590869904, "logps/chosen": -244.65838623046875, "logps/rejected": -438.23541259765625, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.4399378299713135, "rewards/margins": 18.909008026123047, "rewards/rejected": -20.348941802978516, "step": 4482 }, { "epoch": 1.53, "learning_rate": 2.759077631195341e-07, "logits/chosen": 0.028365476056933403, "logits/rejected": 0.0518973171710968, "logps/chosen": -162.83697509765625, "logps/rejected": -246.05081176757812, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.6509099006652832, "rewards/margins": 11.509988784790039, "rewards/rejected": -13.160900115966797, "step": 4483 }, { "epoch": 1.53, "learning_rate": 2.7552666924515687e-07, "logits/chosen": 0.05798012763261795, "logits/rejected": 0.06881137937307358, "logps/chosen": -183.74937438964844, "logps/rejected": -430.7537536621094, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -1.7648817300796509, "rewards/margins": 19.31507110595703, "rewards/rejected": -21.079954147338867, "step": 4484 }, { "epoch": 1.53, "learning_rate": 2.7514579668738457e-07, "logits/chosen": 0.020740879699587822, "logits/rejected": 0.026373129338026047, "logps/chosen": -200.32919311523438, "logps/rejected": -345.583740234375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -3.2529847621917725, "rewards/margins": 15.516569137573242, "rewards/rejected": -18.769554138183594, "step": 4485 }, { "epoch": 1.53, "learning_rate": 2.7476514556256756e-07, "logits/chosen": 0.054664935916662216, "logits/rejected": 0.09868834167718887, "logps/chosen": -211.51486206054688, "logps/rejected": -330.3980712890625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.696223497390747, "rewards/margins": 15.942386627197266, "rewards/rejected": -17.63861083984375, "step": 4486 }, { "epoch": 1.53, "learning_rate": 2.7438471598699064e-07, "logits/chosen": 0.07979997992515564, "logits/rejected": 0.10774687677621841, "logps/chosen": -225.4812774658203, "logps/rejected": -420.5314025878906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.502626419067383, "rewards/margins": 18.727563858032227, "rewards/rejected": -21.230192184448242, "step": 4487 }, { "epoch": 1.53, "learning_rate": 2.740045080768694e-07, "logits/chosen": 0.09604020416736603, "logits/rejected": 0.10774925351142883, "logps/chosen": -156.27908325195312, "logps/rejected": -278.49267578125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.6006433963775635, "rewards/margins": 11.4165678024292, "rewards/rejected": -14.0172119140625, "step": 4488 }, { "epoch": 1.53, "learning_rate": 2.736245219483514e-07, "logits/chosen": 0.003971542697399855, "logits/rejected": 0.0409458763897419, "logps/chosen": -171.8875732421875, "logps/rejected": -288.6654052734375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.9970481395721436, "rewards/margins": 13.52405834197998, "rewards/rejected": -15.521105766296387, "step": 4489 }, { "epoch": 1.53, "learning_rate": 2.732447577175182e-07, "logits/chosen": 0.08195636421442032, "logits/rejected": 0.09459642320871353, "logps/chosen": -249.67919921875, "logps/rejected": -430.71966552734375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.6219336986541748, "rewards/margins": 17.767160415649414, "rewards/rejected": -19.38909339904785, "step": 4490 }, { "epoch": 1.53, "learning_rate": 2.728652155003819e-07, "logits/chosen": 0.015577170997858047, "logits/rejected": 0.036582186818122864, "logps/chosen": -179.69422912597656, "logps/rejected": -310.8036193847656, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.6756526231765747, "rewards/margins": 13.08595085144043, "rewards/rejected": -13.761604309082031, "step": 4491 }, { "epoch": 1.53, "learning_rate": 2.724858954128876e-07, "logits/chosen": -0.0062613049522042274, "logits/rejected": 0.006337924860417843, "logps/chosen": -216.50125122070312, "logps/rejected": -403.4850158691406, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.44367319345474243, "rewards/margins": 17.31382942199707, "rewards/rejected": -17.757503509521484, "step": 4492 }, { "epoch": 1.53, "learning_rate": 2.721067975709125e-07, "logits/chosen": 0.10441362857818604, "logits/rejected": 0.1280292421579361, "logps/chosen": -180.94134521484375, "logps/rejected": -300.2599792480469, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -2.3649234771728516, "rewards/margins": 12.388864517211914, "rewards/rejected": -14.753787994384766, "step": 4493 }, { "epoch": 1.53, "learning_rate": 2.717279220902654e-07, "logits/chosen": 0.07210653275251389, "logits/rejected": 0.10094688832759857, "logps/chosen": -179.09323120117188, "logps/rejected": -305.734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.539356529712677, "rewards/margins": 15.384306907653809, "rewards/rejected": -15.923664093017578, "step": 4494 }, { "epoch": 1.53, "learning_rate": 2.7134926908668773e-07, "logits/chosen": 0.06229829788208008, "logits/rejected": 0.06448683142662048, "logps/chosen": -148.00741577148438, "logps/rejected": -357.49395751953125, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -1.2636525630950928, "rewards/margins": 17.35814666748047, "rewards/rejected": -18.62179946899414, "step": 4495 }, { "epoch": 1.53, "learning_rate": 2.709708386758527e-07, "logits/chosen": 0.01748618483543396, "logits/rejected": 0.034798312932252884, "logps/chosen": -219.94479370117188, "logps/rejected": -438.4610900878906, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8675470948219299, "rewards/margins": 20.022207260131836, "rewards/rejected": -20.889751434326172, "step": 4496 }, { "epoch": 1.53, "learning_rate": 2.7059263097336595e-07, "logits/chosen": 0.06252507865428925, "logits/rejected": 0.0919593945145607, "logps/chosen": -157.85781860351562, "logps/rejected": -272.594970703125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.7175121307373047, "rewards/margins": 13.02358341217041, "rewards/rejected": -15.741095542907715, "step": 4497 }, { "epoch": 1.54, "learning_rate": 2.702146460947642e-07, "logits/chosen": 0.009883961640298367, "logits/rejected": 0.07292856276035309, "logps/chosen": -181.97544860839844, "logps/rejected": -269.2147521972656, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -0.9106104969978333, "rewards/margins": 15.391977310180664, "rewards/rejected": -16.302587509155273, "step": 4498 }, { "epoch": 1.54, "learning_rate": 2.6983688415551664e-07, "logits/chosen": 0.11831142753362656, "logits/rejected": 0.12998104095458984, "logps/chosen": -199.7061767578125, "logps/rejected": -346.4012145996094, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.753358006477356, "rewards/margins": 15.072686195373535, "rewards/rejected": -15.826044082641602, "step": 4499 }, { "epoch": 1.54, "learning_rate": 2.6945934527102485e-07, "logits/chosen": -0.03758179396390915, "logits/rejected": 0.004964509513229132, "logps/chosen": -242.8389129638672, "logps/rejected": -454.5576477050781, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -3.2549614906311035, "rewards/margins": 22.535358428955078, "rewards/rejected": -25.790321350097656, "step": 4500 }, { "epoch": 1.54, "learning_rate": 2.6908202955662116e-07, "logits/chosen": 0.07914654910564423, "logits/rejected": 0.10168928653001785, "logps/chosen": -233.82374572753906, "logps/rejected": -338.864501953125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.204193353652954, "rewards/margins": 12.076812744140625, "rewards/rejected": -13.281006813049316, "step": 4501 }, { "epoch": 1.54, "learning_rate": 2.687049371275705e-07, "logits/chosen": 0.08088475465774536, "logits/rejected": 0.1136421263217926, "logps/chosen": -224.29580688476562, "logps/rejected": -384.6058044433594, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.8680931329727173, "rewards/margins": 16.247236251831055, "rewards/rejected": -18.11532974243164, "step": 4502 }, { "epoch": 1.54, "learning_rate": 2.683280680990698e-07, "logits/chosen": -0.03743530437350273, "logits/rejected": -0.010237202979624271, "logps/chosen": -219.22021484375, "logps/rejected": -386.39398193359375, "loss": 0.0526, "rewards/accuracies": 1.0, "rewards/chosen": -1.979002833366394, "rewards/margins": 17.9324951171875, "rewards/rejected": -19.911497116088867, "step": 4503 }, { "epoch": 1.54, "learning_rate": 2.6795142258624636e-07, "logits/chosen": -0.003836500458419323, "logits/rejected": 0.013389799743890762, "logps/chosen": -178.89370727539062, "logps/rejected": -330.5672607421875, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -2.4404754638671875, "rewards/margins": 15.363967895507812, "rewards/rejected": -17.804443359375, "step": 4504 }, { "epoch": 1.54, "learning_rate": 2.675750007041615e-07, "logits/chosen": 0.062275372445583344, "logits/rejected": 0.07401805371046066, "logps/chosen": -240.9688262939453, "logps/rejected": -414.4643249511719, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.95815110206604, "rewards/margins": 17.865222930908203, "rewards/rejected": -19.82337188720703, "step": 4505 }, { "epoch": 1.54, "learning_rate": 2.6719880256780616e-07, "logits/chosen": -0.08465074747800827, "logits/rejected": -0.011268626898527145, "logps/chosen": -286.8562927246094, "logps/rejected": -423.0533447265625, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -2.18186354637146, "rewards/margins": 19.8358154296875, "rewards/rejected": -22.017681121826172, "step": 4506 }, { "epoch": 1.54, "learning_rate": 2.668228282921031e-07, "logits/chosen": 0.02954402193427086, "logits/rejected": 0.04063914343714714, "logps/chosen": -174.53726196289062, "logps/rejected": -342.86859130859375, "loss": 0.0501, "rewards/accuracies": 1.0, "rewards/chosen": -2.1792707443237305, "rewards/margins": 14.5270357131958, "rewards/rejected": -16.70630645751953, "step": 4507 }, { "epoch": 1.54, "learning_rate": 2.6644707799190866e-07, "logits/chosen": 0.01992669701576233, "logits/rejected": 0.06456510722637177, "logps/chosen": -266.5074462890625, "logps/rejected": -443.228759765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.7261717319488525, "rewards/margins": 19.155738830566406, "rewards/rejected": -21.881912231445312, "step": 4508 }, { "epoch": 1.54, "learning_rate": 2.6607155178200836e-07, "logits/chosen": 0.05665545538067818, "logits/rejected": 0.07945916056632996, "logps/chosen": -207.64468383789062, "logps/rejected": -416.83642578125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.6906620264053345, "rewards/margins": 19.858068466186523, "rewards/rejected": -21.54873275756836, "step": 4509 }, { "epoch": 1.54, "learning_rate": 2.6569624977712044e-07, "logits/chosen": -0.0396120510995388, "logits/rejected": -0.002965620718896389, "logps/chosen": -189.27890014648438, "logps/rejected": -271.8656921386719, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.5146945714950562, "rewards/margins": 12.585347175598145, "rewards/rejected": -14.100042343139648, "step": 4510 }, { "epoch": 1.54, "learning_rate": 2.65321172091895e-07, "logits/chosen": 0.0011231881799176335, "logits/rejected": 0.02062990516424179, "logps/chosen": -212.4644775390625, "logps/rejected": -334.75872802734375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.5794612169265747, "rewards/margins": 14.752185821533203, "rewards/rejected": -15.331646919250488, "step": 4511 }, { "epoch": 1.54, "learning_rate": 2.649463188409123e-07, "logits/chosen": 0.023934314027428627, "logits/rejected": 0.03726696968078613, "logps/chosen": -256.7148742675781, "logps/rejected": -379.10406494140625, "loss": 0.035, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0999953746795654, "rewards/margins": 15.926800727844238, "rewards/rejected": -17.02679443359375, "step": 4512 }, { "epoch": 1.54, "learning_rate": 2.645716901386853e-07, "logits/chosen": -0.0044967192225158215, "logits/rejected": 0.02033897116780281, "logps/chosen": -212.38287353515625, "logps/rejected": -429.0550842285156, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.4406960010528564, "rewards/margins": 20.27942657470703, "rewards/rejected": -23.720121383666992, "step": 4513 }, { "epoch": 1.54, "learning_rate": 2.6419728609965777e-07, "logits/chosen": 0.08607139438390732, "logits/rejected": 0.10410355031490326, "logps/chosen": -185.0890655517578, "logps/rejected": -326.6340637207031, "loss": 0.0691, "rewards/accuracies": 1.0, "rewards/chosen": -1.8063414096832275, "rewards/margins": 15.636781692504883, "rewards/rejected": -17.44312286376953, "step": 4514 }, { "epoch": 1.54, "learning_rate": 2.638231068382054e-07, "logits/chosen": -0.05868569016456604, "logits/rejected": -0.0046996125020086765, "logps/chosen": -236.80838012695312, "logps/rejected": -318.12945556640625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.6268472075462341, "rewards/margins": 14.064082145690918, "rewards/rejected": -14.69092845916748, "step": 4515 }, { "epoch": 1.54, "learning_rate": 2.634491524686341e-07, "logits/chosen": 0.041607946157455444, "logits/rejected": 0.07518641650676727, "logps/chosen": -207.6865234375, "logps/rejected": -309.1414794921875, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -2.9731926918029785, "rewards/margins": 10.222726821899414, "rewards/rejected": -13.195919036865234, "step": 4516 }, { "epoch": 1.54, "learning_rate": 2.6307542310518203e-07, "logits/chosen": -0.04544886574149132, "logits/rejected": -0.038682304322719574, "logps/chosen": -211.78887939453125, "logps/rejected": -415.3434143066406, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.1129961013793945, "rewards/margins": 17.886707305908203, "rewards/rejected": -19.99970245361328, "step": 4517 }, { "epoch": 1.54, "learning_rate": 2.627019188620189e-07, "logits/chosen": -0.0013609346933662891, "logits/rejected": 0.02771083451807499, "logps/chosen": -193.50799560546875, "logps/rejected": -392.4329833984375, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -3.323837995529175, "rewards/margins": 18.20741844177246, "rewards/rejected": -21.531259536743164, "step": 4518 }, { "epoch": 1.54, "learning_rate": 2.6232863985324385e-07, "logits/chosen": -0.021793486550450325, "logits/rejected": 0.020809438079595566, "logps/chosen": -186.65399169921875, "logps/rejected": -343.73187255859375, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -1.1138734817504883, "rewards/margins": 16.85207748413086, "rewards/rejected": -17.96595001220703, "step": 4519 }, { "epoch": 1.54, "learning_rate": 2.619555861928899e-07, "logits/chosen": 0.058973439037799835, "logits/rejected": 0.0604742132127285, "logps/chosen": -163.44358825683594, "logps/rejected": -292.65472412109375, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -1.3259767293930054, "rewards/margins": 12.839677810668945, "rewards/rejected": -14.165655136108398, "step": 4520 }, { "epoch": 1.54, "learning_rate": 2.615827579949191e-07, "logits/chosen": 0.03249070420861244, "logits/rejected": 0.06608676165342331, "logps/chosen": -174.67022705078125, "logps/rejected": -286.3646545410156, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.5560686588287354, "rewards/margins": 13.973345756530762, "rewards/rejected": -14.529414176940918, "step": 4521 }, { "epoch": 1.54, "learning_rate": 2.6121015537322477e-07, "logits/chosen": -0.06695228070020676, "logits/rejected": -0.027882765978574753, "logps/chosen": -219.13681030273438, "logps/rejected": -400.625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.7677586078643799, "rewards/margins": 18.9687557220459, "rewards/rejected": -19.736513137817383, "step": 4522 }, { "epoch": 1.54, "learning_rate": 2.608377784416331e-07, "logits/chosen": 0.13615237176418304, "logits/rejected": 0.1618366241455078, "logps/chosen": -221.54164123535156, "logps/rejected": -311.48651123046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.2675306797027588, "rewards/margins": 13.478278160095215, "rewards/rejected": -14.745809555053711, "step": 4523 }, { "epoch": 1.54, "learning_rate": 2.604656273138991e-07, "logits/chosen": -0.03750234469771385, "logits/rejected": -0.022183898836374283, "logps/chosen": -190.27786254882812, "logps/rejected": -364.05780029296875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.4564290046691895, "rewards/margins": 14.117835998535156, "rewards/rejected": -15.574262619018555, "step": 4524 }, { "epoch": 1.54, "learning_rate": 2.6009370210371016e-07, "logits/chosen": -0.040233906358480453, "logits/rejected": -0.019328074529767036, "logps/chosen": -230.23976135253906, "logps/rejected": -437.1804504394531, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.6835161447525024, "rewards/margins": 19.106874465942383, "rewards/rejected": -20.790390014648438, "step": 4525 }, { "epoch": 1.54, "learning_rate": 2.597220029246846e-07, "logits/chosen": -0.06295599788427353, "logits/rejected": -0.04392631724476814, "logps/chosen": -220.44215393066406, "logps/rejected": -399.8193664550781, "loss": 0.0207, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4657442569732666, "rewards/margins": 16.864042282104492, "rewards/rejected": -19.329788208007812, "step": 4526 }, { "epoch": 1.55, "learning_rate": 2.593505298903709e-07, "logits/chosen": 0.11025813221931458, "logits/rejected": 0.12321700900793076, "logps/chosen": -147.883056640625, "logps/rejected": -266.5400695800781, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.4356071949005127, "rewards/margins": 12.71380615234375, "rewards/rejected": -14.149413108825684, "step": 4527 }, { "epoch": 1.55, "learning_rate": 2.5897928311424897e-07, "logits/chosen": 0.03978344425559044, "logits/rejected": 0.06935302913188934, "logps/chosen": -181.32733154296875, "logps/rejected": -340.06329345703125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -2.4845399856567383, "rewards/margins": 16.07389259338379, "rewards/rejected": -18.558433532714844, "step": 4528 }, { "epoch": 1.55, "learning_rate": 2.586082627097298e-07, "logits/chosen": 0.017954472452402115, "logits/rejected": 0.029375851154327393, "logps/chosen": -197.14556884765625, "logps/rejected": -329.31884765625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.7764708995819092, "rewards/margins": 13.13125991821289, "rewards/rejected": -14.907731056213379, "step": 4529 }, { "epoch": 1.55, "learning_rate": 2.582374687901553e-07, "logits/chosen": -0.05131924897432327, "logits/rejected": -0.041529618203639984, "logps/chosen": -224.1698760986328, "logps/rejected": -405.03363037109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7275755405426025, "rewards/margins": 18.134294509887695, "rewards/rejected": -19.86186981201172, "step": 4530 }, { "epoch": 1.55, "learning_rate": 2.578669014687971e-07, "logits/chosen": 0.01857011765241623, "logits/rejected": 0.05180065706372261, "logps/chosen": -247.60409545898438, "logps/rejected": -444.1153259277344, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.376530408859253, "rewards/margins": 19.611726760864258, "rewards/rejected": -20.98825454711914, "step": 4531 }, { "epoch": 1.55, "learning_rate": 2.5749656085885894e-07, "logits/chosen": 0.010117635130882263, "logits/rejected": 0.07465571165084839, "logps/chosen": -258.2041320800781, "logps/rejected": -328.788330078125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.6509454250335693, "rewards/margins": 14.919082641601562, "rewards/rejected": -16.57002830505371, "step": 4532 }, { "epoch": 1.55, "learning_rate": 2.57126447073475e-07, "logits/chosen": 0.02777883969247341, "logits/rejected": 0.02472529374063015, "logps/chosen": -195.43731689453125, "logps/rejected": -371.3943786621094, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.2665669918060303, "rewards/margins": 15.712709426879883, "rewards/rejected": -16.97927474975586, "step": 4533 }, { "epoch": 1.55, "learning_rate": 2.567565602257089e-07, "logits/chosen": -0.0008295244188047945, "logits/rejected": 0.040352679789066315, "logps/chosen": -205.26121520996094, "logps/rejected": -320.57525634765625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.5809248685836792, "rewards/margins": 15.058167457580566, "rewards/rejected": -16.63909149169922, "step": 4534 }, { "epoch": 1.55, "learning_rate": 2.563869004285573e-07, "logits/chosen": 0.029792873188853264, "logits/rejected": 0.07528147101402283, "logps/chosen": -157.661376953125, "logps/rejected": -263.0003356933594, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.3807061910629272, "rewards/margins": 13.434333801269531, "rewards/rejected": -14.81503963470459, "step": 4535 }, { "epoch": 1.55, "learning_rate": 2.560174677949456e-07, "logits/chosen": 0.028754401952028275, "logits/rejected": 0.04320351034402847, "logps/chosen": -214.04139709472656, "logps/rejected": -344.2649841308594, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.5217347145080566, "rewards/margins": 15.33049201965332, "rewards/rejected": -17.85222816467285, "step": 4536 }, { "epoch": 1.55, "learning_rate": 2.556482624377296e-07, "logits/chosen": 0.00035661706351675093, "logits/rejected": 0.005181582644581795, "logps/chosen": -251.28268432617188, "logps/rejected": -424.7007751464844, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.7022422552108765, "rewards/margins": 17.34708595275879, "rewards/rejected": -19.04932975769043, "step": 4537 }, { "epoch": 1.55, "learning_rate": 2.552792844696978e-07, "logits/chosen": 0.1530146598815918, "logits/rejected": 0.18079182505607605, "logps/chosen": -187.5327606201172, "logps/rejected": -382.7769775390625, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.4840022325515747, "rewards/margins": 20.20575714111328, "rewards/rejected": -21.689760208129883, "step": 4538 }, { "epoch": 1.55, "learning_rate": 2.549105340035669e-07, "logits/chosen": -0.030030934140086174, "logits/rejected": -0.0049097430892288685, "logps/chosen": -248.63711547851562, "logps/rejected": -390.5668640136719, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.2505149841308594, "rewards/margins": 15.361742973327637, "rewards/rejected": -17.612258911132812, "step": 4539 }, { "epoch": 1.55, "learning_rate": 2.545420111519855e-07, "logits/chosen": 0.04979557543992996, "logits/rejected": 0.0705595538020134, "logps/chosen": -145.8153839111328, "logps/rejected": -237.92481994628906, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.2650772333145142, "rewards/margins": 10.609256744384766, "rewards/rejected": -11.874334335327148, "step": 4540 }, { "epoch": 1.55, "learning_rate": 2.5417371602753237e-07, "logits/chosen": 0.03811470419168472, "logits/rejected": 0.05954865366220474, "logps/chosen": -229.94024658203125, "logps/rejected": -330.0382385253906, "loss": 0.0304, "rewards/accuracies": 1.0, "rewards/chosen": -1.5438578128814697, "rewards/margins": 14.520966529846191, "rewards/rejected": -16.064823150634766, "step": 4541 }, { "epoch": 1.55, "learning_rate": 2.5380564874271603e-07, "logits/chosen": -0.0030805389396846294, "logits/rejected": -0.0044763702899217606, "logps/chosen": -170.3645477294922, "logps/rejected": -356.2979736328125, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.9749629497528076, "rewards/margins": 15.156401634216309, "rewards/rejected": -17.131364822387695, "step": 4542 }, { "epoch": 1.55, "learning_rate": 2.534378094099764e-07, "logits/chosen": -0.044771958142519, "logits/rejected": -0.01690913923084736, "logps/chosen": -172.10751342773438, "logps/rejected": -318.7524108886719, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -2.5087509155273438, "rewards/margins": 14.278436660766602, "rewards/rejected": -16.787185668945312, "step": 4543 }, { "epoch": 1.55, "learning_rate": 2.530701981416834e-07, "logits/chosen": -0.10245724022388458, "logits/rejected": -0.09448515623807907, "logps/chosen": -203.3739013671875, "logps/rejected": -368.36175537109375, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -1.695889949798584, "rewards/margins": 15.185564041137695, "rewards/rejected": -16.881454467773438, "step": 4544 }, { "epoch": 1.55, "learning_rate": 2.527028150501367e-07, "logits/chosen": 0.06578925251960754, "logits/rejected": 0.10335388034582138, "logps/chosen": -227.88475036621094, "logps/rejected": -345.1014404296875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.7383402585983276, "rewards/margins": 14.709446907043457, "rewards/rejected": -16.447786331176758, "step": 4545 }, { "epoch": 1.55, "learning_rate": 2.5233566024756716e-07, "logits/chosen": 0.02201772667467594, "logits/rejected": 0.03061475045979023, "logps/chosen": -173.35336303710938, "logps/rejected": -354.6355285644531, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -1.9514352083206177, "rewards/margins": 17.183563232421875, "rewards/rejected": -19.135000228881836, "step": 4546 }, { "epoch": 1.55, "learning_rate": 2.5196873384613537e-07, "logits/chosen": 0.05945834890007973, "logits/rejected": 0.08262615650892258, "logps/chosen": -161.313232421875, "logps/rejected": -284.5234069824219, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.4594484567642212, "rewards/margins": 14.20052719116211, "rewards/rejected": -15.659975051879883, "step": 4547 }, { "epoch": 1.55, "learning_rate": 2.516020359579327e-07, "logits/chosen": 0.07782266288995743, "logits/rejected": 0.10379941761493683, "logps/chosen": -176.21791076660156, "logps/rejected": -287.98785400390625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.3845500946044922, "rewards/margins": 14.752029418945312, "rewards/rejected": -16.136579513549805, "step": 4548 }, { "epoch": 1.55, "learning_rate": 2.512355666949796e-07, "logits/chosen": 0.08002779632806778, "logits/rejected": 0.11793645471334457, "logps/chosen": -231.13555908203125, "logps/rejected": -339.2303466796875, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -0.6228844523429871, "rewards/margins": 15.785909652709961, "rewards/rejected": -16.40879249572754, "step": 4549 }, { "epoch": 1.55, "learning_rate": 2.508693261692276e-07, "logits/chosen": 0.09247229248285294, "logits/rejected": 0.10252124816179276, "logps/chosen": -218.14620971679688, "logps/rejected": -362.7156982421875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.3027777671813965, "rewards/margins": 15.23232650756836, "rewards/rejected": -16.53510284423828, "step": 4550 }, { "epoch": 1.55, "learning_rate": 2.505033144925587e-07, "logits/chosen": -0.014886382035911083, "logits/rejected": -0.010761048644781113, "logps/chosen": -152.39791870117188, "logps/rejected": -307.71551513671875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.950246810913086, "rewards/margins": 12.811483383178711, "rewards/rejected": -14.76172924041748, "step": 4551 }, { "epoch": 1.55, "learning_rate": 2.501375317767832e-07, "logits/chosen": 0.15734118223190308, "logits/rejected": 0.19222387671470642, "logps/chosen": -204.1429443359375, "logps/rejected": -328.29638671875, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 0.6427265405654907, "rewards/margins": 17.96723747253418, "rewards/rejected": -17.32451057434082, "step": 4552 }, { "epoch": 1.55, "learning_rate": 2.4977197813364403e-07, "logits/chosen": 0.08074339479207993, "logits/rejected": 0.12815538048744202, "logps/chosen": -200.21005249023438, "logps/rejected": -343.5960998535156, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.5468175411224365, "rewards/margins": 16.17683219909668, "rewards/rejected": -17.723648071289062, "step": 4553 }, { "epoch": 1.55, "learning_rate": 2.49406653674812e-07, "logits/chosen": 0.08813800662755966, "logits/rejected": 0.13851432502269745, "logps/chosen": -225.9037322998047, "logps/rejected": -333.61700439453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6093218326568604, "rewards/margins": 15.570250511169434, "rewards/rejected": -16.17957305908203, "step": 4554 }, { "epoch": 1.55, "learning_rate": 2.490415585118887e-07, "logits/chosen": 0.10033415257930756, "logits/rejected": 0.10584706813097, "logps/chosen": -166.22509765625, "logps/rejected": -344.15350341796875, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": -2.458404064178467, "rewards/margins": 17.218076705932617, "rewards/rejected": -19.67647933959961, "step": 4555 }, { "epoch": 1.55, "learning_rate": 2.4867669275640613e-07, "logits/chosen": 0.0805000364780426, "logits/rejected": 0.0901227816939354, "logps/chosen": -172.19342041015625, "logps/rejected": -387.22698974609375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.2777620553970337, "rewards/margins": 17.717100143432617, "rewards/rejected": -18.994863510131836, "step": 4556 }, { "epoch": 1.56, "learning_rate": 2.4831205651982526e-07, "logits/chosen": 0.0038271271623671055, "logits/rejected": 0.03815794736146927, "logps/chosen": -171.70156860351562, "logps/rejected": -279.11468505859375, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -1.1957173347473145, "rewards/margins": 14.102567672729492, "rewards/rejected": -15.298286437988281, "step": 4557 }, { "epoch": 1.56, "learning_rate": 2.4794764991353746e-07, "logits/chosen": -0.10152479261159897, "logits/rejected": -0.05449395254254341, "logps/chosen": -219.0220489501953, "logps/rejected": -402.7695007324219, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.8073325157165527, "rewards/margins": 17.357465744018555, "rewards/rejected": -19.164798736572266, "step": 4558 }, { "epoch": 1.56, "learning_rate": 2.475834730488644e-07, "logits/chosen": -0.04095398634672165, "logits/rejected": 0.0017499198438599706, "logps/chosen": -222.5440673828125, "logps/rejected": -384.99267578125, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.8323745727539062, "rewards/margins": 17.59889793395996, "rewards/rejected": -19.431272506713867, "step": 4559 }, { "epoch": 1.56, "learning_rate": 2.472195260370565e-07, "logits/chosen": -0.058325376361608505, "logits/rejected": -0.002553702564910054, "logps/chosen": -235.06312561035156, "logps/rejected": -350.60137939453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.5809712409973145, "rewards/margins": 16.650175094604492, "rewards/rejected": -19.231143951416016, "step": 4560 }, { "epoch": 1.56, "learning_rate": 2.468558089892948e-07, "logits/chosen": -0.08235529810190201, "logits/rejected": -0.0559379942715168, "logps/chosen": -218.92349243164062, "logps/rejected": -371.7561340332031, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0115796327590942, "rewards/margins": 14.808984756469727, "rewards/rejected": -15.820564270019531, "step": 4561 }, { "epoch": 1.56, "learning_rate": 2.464923220166898e-07, "logits/chosen": 0.10307173430919647, "logits/rejected": 0.11723022162914276, "logps/chosen": -159.1278076171875, "logps/rejected": -323.30169677734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.202366352081299, "rewards/margins": 15.798839569091797, "rewards/rejected": -18.00120735168457, "step": 4562 }, { "epoch": 1.56, "learning_rate": 2.461290652302821e-07, "logits/chosen": 0.12694330513477325, "logits/rejected": 0.1462189257144928, "logps/chosen": -130.84591674804688, "logps/rejected": -225.89666748046875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.02921484410762787, "rewards/margins": 11.84869384765625, "rewards/rejected": -11.819479942321777, "step": 4563 }, { "epoch": 1.56, "learning_rate": 2.4576603874104105e-07, "logits/chosen": -0.03230539709329605, "logits/rejected": 0.023223791271448135, "logps/chosen": -240.29794311523438, "logps/rejected": -297.9509582519531, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.8996247053146362, "rewards/margins": 12.463119506835938, "rewards/rejected": -13.362743377685547, "step": 4564 }, { "epoch": 1.56, "learning_rate": 2.4540324265986643e-07, "logits/chosen": -0.058380212634801865, "logits/rejected": -0.026919059455394745, "logps/chosen": -198.68898010253906, "logps/rejected": -331.2745666503906, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -2.323418378829956, "rewards/margins": 17.00885009765625, "rewards/rejected": -19.33226776123047, "step": 4565 }, { "epoch": 1.56, "learning_rate": 2.4504067709758793e-07, "logits/chosen": -0.023182811215519905, "logits/rejected": -0.03606322035193443, "logps/chosen": -217.18222045898438, "logps/rejected": -382.6773986816406, "loss": 0.1122, "rewards/accuracies": 0.9375, "rewards/chosen": -3.330883741378784, "rewards/margins": 12.828372955322266, "rewards/rejected": -16.159255981445312, "step": 4566 }, { "epoch": 1.56, "learning_rate": 2.4467834216496317e-07, "logits/chosen": 0.07216626405715942, "logits/rejected": 0.08714108914136887, "logps/chosen": -201.71176147460938, "logps/rejected": -382.3604736328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4399638175964355, "rewards/margins": 18.050189971923828, "rewards/rejected": -19.490154266357422, "step": 4567 }, { "epoch": 1.56, "learning_rate": 2.44316237972682e-07, "logits/chosen": -0.08844423294067383, "logits/rejected": -0.0476248562335968, "logps/chosen": -195.6519012451172, "logps/rejected": -308.5309143066406, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.6811361312866211, "rewards/margins": 17.508216857910156, "rewards/rejected": -16.82708168029785, "step": 4568 }, { "epoch": 1.56, "learning_rate": 2.43954364631361e-07, "logits/chosen": 0.04188321903347969, "logits/rejected": 0.08449024707078934, "logps/chosen": -148.55775451660156, "logps/rejected": -290.8720397949219, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -0.8055199384689331, "rewards/margins": 16.302528381347656, "rewards/rejected": -17.108049392700195, "step": 4569 }, { "epoch": 1.56, "learning_rate": 2.4359272225154803e-07, "logits/chosen": -0.004222371149808168, "logits/rejected": 0.0057071503251791, "logps/chosen": -179.2572021484375, "logps/rejected": -352.4527893066406, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": 0.2728727459907532, "rewards/margins": 17.186599731445312, "rewards/rejected": -16.913728713989258, "step": 4570 }, { "epoch": 1.56, "learning_rate": 2.4323131094371993e-07, "logits/chosen": 0.04625072702765465, "logits/rejected": 0.0915829986333847, "logps/chosen": -166.0173797607422, "logps/rejected": -278.4921569824219, "loss": 0.0513, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8781129121780396, "rewards/margins": 14.29941177368164, "rewards/rejected": -16.17752456665039, "step": 4571 }, { "epoch": 1.56, "learning_rate": 2.428701308182826e-07, "logits/chosen": 0.036548469215631485, "logits/rejected": 0.10144007951021194, "logps/chosen": -196.01988220214844, "logps/rejected": -384.58856201171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0362507104873657, "rewards/margins": 19.542110443115234, "rewards/rejected": -20.57836151123047, "step": 4572 }, { "epoch": 1.56, "learning_rate": 2.4250918198557155e-07, "logits/chosen": 0.03067556396126747, "logits/rejected": 0.04610918089747429, "logps/chosen": -142.73719787597656, "logps/rejected": -295.9989013671875, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.948253870010376, "rewards/margins": 16.259017944335938, "rewards/rejected": -17.207271575927734, "step": 4573 }, { "epoch": 1.56, "learning_rate": 2.4214846455585216e-07, "logits/chosen": 0.012692832387983799, "logits/rejected": 0.025942126289010048, "logps/chosen": -154.40589904785156, "logps/rejected": -295.41717529296875, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -2.638035774230957, "rewards/margins": 14.703360557556152, "rewards/rejected": -17.341394424438477, "step": 4574 }, { "epoch": 1.56, "learning_rate": 2.41787978639318e-07, "logits/chosen": -0.07618625462055206, "logits/rejected": -0.03297652676701546, "logps/chosen": -185.36260986328125, "logps/rejected": -297.97381591796875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.2182148694992065, "rewards/margins": 14.16407585144043, "rewards/rejected": -15.382290840148926, "step": 4575 }, { "epoch": 1.56, "learning_rate": 2.414277243460927e-07, "logits/chosen": -0.06382002681493759, "logits/rejected": -0.036109957844018936, "logps/chosen": -180.2671661376953, "logps/rejected": -283.6045837402344, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.190389394760132, "rewards/margins": 12.680496215820312, "rewards/rejected": -14.870885848999023, "step": 4576 }, { "epoch": 1.56, "learning_rate": 2.410677017862295e-07, "logits/chosen": -0.09953675419092178, "logits/rejected": -0.07600224018096924, "logps/chosen": -196.70779418945312, "logps/rejected": -382.03399658203125, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -1.9417613744735718, "rewards/margins": 17.1905517578125, "rewards/rejected": -19.132314682006836, "step": 4577 }, { "epoch": 1.56, "learning_rate": 2.407079110697096e-07, "logits/chosen": 0.05013568326830864, "logits/rejected": 0.08755654096603394, "logps/chosen": -161.42919921875, "logps/rejected": -277.97174072265625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.1767089366912842, "rewards/margins": 15.99282455444336, "rewards/rejected": -17.16953468322754, "step": 4578 }, { "epoch": 1.56, "learning_rate": 2.4034835230644435e-07, "logits/chosen": -0.05341469496488571, "logits/rejected": -0.04080936685204506, "logps/chosen": -207.87921142578125, "logps/rejected": -374.4815673828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.79521644115448, "rewards/margins": 17.66398811340332, "rewards/rejected": -19.459203720092773, "step": 4579 }, { "epoch": 1.56, "learning_rate": 2.39989025606274e-07, "logits/chosen": 0.023271683603525162, "logits/rejected": 0.029889432713389397, "logps/chosen": -195.01101684570312, "logps/rejected": -350.9491882324219, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.028037667274475, "rewards/margins": 16.4815673828125, "rewards/rejected": -17.509605407714844, "step": 4580 }, { "epoch": 1.56, "learning_rate": 2.3962993107896835e-07, "logits/chosen": 0.058269042521715164, "logits/rejected": 0.08808524906635284, "logps/chosen": -184.95188903808594, "logps/rejected": -390.03546142578125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.9755938053131104, "rewards/margins": 16.846355438232422, "rewards/rejected": -17.821949005126953, "step": 4581 }, { "epoch": 1.56, "learning_rate": 2.392710688342251e-07, "logits/chosen": -0.01619654707610607, "logits/rejected": 0.013590140268206596, "logps/chosen": -258.04925537109375, "logps/rejected": -418.0043640136719, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -2.5134968757629395, "rewards/margins": 17.61109733581543, "rewards/rejected": -20.124595642089844, "step": 4582 }, { "epoch": 1.56, "learning_rate": 2.3891243898167203e-07, "logits/chosen": 0.09346789121627808, "logits/rejected": 0.12212209403514862, "logps/chosen": -186.2580108642578, "logps/rejected": -308.830810546875, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -2.461591958999634, "rewards/margins": 13.862342834472656, "rewards/rejected": -16.323936462402344, "step": 4583 }, { "epoch": 1.56, "learning_rate": 2.3855404163086556e-07, "logits/chosen": -0.017022620886564255, "logits/rejected": 0.020813550800085068, "logps/chosen": -250.00001525878906, "logps/rejected": -317.9132995605469, "loss": 0.0471, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8825092315673828, "rewards/margins": 12.440361022949219, "rewards/rejected": -13.322869300842285, "step": 4584 }, { "epoch": 1.56, "learning_rate": 2.3819587689129116e-07, "logits/chosen": -0.04626268520951271, "logits/rejected": -0.03059462271630764, "logps/chosen": -180.49200439453125, "logps/rejected": -383.6897277832031, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.0757560729980469, "rewards/margins": 20.24917984008789, "rewards/rejected": -21.32493782043457, "step": 4585 }, { "epoch": 1.57, "learning_rate": 2.3783794487236365e-07, "logits/chosen": -0.010418613441288471, "logits/rejected": 0.002083483152091503, "logps/chosen": -185.62025451660156, "logps/rejected": -365.9720764160156, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.9970594644546509, "rewards/margins": 17.35629653930664, "rewards/rejected": -18.353357315063477, "step": 4586 }, { "epoch": 1.57, "learning_rate": 2.374802456834255e-07, "logits/chosen": 0.013222692534327507, "logits/rejected": 0.04362604022026062, "logps/chosen": -239.18032836914062, "logps/rejected": -422.26629638671875, "loss": 0.0308, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9932653903961182, "rewards/margins": 19.356779098510742, "rewards/rejected": -20.35004425048828, "step": 4587 }, { "epoch": 1.57, "learning_rate": 2.3712277943374947e-07, "logits/chosen": 0.02503557689487934, "logits/rejected": 0.045292653143405914, "logps/chosen": -207.2286376953125, "logps/rejected": -395.0453796386719, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0685683488845825, "rewards/margins": 19.65959358215332, "rewards/rejected": -20.728160858154297, "step": 4588 }, { "epoch": 1.57, "learning_rate": 2.3676554623253652e-07, "logits/chosen": 0.03607875853776932, "logits/rejected": 0.05383746698498726, "logps/chosen": -268.1138000488281, "logps/rejected": -432.1739501953125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.7238335609436035, "rewards/margins": 18.522541046142578, "rewards/rejected": -19.246374130249023, "step": 4589 }, { "epoch": 1.57, "learning_rate": 2.3640854618891614e-07, "logits/chosen": 0.005235570017248392, "logits/rejected": 0.04263221472501755, "logps/chosen": -197.55577087402344, "logps/rejected": -320.18548583984375, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.477580726146698, "rewards/margins": 15.638725280761719, "rewards/rejected": -16.11630630493164, "step": 4590 }, { "epoch": 1.57, "learning_rate": 2.3605177941194698e-07, "logits/chosen": -0.01986202970147133, "logits/rejected": 0.010185085237026215, "logps/chosen": -218.8426055908203, "logps/rejected": -344.29718017578125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.2937010526657104, "rewards/margins": 15.921088218688965, "rewards/rejected": -17.21478843688965, "step": 4591 }, { "epoch": 1.57, "learning_rate": 2.3569524601061686e-07, "logits/chosen": 0.15420730412006378, "logits/rejected": 0.14539872109889984, "logps/chosen": -121.17649841308594, "logps/rejected": -271.4691162109375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.8300575017929077, "rewards/margins": 11.886841773986816, "rewards/rejected": -13.716898918151855, "step": 4592 }, { "epoch": 1.57, "learning_rate": 2.353389460938412e-07, "logits/chosen": 0.030422791838645935, "logits/rejected": 0.07851022481918335, "logps/chosen": -247.68960571289062, "logps/rejected": -344.67352294921875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.23712822794914246, "rewards/margins": 17.077943801879883, "rewards/rejected": -17.31507110595703, "step": 4593 }, { "epoch": 1.57, "learning_rate": 2.3498287977046495e-07, "logits/chosen": -0.02463223598897457, "logits/rejected": -0.01965886540710926, "logps/chosen": -254.65309143066406, "logps/rejected": -438.1893005371094, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -2.9028711318969727, "rewards/margins": 17.756114959716797, "rewards/rejected": -20.658985137939453, "step": 4594 }, { "epoch": 1.57, "learning_rate": 2.346270471492614e-07, "logits/chosen": 0.18228475749492645, "logits/rejected": 0.21760158240795135, "logps/chosen": -144.48818969726562, "logps/rejected": -317.87347412109375, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -2.5499000549316406, "rewards/margins": 17.458049774169922, "rewards/rejected": -20.007949829101562, "step": 4595 }, { "epoch": 1.57, "learning_rate": 2.3427144833893285e-07, "logits/chosen": 0.031804025173187256, "logits/rejected": 0.09604053199291229, "logps/chosen": -212.53024291992188, "logps/rejected": -318.67315673828125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.8879895210266113, "rewards/margins": 16.134504318237305, "rewards/rejected": -17.022493362426758, "step": 4596 }, { "epoch": 1.57, "learning_rate": 2.339160834481093e-07, "logits/chosen": 0.06745412945747375, "logits/rejected": 0.10936618596315384, "logps/chosen": -230.52178955078125, "logps/rejected": -382.431884765625, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.946029782295227, "rewards/margins": 15.990165710449219, "rewards/rejected": -17.93619728088379, "step": 4597 }, { "epoch": 1.57, "learning_rate": 2.3356095258535013e-07, "logits/chosen": -0.044827111065387726, "logits/rejected": -0.015925128012895584, "logps/chosen": -206.5760498046875, "logps/rejected": -382.3590393066406, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.0624752044677734, "rewards/margins": 17.343597412109375, "rewards/rejected": -18.40607261657715, "step": 4598 }, { "epoch": 1.57, "learning_rate": 2.3320605585914276e-07, "logits/chosen": -0.022244751453399658, "logits/rejected": -0.001961665228009224, "logps/chosen": -219.38909912109375, "logps/rejected": -405.3647155761719, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7855486869812012, "rewards/margins": 18.815080642700195, "rewards/rejected": -20.600627899169922, "step": 4599 }, { "epoch": 1.57, "learning_rate": 2.3285139337790337e-07, "logits/chosen": -0.10819689929485321, "logits/rejected": -0.07561475038528442, "logps/chosen": -248.75592041015625, "logps/rejected": -401.7640380859375, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.09323233366012573, "rewards/margins": 17.528017044067383, "rewards/rejected": -17.621248245239258, "step": 4600 }, { "epoch": 1.57, "learning_rate": 2.3249696524997663e-07, "logits/chosen": -0.05237728729844093, "logits/rejected": -0.03575154393911362, "logps/chosen": -263.16754150390625, "logps/rejected": -448.38525390625, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.46029341220855713, "rewards/margins": 20.143653869628906, "rewards/rejected": -20.60394859313965, "step": 4601 }, { "epoch": 1.57, "learning_rate": 2.3214277158363504e-07, "logits/chosen": -0.037377096712589264, "logits/rejected": -0.026474561542272568, "logps/chosen": -237.51071166992188, "logps/rejected": -363.027587890625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.2378085851669312, "rewards/margins": 12.693405151367188, "rewards/rejected": -13.93121337890625, "step": 4602 }, { "epoch": 1.57, "learning_rate": 2.3178881248708004e-07, "logits/chosen": 0.045654360204935074, "logits/rejected": 0.07987724989652634, "logps/chosen": -148.58607482910156, "logps/rejected": -311.0589599609375, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -2.0434908866882324, "rewards/margins": 14.88437557220459, "rewards/rejected": -16.927865982055664, "step": 4603 }, { "epoch": 1.57, "learning_rate": 2.314350880684416e-07, "logits/chosen": 0.07740794122219086, "logits/rejected": 0.10233646631240845, "logps/chosen": -178.74996948242188, "logps/rejected": -335.65179443359375, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -1.304765224456787, "rewards/margins": 16.937326431274414, "rewards/rejected": -18.242090225219727, "step": 4604 }, { "epoch": 1.57, "learning_rate": 2.31081598435777e-07, "logits/chosen": -0.008976159617304802, "logits/rejected": 0.02369319647550583, "logps/chosen": -197.55209350585938, "logps/rejected": -368.5479431152344, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.7015063762664795, "rewards/margins": 17.467119216918945, "rewards/rejected": -19.16862678527832, "step": 4605 }, { "epoch": 1.57, "learning_rate": 2.3072834369707283e-07, "logits/chosen": 0.005771304946392775, "logits/rejected": 0.04286589473485947, "logps/chosen": -239.04373168945312, "logps/rejected": -302.6165466308594, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.4860605001449585, "rewards/margins": 13.321931838989258, "rewards/rejected": -14.80799388885498, "step": 4606 }, { "epoch": 1.57, "learning_rate": 2.3037532396024372e-07, "logits/chosen": -0.0326036773622036, "logits/rejected": -0.010770563036203384, "logps/chosen": -208.75814819335938, "logps/rejected": -372.532958984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.38472163677215576, "rewards/margins": 16.59267234802246, "rewards/rejected": -16.97739601135254, "step": 4607 }, { "epoch": 1.57, "learning_rate": 2.3002253933313175e-07, "logits/chosen": -0.014355775900185108, "logits/rejected": 0.01898999512195587, "logps/chosen": -187.88809204101562, "logps/rejected": -265.110595703125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.4353479743003845, "rewards/margins": 12.773038864135742, "rewards/rejected": -13.20838737487793, "step": 4608 }, { "epoch": 1.57, "learning_rate": 2.2966998992350817e-07, "logits/chosen": 0.03519344702363014, "logits/rejected": 0.07024051994085312, "logps/chosen": -170.83729553222656, "logps/rejected": -285.4344177246094, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -1.1034891605377197, "rewards/margins": 12.86874771118164, "rewards/rejected": -13.972235679626465, "step": 4609 }, { "epoch": 1.57, "learning_rate": 2.2931767583907192e-07, "logits/chosen": 0.04105411842465401, "logits/rejected": 0.044585660099983215, "logps/chosen": -221.20584106445312, "logps/rejected": -410.7991638183594, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.061819314956665, "rewards/margins": 16.69927978515625, "rewards/rejected": -19.761098861694336, "step": 4610 }, { "epoch": 1.57, "learning_rate": 2.2896559718745023e-07, "logits/chosen": 0.0005873086047358811, "logits/rejected": 0.03774493187665939, "logps/chosen": -152.83755493164062, "logps/rejected": -285.2567138671875, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -0.7836576700210571, "rewards/margins": 13.633743286132812, "rewards/rejected": -14.417402267456055, "step": 4611 }, { "epoch": 1.57, "learning_rate": 2.2861375407619789e-07, "logits/chosen": -0.04020055755972862, "logits/rejected": -0.0018088450888171792, "logps/chosen": -163.87339782714844, "logps/rejected": -342.2605895996094, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.601097047328949, "rewards/margins": 17.202383041381836, "rewards/rejected": -17.80348014831543, "step": 4612 }, { "epoch": 1.57, "learning_rate": 2.282621466127982e-07, "logits/chosen": 0.01306227222084999, "logits/rejected": 0.05021930858492851, "logps/chosen": -202.10011291503906, "logps/rejected": -372.34735107421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1085723638534546, "rewards/margins": 18.14310073852539, "rewards/rejected": -19.251670837402344, "step": 4613 }, { "epoch": 1.57, "learning_rate": 2.279107749046626e-07, "logits/chosen": -0.021391069516539574, "logits/rejected": 0.014475364238023758, "logps/chosen": -209.99365234375, "logps/rejected": -341.9996337890625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9731758236885071, "rewards/margins": 15.686383247375488, "rewards/rejected": -16.65955924987793, "step": 4614 }, { "epoch": 1.58, "learning_rate": 2.2755963905913044e-07, "logits/chosen": 0.01635012961924076, "logits/rejected": 0.04026801139116287, "logps/chosen": -182.72030639648438, "logps/rejected": -279.5132141113281, "loss": 0.0865, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9696764349937439, "rewards/margins": 11.994436264038086, "rewards/rejected": -12.964112281799316, "step": 4615 }, { "epoch": 1.58, "learning_rate": 2.2720873918346838e-07, "logits/chosen": 0.04035858437418938, "logits/rejected": 0.07360414415597916, "logps/chosen": -242.68289184570312, "logps/rejected": -363.47808837890625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.034797191619873, "rewards/margins": 15.517000198364258, "rewards/rejected": -17.551795959472656, "step": 4616 }, { "epoch": 1.58, "learning_rate": 2.2685807538487177e-07, "logits/chosen": 0.06850820779800415, "logits/rejected": 0.09543361514806747, "logps/chosen": -207.465576171875, "logps/rejected": -363.9898681640625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.8768247961997986, "rewards/margins": 15.721379280090332, "rewards/rejected": -16.59820556640625, "step": 4617 }, { "epoch": 1.58, "learning_rate": 2.265076477704635e-07, "logits/chosen": 0.044964227825403214, "logits/rejected": 0.061244163662195206, "logps/chosen": -254.26239013671875, "logps/rejected": -424.4677734375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.0349056720733643, "rewards/margins": 16.167495727539062, "rewards/rejected": -18.202404022216797, "step": 4618 }, { "epoch": 1.58, "learning_rate": 2.2615745644729477e-07, "logits/chosen": 0.026441847905516624, "logits/rejected": 0.03731301799416542, "logps/chosen": -253.22280883789062, "logps/rejected": -413.80694580078125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.15567746758460999, "rewards/margins": 18.900455474853516, "rewards/rejected": -19.056129455566406, "step": 4619 }, { "epoch": 1.58, "learning_rate": 2.258075015223435e-07, "logits/chosen": 0.003675688523799181, "logits/rejected": 0.035080332309007645, "logps/chosen": -141.43069458007812, "logps/rejected": -325.90142822265625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.9450488090515137, "rewards/margins": 17.336042404174805, "rewards/rejected": -19.281091690063477, "step": 4620 }, { "epoch": 1.58, "learning_rate": 2.2545778310251638e-07, "logits/chosen": 0.09933813661336899, "logits/rejected": 0.10636338591575623, "logps/chosen": -146.38217163085938, "logps/rejected": -276.9953308105469, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 0.012335337698459625, "rewards/margins": 13.843476295471191, "rewards/rejected": -13.831141471862793, "step": 4621 }, { "epoch": 1.58, "learning_rate": 2.2510830129464797e-07, "logits/chosen": 0.08397560566663742, "logits/rejected": 0.14097504317760468, "logps/chosen": -235.23699951171875, "logps/rejected": -371.9771423339844, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.8715254068374634, "rewards/margins": 20.27773094177246, "rewards/rejected": -21.149253845214844, "step": 4622 }, { "epoch": 1.58, "learning_rate": 2.247590562054994e-07, "logits/chosen": -0.08035528659820557, "logits/rejected": -0.07029491662979126, "logps/chosen": -163.3506317138672, "logps/rejected": -353.954345703125, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -1.4389365911483765, "rewards/margins": 17.287580490112305, "rewards/rejected": -18.726516723632812, "step": 4623 }, { "epoch": 1.58, "learning_rate": 2.2441004794176066e-07, "logits/chosen": -0.00724615016952157, "logits/rejected": 0.018301162868738174, "logps/chosen": -230.75985717773438, "logps/rejected": -387.18414306640625, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": 0.10889589041471481, "rewards/margins": 17.78520393371582, "rewards/rejected": -17.676307678222656, "step": 4624 }, { "epoch": 1.58, "learning_rate": 2.2406127661004915e-07, "logits/chosen": 0.11099153757095337, "logits/rejected": 0.1351958066225052, "logps/chosen": -194.8966827392578, "logps/rejected": -346.69769287109375, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.2869459390640259, "rewards/margins": 15.952228546142578, "rewards/rejected": -17.23917579650879, "step": 4625 }, { "epoch": 1.58, "learning_rate": 2.2371274231690917e-07, "logits/chosen": 0.013448825106024742, "logits/rejected": 0.037300221621990204, "logps/chosen": -222.46278381347656, "logps/rejected": -369.8197021484375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7259857058525085, "rewards/margins": 17.468456268310547, "rewards/rejected": -18.194442749023438, "step": 4626 }, { "epoch": 1.58, "learning_rate": 2.2336444516881347e-07, "logits/chosen": 0.010147767141461372, "logits/rejected": 0.0290912464261055, "logps/chosen": -192.5130157470703, "logps/rejected": -360.2606506347656, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.098287343978882, "rewards/margins": 15.434893608093262, "rewards/rejected": -17.533180236816406, "step": 4627 }, { "epoch": 1.58, "learning_rate": 2.2301638527216194e-07, "logits/chosen": 0.08870543539524078, "logits/rejected": 0.08931154012680054, "logps/chosen": -176.23898315429688, "logps/rejected": -338.9704284667969, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.7526286840438843, "rewards/margins": 14.816779136657715, "rewards/rejected": -16.569408416748047, "step": 4628 }, { "epoch": 1.58, "learning_rate": 2.2266856273328205e-07, "logits/chosen": 0.056799158453941345, "logits/rejected": 0.0988127738237381, "logps/chosen": -198.62301635742188, "logps/rejected": -328.1506042480469, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.9232735633850098, "rewards/margins": 18.03183364868164, "rewards/rejected": -18.95510482788086, "step": 4629 }, { "epoch": 1.58, "learning_rate": 2.2232097765842917e-07, "logits/chosen": -0.028083784505724907, "logits/rejected": 0.02401747554540634, "logps/chosen": -196.79165649414062, "logps/rejected": -319.8525390625, "loss": 0.0278, "rewards/accuracies": 0.9375, "rewards/chosen": -2.766991138458252, "rewards/margins": 14.827259063720703, "rewards/rejected": -17.594249725341797, "step": 4630 }, { "epoch": 1.58, "learning_rate": 2.219736301537851e-07, "logits/chosen": 0.047747861593961716, "logits/rejected": 0.06412193924188614, "logps/chosen": -187.21707153320312, "logps/rejected": -359.74273681640625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.18442471325397491, "rewards/margins": 18.405261993408203, "rewards/rejected": -18.58968734741211, "step": 4631 }, { "epoch": 1.58, "learning_rate": 2.2162652032546003e-07, "logits/chosen": -0.06954994797706604, "logits/rejected": -0.04839671403169632, "logps/chosen": -162.08453369140625, "logps/rejected": -323.54644775390625, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.3441591262817383, "rewards/margins": 14.450847625732422, "rewards/rejected": -15.79500675201416, "step": 4632 }, { "epoch": 1.58, "learning_rate": 2.212796482794912e-07, "logits/chosen": -0.0019870074465870857, "logits/rejected": 0.0474885068833828, "logps/chosen": -285.2896728515625, "logps/rejected": -367.4805908203125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.280055284500122, "rewards/margins": 15.738443374633789, "rewards/rejected": -17.01849937438965, "step": 4633 }, { "epoch": 1.58, "learning_rate": 2.209330141218435e-07, "logits/chosen": 0.03135015815496445, "logits/rejected": 0.055921200662851334, "logps/chosen": -185.22186279296875, "logps/rejected": -308.99591064453125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.9857856035232544, "rewards/margins": 13.863357543945312, "rewards/rejected": -15.849142074584961, "step": 4634 }, { "epoch": 1.58, "learning_rate": 2.2058661795840839e-07, "logits/chosen": 0.004023507237434387, "logits/rejected": 0.049071233719587326, "logps/chosen": -168.4828338623047, "logps/rejected": -242.5347900390625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -2.8877413272857666, "rewards/margins": 10.850820541381836, "rewards/rejected": -13.73856258392334, "step": 4635 }, { "epoch": 1.58, "learning_rate": 2.202404598950054e-07, "logits/chosen": -0.19244231283664703, "logits/rejected": -0.17978014051914215, "logps/chosen": -279.484375, "logps/rejected": -448.7222900390625, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 0.22095942497253418, "rewards/margins": 18.748489379882812, "rewards/rejected": -18.527528762817383, "step": 4636 }, { "epoch": 1.58, "learning_rate": 2.1989454003738118e-07, "logits/chosen": -0.01920842006802559, "logits/rejected": -0.006453047040849924, "logps/chosen": -153.21002197265625, "logps/rejected": -336.62542724609375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.9574079513549805, "rewards/margins": 14.838277816772461, "rewards/rejected": -16.795684814453125, "step": 4637 }, { "epoch": 1.58, "learning_rate": 2.195488584912091e-07, "logits/chosen": -0.019607383757829666, "logits/rejected": 0.0018957958091050386, "logps/chosen": -223.86903381347656, "logps/rejected": -389.5501403808594, "loss": 0.0568, "rewards/accuracies": 0.9375, "rewards/chosen": -2.114644765853882, "rewards/margins": 16.457321166992188, "rewards/rejected": -18.571964263916016, "step": 4638 }, { "epoch": 1.58, "learning_rate": 2.192034153620902e-07, "logits/chosen": -0.04733514413237572, "logits/rejected": -0.036444537341594696, "logps/chosen": -176.38819885253906, "logps/rejected": -322.7315673828125, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": -1.80002760887146, "rewards/margins": 13.435484886169434, "rewards/rejected": -15.235512733459473, "step": 4639 }, { "epoch": 1.58, "learning_rate": 2.18858210755553e-07, "logits/chosen": -0.04292210564017296, "logits/rejected": -0.0031583914533257484, "logps/chosen": -256.7514343261719, "logps/rejected": -430.9798278808594, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.4805798530578613, "rewards/margins": 16.227924346923828, "rewards/rejected": -18.70850372314453, "step": 4640 }, { "epoch": 1.58, "learning_rate": 2.1851324477705212e-07, "logits/chosen": 0.07870224863290787, "logits/rejected": 0.11370259523391724, "logps/chosen": -153.87261962890625, "logps/rejected": -292.29833984375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.5475587844848633, "rewards/margins": 14.865761756896973, "rewards/rejected": -16.413320541381836, "step": 4641 }, { "epoch": 1.58, "learning_rate": 2.181685175319702e-07, "logits/chosen": 0.027572384104132652, "logits/rejected": 0.04822711646556854, "logps/chosen": -162.2503662109375, "logps/rejected": -352.0592346191406, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.7387551069259644, "rewards/margins": 19.14332389831543, "rewards/rejected": -20.882080078125, "step": 4642 }, { "epoch": 1.58, "learning_rate": 2.178240291256167e-07, "logits/chosen": 0.07237106561660767, "logits/rejected": 0.11752333492040634, "logps/chosen": -198.85504150390625, "logps/rejected": -326.00079345703125, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.5671733617782593, "rewards/margins": 14.759929656982422, "rewards/rejected": -16.327102661132812, "step": 4643 }, { "epoch": 1.58, "learning_rate": 2.1747977966322805e-07, "logits/chosen": 0.006311872508376837, "logits/rejected": 0.034998416900634766, "logps/chosen": -249.7557373046875, "logps/rejected": -383.6361083984375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.5708156824111938, "rewards/margins": 15.651350021362305, "rewards/rejected": -17.222164154052734, "step": 4644 }, { "epoch": 1.59, "learning_rate": 2.1713576924996803e-07, "logits/chosen": -0.10397360473871231, "logits/rejected": -0.0495411679148674, "logps/chosen": -208.42344665527344, "logps/rejected": -398.1058654785156, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.6268144845962524, "rewards/margins": 19.557964324951172, "rewards/rejected": -20.184776306152344, "step": 4645 }, { "epoch": 1.59, "learning_rate": 2.1679199799092628e-07, "logits/chosen": -0.0014037262881174684, "logits/rejected": 0.026840079575777054, "logps/chosen": -210.30706787109375, "logps/rejected": -379.8363037109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.508588194847107, "rewards/margins": 17.760055541992188, "rewards/rejected": -19.26864242553711, "step": 4646 }, { "epoch": 1.59, "learning_rate": 2.1644846599112077e-07, "logits/chosen": 0.07165494561195374, "logits/rejected": 0.10251191258430481, "logps/chosen": -117.5347900390625, "logps/rejected": -274.50091552734375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.113518238067627, "rewards/margins": 14.462446212768555, "rewards/rejected": -16.575963973999023, "step": 4647 }, { "epoch": 1.59, "learning_rate": 2.161051733554956e-07, "logits/chosen": 0.069806307554245, "logits/rejected": 0.11544454097747803, "logps/chosen": -186.99110412597656, "logps/rejected": -259.7610778808594, "loss": 0.0233, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5347081422805786, "rewards/margins": 13.210755348205566, "rewards/rejected": -13.745465278625488, "step": 4648 }, { "epoch": 1.59, "learning_rate": 2.1576212018892237e-07, "logits/chosen": -0.09172455221414566, "logits/rejected": -0.03775068745017052, "logps/chosen": -263.5050964355469, "logps/rejected": -337.5511474609375, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 0.770984411239624, "rewards/margins": 16.570276260375977, "rewards/rejected": -15.799290657043457, "step": 4649 }, { "epoch": 1.59, "learning_rate": 2.1541930659619833e-07, "logits/chosen": -0.03544728830456734, "logits/rejected": 0.054618287831544876, "logps/chosen": -228.8880157470703, "logps/rejected": -302.9875183105469, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.5655001997947693, "rewards/margins": 15.861501693725586, "rewards/rejected": -16.427000045776367, "step": 4650 }, { "epoch": 1.59, "learning_rate": 2.1507673268204863e-07, "logits/chosen": -0.036448847502470016, "logits/rejected": -0.01799830049276352, "logps/chosen": -257.7017822265625, "logps/rejected": -440.7272033691406, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -1.4678858518600464, "rewards/margins": 18.25020980834961, "rewards/rejected": -19.718095779418945, "step": 4651 }, { "epoch": 1.59, "learning_rate": 2.147343985511253e-07, "logits/chosen": 0.00343142612837255, "logits/rejected": 0.027448613196611404, "logps/chosen": -150.9197540283203, "logps/rejected": -326.29632568359375, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.5276563167572021, "rewards/margins": 17.14760398864746, "rewards/rejected": -17.675262451171875, "step": 4652 }, { "epoch": 1.59, "learning_rate": 2.1439230430800592e-07, "logits/chosen": 0.012258186936378479, "logits/rejected": 0.05538591742515564, "logps/chosen": -228.5741424560547, "logps/rejected": -352.8897705078125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.9516672492027283, "rewards/margins": 16.138671875, "rewards/rejected": -17.090335845947266, "step": 4653 }, { "epoch": 1.59, "learning_rate": 2.14050450057196e-07, "logits/chosen": -0.00047583106788806617, "logits/rejected": 0.025429852306842804, "logps/chosen": -201.48072814941406, "logps/rejected": -349.58221435546875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.461960792541504, "rewards/margins": 15.102502822875977, "rewards/rejected": -16.564464569091797, "step": 4654 }, { "epoch": 1.59, "learning_rate": 2.137088359031276e-07, "logits/chosen": 0.13612288236618042, "logits/rejected": 0.15879184007644653, "logps/chosen": -231.9835662841797, "logps/rejected": -371.42041015625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.9938468337059021, "rewards/margins": 16.687891006469727, "rewards/rejected": -17.681739807128906, "step": 4655 }, { "epoch": 1.59, "learning_rate": 2.1336746195015843e-07, "logits/chosen": -0.0014899407979100943, "logits/rejected": 0.02933334931731224, "logps/chosen": -229.11941528320312, "logps/rejected": -390.0697937011719, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -1.2881832122802734, "rewards/margins": 18.849246978759766, "rewards/rejected": -20.13743019104004, "step": 4656 }, { "epoch": 1.59, "learning_rate": 2.1302632830257395e-07, "logits/chosen": 0.07378317415714264, "logits/rejected": 0.0762786716222763, "logps/chosen": -149.9940948486328, "logps/rejected": -319.6522521972656, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -2.835073471069336, "rewards/margins": 14.732271194458008, "rewards/rejected": -17.567344665527344, "step": 4657 }, { "epoch": 1.59, "learning_rate": 2.126854350645858e-07, "logits/chosen": 0.012185954488813877, "logits/rejected": 0.04110151156783104, "logps/chosen": -246.82965087890625, "logps/rejected": -411.23980712890625, "loss": 0.0796, "rewards/accuracies": 1.0, "rewards/chosen": -1.2408533096313477, "rewards/margins": 19.32068634033203, "rewards/rejected": -20.561540603637695, "step": 4658 }, { "epoch": 1.59, "learning_rate": 2.1234478234033171e-07, "logits/chosen": 0.06746106594800949, "logits/rejected": 0.10179483145475388, "logps/chosen": -203.09713745117188, "logps/rejected": -305.6189880371094, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.8452810049057007, "rewards/margins": 15.283828735351562, "rewards/rejected": -17.129108428955078, "step": 4659 }, { "epoch": 1.59, "learning_rate": 2.120043702338772e-07, "logits/chosen": -0.10428880900144577, "logits/rejected": -0.05692971870303154, "logps/chosen": -234.1704864501953, "logps/rejected": -397.28778076171875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.551802635192871, "rewards/margins": 18.961750030517578, "rewards/rejected": -20.513553619384766, "step": 4660 }, { "epoch": 1.59, "learning_rate": 2.1166419884921261e-07, "logits/chosen": -0.0282756220549345, "logits/rejected": 0.00012239583884365857, "logps/chosen": -168.97079467773438, "logps/rejected": -337.4158020019531, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": -2.29376482963562, "rewards/margins": 15.511913299560547, "rewards/rejected": -17.80567741394043, "step": 4661 }, { "epoch": 1.59, "learning_rate": 2.1132426829025618e-07, "logits/chosen": -0.06420345604419708, "logits/rejected": -0.035570841282606125, "logps/chosen": -216.6804656982422, "logps/rejected": -375.74053955078125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.417924165725708, "rewards/margins": 16.42788314819336, "rewards/rejected": -18.845806121826172, "step": 4662 }, { "epoch": 1.59, "learning_rate": 2.1098457866085194e-07, "logits/chosen": -0.14310184121131897, "logits/rejected": -0.10424046218395233, "logps/chosen": -282.3256530761719, "logps/rejected": -415.0690002441406, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.18845343589782715, "rewards/margins": 18.069740295410156, "rewards/rejected": -18.258193969726562, "step": 4663 }, { "epoch": 1.59, "learning_rate": 2.1064513006477013e-07, "logits/chosen": -0.007641870062798262, "logits/rejected": 0.047410495579242706, "logps/chosen": -201.2142333984375, "logps/rejected": -373.8123474121094, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.14243067800998688, "rewards/margins": 19.841772079467773, "rewards/rejected": -19.984201431274414, "step": 4664 }, { "epoch": 1.59, "learning_rate": 2.103059226057077e-07, "logits/chosen": 0.06551098078489304, "logits/rejected": 0.09761401265859604, "logps/chosen": -190.0516357421875, "logps/rejected": -310.7618408203125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.7770062685012817, "rewards/margins": 13.848376274108887, "rewards/rejected": -15.625383377075195, "step": 4665 }, { "epoch": 1.59, "learning_rate": 2.0996695638728788e-07, "logits/chosen": 0.03523612394928932, "logits/rejected": 0.06268446147441864, "logps/chosen": -240.0833282470703, "logps/rejected": -372.2187805175781, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -0.588731050491333, "rewards/margins": 14.674463272094727, "rewards/rejected": -15.263193130493164, "step": 4666 }, { "epoch": 1.59, "learning_rate": 2.0962823151306053e-07, "logits/chosen": -0.07186242938041687, "logits/rejected": -0.025066427886486053, "logps/chosen": -227.22337341308594, "logps/rejected": -399.7890625, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.7838294506072998, "rewards/margins": 18.741098403930664, "rewards/rejected": -19.524925231933594, "step": 4667 }, { "epoch": 1.59, "learning_rate": 2.092897480865008e-07, "logits/chosen": -0.015013918280601501, "logits/rejected": 0.004458328243345022, "logps/chosen": -194.17848205566406, "logps/rejected": -332.8533020019531, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.6628739833831787, "rewards/margins": 14.408599853515625, "rewards/rejected": -17.07147216796875, "step": 4668 }, { "epoch": 1.59, "learning_rate": 2.0895150621101087e-07, "logits/chosen": 0.15198658406734467, "logits/rejected": 0.15857218205928802, "logps/chosen": -167.97418212890625, "logps/rejected": -358.2357177734375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.0351712703704834, "rewards/margins": 16.322397232055664, "rewards/rejected": -19.357568740844727, "step": 4669 }, { "epoch": 1.59, "learning_rate": 2.0861350598991946e-07, "logits/chosen": 0.10999275743961334, "logits/rejected": 0.1224530041217804, "logps/chosen": -191.4100799560547, "logps/rejected": -320.87359619140625, "loss": 0.0314, "rewards/accuracies": 0.9375, "rewards/chosen": -2.115684986114502, "rewards/margins": 13.889686584472656, "rewards/rejected": -16.005369186401367, "step": 4670 }, { "epoch": 1.59, "learning_rate": 2.0827574752648036e-07, "logits/chosen": -0.006608597002923489, "logits/rejected": 0.03130490332841873, "logps/chosen": -213.02880859375, "logps/rejected": -360.63043212890625, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.8456236124038696, "rewards/margins": 17.456775665283203, "rewards/rejected": -19.302398681640625, "step": 4671 }, { "epoch": 1.59, "learning_rate": 2.079382309238743e-07, "logits/chosen": 0.010535565204918385, "logits/rejected": 0.028622619807720184, "logps/chosen": -229.53009033203125, "logps/rejected": -334.13836669921875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.226028323173523, "rewards/margins": 13.110574722290039, "rewards/rejected": -14.336602210998535, "step": 4672 }, { "epoch": 1.59, "learning_rate": 2.0760095628520823e-07, "logits/chosen": 0.011811729520559311, "logits/rejected": 0.06198029965162277, "logps/chosen": -174.92886352539062, "logps/rejected": -304.8710632324219, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.5344665050506592, "rewards/margins": 14.527485847473145, "rewards/rejected": -16.06195068359375, "step": 4673 }, { "epoch": 1.6, "learning_rate": 2.072639237135142e-07, "logits/chosen": 0.045726850628852844, "logits/rejected": 0.07547289878129959, "logps/chosen": -225.59898376464844, "logps/rejected": -412.2420959472656, "loss": 0.0289, "rewards/accuracies": 0.9375, "rewards/chosen": -2.572957992553711, "rewards/margins": 19.198598861694336, "rewards/rejected": -21.771556854248047, "step": 4674 }, { "epoch": 1.6, "learning_rate": 2.0692713331175192e-07, "logits/chosen": 0.09936880320310593, "logits/rejected": 0.12062438577413559, "logps/chosen": -176.4462890625, "logps/rejected": -318.5314025878906, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.8934078216552734, "rewards/margins": 14.32394790649414, "rewards/rejected": -16.217355728149414, "step": 4675 }, { "epoch": 1.6, "learning_rate": 2.065905851828056e-07, "logits/chosen": -0.19427765905857086, "logits/rejected": -0.1451614648103714, "logps/chosen": -265.6328125, "logps/rejected": -456.6537780761719, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.8472535610198975, "rewards/margins": 20.983806610107422, "rewards/rejected": -22.83106231689453, "step": 4676 }, { "epoch": 1.6, "learning_rate": 2.0625427942948614e-07, "logits/chosen": 0.022306280210614204, "logits/rejected": 0.049953117966651917, "logps/chosen": -150.98394775390625, "logps/rejected": -315.8080749511719, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.9941867589950562, "rewards/margins": 13.761491775512695, "rewards/rejected": -14.7556791305542, "step": 4677 }, { "epoch": 1.6, "learning_rate": 2.0591821615453075e-07, "logits/chosen": 0.025655033066868782, "logits/rejected": 0.029910581186413765, "logps/chosen": -217.1685791015625, "logps/rejected": -397.15576171875, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -1.1933702230453491, "rewards/margins": 17.39127540588379, "rewards/rejected": -18.58464813232422, "step": 4678 }, { "epoch": 1.6, "learning_rate": 2.055823954606014e-07, "logits/chosen": -0.026201635599136353, "logits/rejected": 0.015550263226032257, "logps/chosen": -243.1892547607422, "logps/rejected": -444.5750732421875, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.836553931236267, "rewards/margins": 20.64206886291504, "rewards/rejected": -22.478622436523438, "step": 4679 }, { "epoch": 1.6, "learning_rate": 2.0524681745028704e-07, "logits/chosen": -0.08612508326768875, "logits/rejected": -0.06273853033781052, "logps/chosen": -265.2512512207031, "logps/rejected": -444.55426025390625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.9764989614486694, "rewards/margins": 19.60331153869629, "rewards/rejected": -20.579811096191406, "step": 4680 }, { "epoch": 1.6, "learning_rate": 2.049114822261022e-07, "logits/chosen": -0.06415757536888123, "logits/rejected": -0.06174331158399582, "logps/chosen": -252.93032836914062, "logps/rejected": -467.5532531738281, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.789447546005249, "rewards/margins": 19.72491455078125, "rewards/rejected": -20.514362335205078, "step": 4681 }, { "epoch": 1.6, "learning_rate": 2.045763898904873e-07, "logits/chosen": -0.12103231251239777, "logits/rejected": -0.05834326148033142, "logps/chosen": -277.15155029296875, "logps/rejected": -396.3853454589844, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.0232062339782715, "rewards/margins": 18.329830169677734, "rewards/rejected": -19.35303497314453, "step": 4682 }, { "epoch": 1.6, "learning_rate": 2.042415405458079e-07, "logits/chosen": 0.044364288449287415, "logits/rejected": 0.05666995048522949, "logps/chosen": -162.37098693847656, "logps/rejected": -318.7306823730469, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.2296091318130493, "rewards/margins": 14.60903549194336, "rewards/rejected": -15.838643074035645, "step": 4683 }, { "epoch": 1.6, "learning_rate": 2.0390693429435623e-07, "logits/chosen": 0.024833576753735542, "logits/rejected": 0.05269831046462059, "logps/chosen": -191.67071533203125, "logps/rejected": -345.4894714355469, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.9324808120727539, "rewards/margins": 16.503328323364258, "rewards/rejected": -17.435808181762695, "step": 4684 }, { "epoch": 1.6, "learning_rate": 2.0357257123835004e-07, "logits/chosen": 0.07126421481370926, "logits/rejected": 0.08404035866260529, "logps/chosen": -229.32284545898438, "logps/rejected": -412.7204284667969, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -2.029000997543335, "rewards/margins": 16.90538215637207, "rewards/rejected": -18.934383392333984, "step": 4685 }, { "epoch": 1.6, "learning_rate": 2.0323845147993213e-07, "logits/chosen": 0.09123010188341141, "logits/rejected": 0.11034353822469711, "logps/chosen": -203.74160766601562, "logps/rejected": -360.34967041015625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.8407965898513794, "rewards/margins": 15.3402099609375, "rewards/rejected": -17.181005477905273, "step": 4686 }, { "epoch": 1.6, "learning_rate": 2.0290457512117166e-07, "logits/chosen": 0.008468332700431347, "logits/rejected": 0.03436723351478577, "logps/chosen": -233.02182006835938, "logps/rejected": -342.7437438964844, "loss": 0.0232, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8069204092025757, "rewards/margins": 14.146034240722656, "rewards/rejected": -15.952953338623047, "step": 4687 }, { "epoch": 1.6, "learning_rate": 2.0257094226406368e-07, "logits/chosen": 0.06834036856889725, "logits/rejected": 0.08922044187784195, "logps/chosen": -172.726806640625, "logps/rejected": -287.093505859375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.658278226852417, "rewards/margins": 14.280536651611328, "rewards/rejected": -14.938815116882324, "step": 4688 }, { "epoch": 1.6, "learning_rate": 2.022375530105276e-07, "logits/chosen": -0.02848324179649353, "logits/rejected": 0.00020155998936388642, "logps/chosen": -256.43243408203125, "logps/rejected": -401.50537109375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2923038005828857, "rewards/margins": 18.16613006591797, "rewards/rejected": -19.458433151245117, "step": 4689 }, { "epoch": 1.6, "learning_rate": 2.019044074624101e-07, "logits/chosen": 0.025828111916780472, "logits/rejected": 0.05087031424045563, "logps/chosen": -250.00254821777344, "logps/rejected": -463.0862731933594, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -3.619541645050049, "rewards/margins": 18.600902557373047, "rewards/rejected": -22.220441818237305, "step": 4690 }, { "epoch": 1.6, "learning_rate": 2.015715057214823e-07, "logits/chosen": -0.04874277859926224, "logits/rejected": -0.0005129401106387377, "logps/chosen": -230.6470184326172, "logps/rejected": -317.6936950683594, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.9072705507278442, "rewards/margins": 14.460137367248535, "rewards/rejected": -16.367408752441406, "step": 4691 }, { "epoch": 1.6, "learning_rate": 2.0123884788944033e-07, "logits/chosen": 0.035855308175086975, "logits/rejected": 0.07205235213041306, "logps/chosen": -193.14822387695312, "logps/rejected": -371.89288330078125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.159530520439148, "rewards/margins": 17.967853546142578, "rewards/rejected": -19.127384185791016, "step": 4692 }, { "epoch": 1.6, "learning_rate": 2.0090643406790774e-07, "logits/chosen": 0.12673166394233704, "logits/rejected": 0.1735294908285141, "logps/chosen": -196.55397033691406, "logps/rejected": -322.2936096191406, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -2.120311737060547, "rewards/margins": 17.049226760864258, "rewards/rejected": -19.169538497924805, "step": 4693 }, { "epoch": 1.6, "learning_rate": 2.005742643584315e-07, "logits/chosen": -0.0014170992653816938, "logits/rejected": -0.0076936944387853146, "logps/chosen": -174.83544921875, "logps/rejected": -415.8212890625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.9167897701263428, "rewards/margins": 20.381179809570312, "rewards/rejected": -21.297969818115234, "step": 4694 }, { "epoch": 1.6, "learning_rate": 2.0024233886248533e-07, "logits/chosen": -0.009077180176973343, "logits/rejected": 0.045457810163497925, "logps/chosen": -270.18328857421875, "logps/rejected": -384.424560546875, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.3362608551979065, "rewards/margins": 17.846288681030273, "rewards/rejected": -18.18255043029785, "step": 4695 }, { "epoch": 1.6, "learning_rate": 1.9991065768146786e-07, "logits/chosen": -0.05143778771162033, "logits/rejected": 0.016820410266518593, "logps/chosen": -247.93023681640625, "logps/rejected": -383.3382263183594, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.045781135559082, "rewards/margins": 18.3480224609375, "rewards/rejected": -20.393802642822266, "step": 4696 }, { "epoch": 1.6, "learning_rate": 1.9957922091670275e-07, "logits/chosen": -0.016878414899110794, "logits/rejected": -0.0008479513926431537, "logps/chosen": -226.48101806640625, "logps/rejected": -417.0912780761719, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.335097551345825, "rewards/margins": 19.009403228759766, "rewards/rejected": -21.344499588012695, "step": 4697 }, { "epoch": 1.6, "learning_rate": 1.9924802866943969e-07, "logits/chosen": -0.038680266588926315, "logits/rejected": -0.00509954709559679, "logps/chosen": -237.88343811035156, "logps/rejected": -391.0456848144531, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.2564850449562073, "rewards/margins": 17.168792724609375, "rewards/rejected": -17.425277709960938, "step": 4698 }, { "epoch": 1.6, "learning_rate": 1.9891708104085315e-07, "logits/chosen": 0.009978585876524448, "logits/rejected": 0.024105018004775047, "logps/chosen": -205.5266571044922, "logps/rejected": -365.98614501953125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.1297485828399658, "rewards/margins": 17.29342269897461, "rewards/rejected": -18.42317008972168, "step": 4699 }, { "epoch": 1.6, "learning_rate": 1.9858637813204349e-07, "logits/chosen": 0.021651988849043846, "logits/rejected": 0.05579522252082825, "logps/chosen": -237.4445037841797, "logps/rejected": -378.3921813964844, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.050074756145477295, "rewards/margins": 17.363615036010742, "rewards/rejected": -17.41368865966797, "step": 4700 }, { "epoch": 1.6, "learning_rate": 1.9825592004403523e-07, "logits/chosen": -0.05845209211111069, "logits/rejected": -0.04568064212799072, "logps/chosen": -243.34722900390625, "logps/rejected": -384.4964599609375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.9731003046035767, "rewards/margins": 13.674988746643066, "rewards/rejected": -15.648089408874512, "step": 4701 }, { "epoch": 1.6, "learning_rate": 1.979257068777791e-07, "logits/chosen": -0.006046542897820473, "logits/rejected": 9.807926107896492e-05, "logps/chosen": -181.05523681640625, "logps/rejected": -304.3268737792969, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.4612695872783661, "rewards/margins": 13.156824111938477, "rewards/rejected": -13.618095397949219, "step": 4702 }, { "epoch": 1.61, "learning_rate": 1.9759573873415103e-07, "logits/chosen": -0.05013227462768555, "logits/rejected": -0.04270505532622337, "logps/chosen": -195.19943237304688, "logps/rejected": -381.8468322753906, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.459019899368286, "rewards/margins": 16.432514190673828, "rewards/rejected": -18.89153289794922, "step": 4703 }, { "epoch": 1.61, "learning_rate": 1.9726601571395075e-07, "logits/chosen": 0.15187032520771027, "logits/rejected": 0.1764780431985855, "logps/chosen": -154.19131469726562, "logps/rejected": -287.3634338378906, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.3646143674850464, "rewards/margins": 14.38016128540039, "rewards/rejected": -15.744775772094727, "step": 4704 }, { "epoch": 1.61, "learning_rate": 1.969365379179052e-07, "logits/chosen": 0.07353923469781876, "logits/rejected": 0.0734478309750557, "logps/chosen": -186.06866455078125, "logps/rejected": -377.47607421875, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.17227435111999512, "rewards/margins": 19.005199432373047, "rewards/rejected": -18.83292579650879, "step": 4705 }, { "epoch": 1.61, "learning_rate": 1.9660730544666483e-07, "logits/chosen": -0.01564333774149418, "logits/rejected": 0.010237584821879864, "logps/chosen": -224.9115447998047, "logps/rejected": -343.1815185546875, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -2.0920169353485107, "rewards/margins": 15.229567527770996, "rewards/rejected": -17.321584701538086, "step": 4706 }, { "epoch": 1.61, "learning_rate": 1.96278318400805e-07, "logits/chosen": -0.0611032135784626, "logits/rejected": -0.015735793858766556, "logps/chosen": -209.93560791015625, "logps/rejected": -294.4875793457031, "loss": 0.0606, "rewards/accuracies": 0.9375, "rewards/chosen": -2.30704665184021, "rewards/margins": 13.038914680480957, "rewards/rejected": -15.34596061706543, "step": 4707 }, { "epoch": 1.61, "learning_rate": 1.9594957688082792e-07, "logits/chosen": 0.08224866539239883, "logits/rejected": 0.11895793676376343, "logps/chosen": -186.05665588378906, "logps/rejected": -306.03643798828125, "loss": 0.03, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8838683366775513, "rewards/margins": 13.589950561523438, "rewards/rejected": -15.4738187789917, "step": 4708 }, { "epoch": 1.61, "learning_rate": 1.9562108098715856e-07, "logits/chosen": 0.047700654715299606, "logits/rejected": 0.05133312568068504, "logps/chosen": -193.27459716796875, "logps/rejected": -397.9335021972656, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.3232578039169312, "rewards/margins": 17.0419864654541, "rewards/rejected": -18.365243911743164, "step": 4709 }, { "epoch": 1.61, "learning_rate": 1.9529283082014846e-07, "logits/chosen": 0.0035184219013899565, "logits/rejected": 0.05289096012711525, "logps/chosen": -241.7465362548828, "logps/rejected": -315.21380615234375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.2749949097633362, "rewards/margins": 13.992691993713379, "rewards/rejected": -14.26768684387207, "step": 4710 }, { "epoch": 1.61, "learning_rate": 1.949648264800735e-07, "logits/chosen": -0.010740196332335472, "logits/rejected": 0.014603671617805958, "logps/chosen": -191.18948364257812, "logps/rejected": -328.54486083984375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.489133358001709, "rewards/margins": 13.730047225952148, "rewards/rejected": -16.219181060791016, "step": 4711 }, { "epoch": 1.61, "learning_rate": 1.946370680671341e-07, "logits/chosen": -0.0889764353632927, "logits/rejected": -0.01128062792122364, "logps/chosen": -272.7592468261719, "logps/rejected": -382.91497802734375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.7125871777534485, "rewards/margins": 18.377033233642578, "rewards/rejected": -19.089618682861328, "step": 4712 }, { "epoch": 1.61, "learning_rate": 1.9430955568145614e-07, "logits/chosen": 0.058090295642614365, "logits/rejected": 0.08760280162096024, "logps/chosen": -196.20086669921875, "logps/rejected": -305.22113037109375, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -2.6374258995056152, "rewards/margins": 12.185919761657715, "rewards/rejected": -14.823345184326172, "step": 4713 }, { "epoch": 1.61, "learning_rate": 1.9398228942309014e-07, "logits/chosen": 0.0586724579334259, "logits/rejected": 0.0722903460264206, "logps/chosen": -178.1727294921875, "logps/rejected": -381.6839599609375, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -1.0849072933197021, "rewards/margins": 18.949071884155273, "rewards/rejected": -20.033977508544922, "step": 4714 }, { "epoch": 1.61, "learning_rate": 1.936552693920117e-07, "logits/chosen": 0.07919236272573471, "logits/rejected": 0.09038171917200089, "logps/chosen": -203.91070556640625, "logps/rejected": -374.7762451171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.8080627918243408, "rewards/margins": 15.185099601745605, "rewards/rejected": -16.993162155151367, "step": 4715 }, { "epoch": 1.61, "learning_rate": 1.9332849568812037e-07, "logits/chosen": 0.08379349112510681, "logits/rejected": 0.1369507908821106, "logps/chosen": -185.83828735351562, "logps/rejected": -296.9292907714844, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.3897734880447388, "rewards/margins": 16.9870548248291, "rewards/rejected": -18.376827239990234, "step": 4716 }, { "epoch": 1.61, "learning_rate": 1.930019684112414e-07, "logits/chosen": 0.12194359302520752, "logits/rejected": 0.1713261902332306, "logps/chosen": -167.85946655273438, "logps/rejected": -284.8990173339844, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.950548768043518, "rewards/margins": 14.320444107055664, "rewards/rejected": -16.270994186401367, "step": 4717 }, { "epoch": 1.61, "learning_rate": 1.926756876611244e-07, "logits/chosen": 0.00861869566142559, "logits/rejected": 0.01598074659705162, "logps/chosen": -174.72576904296875, "logps/rejected": -365.68048095703125, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -2.556025743484497, "rewards/margins": 14.330217361450195, "rewards/rejected": -16.886245727539062, "step": 4718 }, { "epoch": 1.61, "learning_rate": 1.9234965353744326e-07, "logits/chosen": -0.07775158435106277, "logits/rejected": -0.019189152866601944, "logps/chosen": -191.33192443847656, "logps/rejected": -319.19775390625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.3258068263530731, "rewards/margins": 18.001819610595703, "rewards/rejected": -18.327625274658203, "step": 4719 }, { "epoch": 1.61, "learning_rate": 1.9202386613979714e-07, "logits/chosen": 0.0019120455253869295, "logits/rejected": 0.017942378297448158, "logps/chosen": -190.67886352539062, "logps/rejected": -351.2549743652344, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.8261563777923584, "rewards/margins": 15.366223335266113, "rewards/rejected": -17.192378997802734, "step": 4720 }, { "epoch": 1.61, "learning_rate": 1.9169832556770992e-07, "logits/chosen": -0.055954284965991974, "logits/rejected": -0.02779787965118885, "logps/chosen": -245.510986328125, "logps/rejected": -422.77191162109375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.3121999502182007, "rewards/margins": 20.763277053833008, "rewards/rejected": -22.07547950744629, "step": 4721 }, { "epoch": 1.61, "learning_rate": 1.9137303192062892e-07, "logits/chosen": 0.06516166776418686, "logits/rejected": 0.09957505017518997, "logps/chosen": -255.49020385742188, "logps/rejected": -464.92193603515625, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -2.295215368270874, "rewards/margins": 23.151084899902344, "rewards/rejected": -25.446300506591797, "step": 4722 }, { "epoch": 1.61, "learning_rate": 1.9104798529792788e-07, "logits/chosen": -0.06699121743440628, "logits/rejected": -0.02426452562212944, "logps/chosen": -210.61209106445312, "logps/rejected": -296.01129150390625, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -0.8495750427246094, "rewards/margins": 11.720215797424316, "rewards/rejected": -12.56978988647461, "step": 4723 }, { "epoch": 1.61, "learning_rate": 1.9072318579890323e-07, "logits/chosen": 0.034590963274240494, "logits/rejected": 0.03841911256313324, "logps/chosen": -151.52835083007812, "logps/rejected": -308.86273193359375, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.3631929159164429, "rewards/margins": 14.45552921295166, "rewards/rejected": -15.818721771240234, "step": 4724 }, { "epoch": 1.61, "learning_rate": 1.9039863352277706e-07, "logits/chosen": -0.017817437648773193, "logits/rejected": 0.030617130920290947, "logps/chosen": -183.60946655273438, "logps/rejected": -336.4541931152344, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.46594032645225525, "rewards/margins": 18.48628807067871, "rewards/rejected": -18.020347595214844, "step": 4725 }, { "epoch": 1.61, "learning_rate": 1.9007432856869597e-07, "logits/chosen": 0.05432894453406334, "logits/rejected": 0.07047687470912933, "logps/chosen": -233.56228637695312, "logps/rejected": -458.0042724609375, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.215704321861267, "rewards/margins": 20.034337997436523, "rewards/rejected": -21.250041961669922, "step": 4726 }, { "epoch": 1.61, "learning_rate": 1.8975027103572993e-07, "logits/chosen": 0.01390120666474104, "logits/rejected": 0.040258459746837616, "logps/chosen": -199.3129119873047, "logps/rejected": -342.79815673828125, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -1.3873178958892822, "rewards/margins": 15.957324981689453, "rewards/rejected": -17.344642639160156, "step": 4727 }, { "epoch": 1.61, "learning_rate": 1.894264610228744e-07, "logits/chosen": 0.1539994329214096, "logits/rejected": 0.16594263911247253, "logps/chosen": -155.14199829101562, "logps/rejected": -275.34912109375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.9855414628982544, "rewards/margins": 11.401718139648438, "rewards/rejected": -13.387260437011719, "step": 4728 }, { "epoch": 1.61, "learning_rate": 1.8910289862904915e-07, "logits/chosen": 0.08889329433441162, "logits/rejected": 0.12195314466953278, "logps/chosen": -200.45135498046875, "logps/rejected": -363.1631164550781, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.9653676748275757, "rewards/margins": 16.919769287109375, "rewards/rejected": -17.885133743286133, "step": 4729 }, { "epoch": 1.61, "learning_rate": 1.8877958395309756e-07, "logits/chosen": -0.0573701448738575, "logits/rejected": -0.022624459117650986, "logps/chosen": -236.90943908691406, "logps/rejected": -437.9163513183594, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.166727066040039, "rewards/margins": 21.3218936920166, "rewards/rejected": -22.488616943359375, "step": 4730 }, { "epoch": 1.61, "learning_rate": 1.8845651709378786e-07, "logits/chosen": 0.09588772803544998, "logits/rejected": 0.09958197921514511, "logps/chosen": -146.5109405517578, "logps/rejected": -313.4308166503906, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -2.4966142177581787, "rewards/margins": 14.051980018615723, "rewards/rejected": -16.548595428466797, "step": 4731 }, { "epoch": 1.62, "learning_rate": 1.8813369814981273e-07, "logits/chosen": 0.034198347479104996, "logits/rejected": 0.052046194672584534, "logps/chosen": -186.69793701171875, "logps/rejected": -363.37469482421875, "loss": 0.0239, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0056898593902588, "rewards/margins": 16.009305953979492, "rewards/rejected": -17.014997482299805, "step": 4732 }, { "epoch": 1.62, "learning_rate": 1.8781112721978898e-07, "logits/chosen": 0.017407258972525597, "logits/rejected": 0.025777209550142288, "logps/chosen": -208.57904052734375, "logps/rejected": -367.29656982421875, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -1.0475271940231323, "rewards/margins": 15.960108757019043, "rewards/rejected": -17.00763511657715, "step": 4733 }, { "epoch": 1.62, "learning_rate": 1.8748880440225724e-07, "logits/chosen": 0.10249346494674683, "logits/rejected": 0.10043369233608246, "logps/chosen": -200.4160614013672, "logps/rejected": -422.6602478027344, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -2.3098044395446777, "rewards/margins": 19.502071380615234, "rewards/rejected": -21.811878204345703, "step": 4734 }, { "epoch": 1.62, "learning_rate": 1.871667297956828e-07, "logits/chosen": -0.038669653236866, "logits/rejected": 0.008101427927613258, "logps/chosen": -225.79486083984375, "logps/rejected": -381.5072021484375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.0904382467269897, "rewards/margins": 18.587800979614258, "rewards/rejected": -19.678239822387695, "step": 4735 }, { "epoch": 1.62, "learning_rate": 1.8684490349845538e-07, "logits/chosen": -0.005592364817857742, "logits/rejected": 0.003235280280932784, "logps/chosen": -208.9891815185547, "logps/rejected": -419.99200439453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.5598809719085693, "rewards/margins": 18.794519424438477, "rewards/rejected": -20.354400634765625, "step": 4736 }, { "epoch": 1.62, "learning_rate": 1.8652332560888762e-07, "logits/chosen": 0.029459547251462936, "logits/rejected": 0.055109962821006775, "logps/chosen": -212.33554077148438, "logps/rejected": -357.2864074707031, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.0883013010025024, "rewards/margins": 16.993621826171875, "rewards/rejected": -18.081924438476562, "step": 4737 }, { "epoch": 1.62, "learning_rate": 1.8620199622521815e-07, "logits/chosen": 0.18560263514518738, "logits/rejected": 0.19722643494606018, "logps/chosen": -138.35523986816406, "logps/rejected": -271.2496643066406, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.5421128273010254, "rewards/margins": 12.470266342163086, "rewards/rejected": -14.012378692626953, "step": 4738 }, { "epoch": 1.62, "learning_rate": 1.8588091544560813e-07, "logits/chosen": -0.09947001934051514, "logits/rejected": -0.05179523304104805, "logps/chosen": -232.19976806640625, "logps/rejected": -380.57891845703125, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -1.0240122079849243, "rewards/margins": 19.23517417907715, "rewards/rejected": -20.259185791015625, "step": 4739 }, { "epoch": 1.62, "learning_rate": 1.85560083368143e-07, "logits/chosen": 0.07944278419017792, "logits/rejected": 0.11633838713169098, "logps/chosen": -250.46917724609375, "logps/rejected": -374.892333984375, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.22378790378570557, "rewards/margins": 16.978002548217773, "rewards/rejected": -17.201793670654297, "step": 4740 }, { "epoch": 1.62, "learning_rate": 1.8523950009083322e-07, "logits/chosen": 0.09682196378707886, "logits/rejected": 0.1150033250451088, "logps/chosen": -135.912109375, "logps/rejected": -231.5314178466797, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.9365642070770264, "rewards/margins": 10.854615211486816, "rewards/rejected": -12.791179656982422, "step": 4741 }, { "epoch": 1.62, "learning_rate": 1.8491916571161203e-07, "logits/chosen": 0.047817129641771317, "logits/rejected": 0.09305152297019958, "logps/chosen": -216.0694580078125, "logps/rejected": -459.052001953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.7129315733909607, "rewards/margins": 23.883413314819336, "rewards/rejected": -24.596342086791992, "step": 4742 }, { "epoch": 1.62, "learning_rate": 1.8459908032833736e-07, "logits/chosen": 0.07727239280939102, "logits/rejected": 0.11524057388305664, "logps/chosen": -206.75782775878906, "logps/rejected": -313.92816162109375, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -0.9492801427841187, "rewards/margins": 14.139022827148438, "rewards/rejected": -15.088302612304688, "step": 4743 }, { "epoch": 1.62, "learning_rate": 1.8427924403879113e-07, "logits/chosen": 0.02765977941453457, "logits/rejected": 0.06694246828556061, "logps/chosen": -225.6310577392578, "logps/rejected": -386.92401123046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.7255126237869263, "rewards/margins": 17.470375061035156, "rewards/rejected": -19.195886611938477, "step": 4744 }, { "epoch": 1.62, "learning_rate": 1.8395965694067838e-07, "logits/chosen": -0.04849182069301605, "logits/rejected": -0.02674938552081585, "logps/chosen": -216.20372009277344, "logps/rejected": -383.0978088378906, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -2.870082378387451, "rewards/margins": 16.76515007019043, "rewards/rejected": -19.63523292541504, "step": 4745 }, { "epoch": 1.62, "learning_rate": 1.836403191316289e-07, "logits/chosen": -0.012419085949659348, "logits/rejected": 0.023578571155667305, "logps/chosen": -204.53646850585938, "logps/rejected": -346.508056640625, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -2.1516246795654297, "rewards/margins": 15.832120895385742, "rewards/rejected": -17.983745574951172, "step": 4746 }, { "epoch": 1.62, "learning_rate": 1.8332123070919593e-07, "logits/chosen": 0.04652449116110802, "logits/rejected": 0.061752524226903915, "logps/chosen": -201.146240234375, "logps/rejected": -402.9028625488281, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.7795095443725586, "rewards/margins": 18.04294776916504, "rewards/rejected": -20.822458267211914, "step": 4747 }, { "epoch": 1.62, "learning_rate": 1.8300239177085674e-07, "logits/chosen": 0.1024870052933693, "logits/rejected": 0.11764120310544968, "logps/chosen": -132.26339721679688, "logps/rejected": -250.15113830566406, "loss": 0.0699, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1310131549835205, "rewards/margins": 11.521356582641602, "rewards/rejected": -13.652371406555176, "step": 4748 }, { "epoch": 1.62, "learning_rate": 1.8268380241401183e-07, "logits/chosen": 0.09221432358026505, "logits/rejected": 0.1602787971496582, "logps/chosen": -203.47201538085938, "logps/rejected": -368.38311767578125, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.8375263810157776, "rewards/margins": 19.879940032958984, "rewards/rejected": -20.717464447021484, "step": 4749 }, { "epoch": 1.62, "learning_rate": 1.8236546273598596e-07, "logits/chosen": 0.11918630450963974, "logits/rejected": 0.13166718184947968, "logps/chosen": -182.48223876953125, "logps/rejected": -311.5365295410156, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.02776247262954712, "rewards/margins": 15.358110427856445, "rewards/rejected": -15.385871887207031, "step": 4750 }, { "epoch": 1.62, "learning_rate": 1.8204737283402804e-07, "logits/chosen": -0.015517032705247402, "logits/rejected": -0.008709876798093319, "logps/chosen": -223.7843017578125, "logps/rejected": -398.42431640625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.474487543106079, "rewards/margins": 17.339664459228516, "rewards/rejected": -18.814151763916016, "step": 4751 }, { "epoch": 1.62, "learning_rate": 1.817295328053091e-07, "logits/chosen": -0.13807016611099243, "logits/rejected": -0.11128611117601395, "logps/chosen": -248.07203674316406, "logps/rejected": -419.6389465332031, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.8102573156356812, "rewards/margins": 19.087665557861328, "rewards/rejected": -19.89792251586914, "step": 4752 }, { "epoch": 1.62, "learning_rate": 1.8141194274692605e-07, "logits/chosen": 0.08204906433820724, "logits/rejected": 0.08689170330762863, "logps/chosen": -189.48426818847656, "logps/rejected": -365.1946716308594, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -1.4811338186264038, "rewards/margins": 15.022476196289062, "rewards/rejected": -16.503610610961914, "step": 4753 }, { "epoch": 1.62, "learning_rate": 1.8109460275589771e-07, "logits/chosen": 0.1663246750831604, "logits/rejected": 0.18651911616325378, "logps/chosen": -197.74118041992188, "logps/rejected": -369.0623779296875, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -1.7962746620178223, "rewards/margins": 16.381080627441406, "rewards/rejected": -18.17735481262207, "step": 4754 }, { "epoch": 1.62, "learning_rate": 1.8077751292916664e-07, "logits/chosen": 0.038381341844797134, "logits/rejected": 0.07325763255357742, "logps/chosen": -182.75453186035156, "logps/rejected": -303.6649475097656, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.3568408489227295, "rewards/margins": 14.450567245483398, "rewards/rejected": -15.80740737915039, "step": 4755 }, { "epoch": 1.62, "learning_rate": 1.8046067336360038e-07, "logits/chosen": -0.00755343260243535, "logits/rejected": 0.04014437273144722, "logps/chosen": -225.86849975585938, "logps/rejected": -377.7616271972656, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.33110773563385, "rewards/margins": 18.881885528564453, "rewards/rejected": -20.21299171447754, "step": 4756 }, { "epoch": 1.62, "learning_rate": 1.8014408415598815e-07, "logits/chosen": -0.051753006875514984, "logits/rejected": -0.03370664268732071, "logps/chosen": -191.38632202148438, "logps/rejected": -365.9422912597656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.2028086185455322, "rewards/margins": 17.78202247619629, "rewards/rejected": -18.984832763671875, "step": 4757 }, { "epoch": 1.62, "learning_rate": 1.7982774540304402e-07, "logits/chosen": 0.07569152861833572, "logits/rejected": 0.11110525578260422, "logps/chosen": -229.74441528320312, "logps/rejected": -416.5862121582031, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -1.1923154592514038, "rewards/margins": 20.230087280273438, "rewards/rejected": -21.422401428222656, "step": 4758 }, { "epoch": 1.62, "learning_rate": 1.7951165720140538e-07, "logits/chosen": 0.060242924839258194, "logits/rejected": 0.09737851470708847, "logps/chosen": -179.40615844726562, "logps/rejected": -353.8930969238281, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.5375694632530212, "rewards/margins": 19.727134704589844, "rewards/rejected": -20.26470375061035, "step": 4759 }, { "epoch": 1.62, "learning_rate": 1.791958196476321e-07, "logits/chosen": 0.01165701076388359, "logits/rejected": 0.058373674750328064, "logps/chosen": -145.87648010253906, "logps/rejected": -205.50685119628906, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.3272916078567505, "rewards/margins": 11.408862113952637, "rewards/rejected": -12.73615550994873, "step": 4760 }, { "epoch": 1.62, "learning_rate": 1.7888023283820863e-07, "logits/chosen": -0.008875945582985878, "logits/rejected": 0.000537773419637233, "logps/chosen": -197.8032684326172, "logps/rejected": -340.7352600097656, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.6028519868850708, "rewards/margins": 14.408053398132324, "rewards/rejected": -16.010906219482422, "step": 4761 }, { "epoch": 1.63, "learning_rate": 1.7856489686954224e-07, "logits/chosen": -0.02216978929936886, "logits/rejected": 0.0052553885616362095, "logps/chosen": -198.91055297851562, "logps/rejected": -339.6949462890625, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -2.424765110015869, "rewards/margins": 15.07529354095459, "rewards/rejected": -17.500059127807617, "step": 4762 }, { "epoch": 1.63, "learning_rate": 1.7824981183796416e-07, "logits/chosen": 0.017536131665110588, "logits/rejected": 0.02455567941069603, "logps/chosen": -190.81668090820312, "logps/rejected": -358.4145202636719, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.962714433670044, "rewards/margins": 14.634531021118164, "rewards/rejected": -16.597244262695312, "step": 4763 }, { "epoch": 1.63, "learning_rate": 1.7793497783972788e-07, "logits/chosen": 0.07554879039525986, "logits/rejected": 0.12645359337329865, "logps/chosen": -227.31741333007812, "logps/rejected": -285.62347412109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.4130346775054932, "rewards/margins": 13.408391952514648, "rewards/rejected": -14.821426391601562, "step": 4764 }, { "epoch": 1.63, "learning_rate": 1.7762039497101112e-07, "logits/chosen": 0.0630573108792305, "logits/rejected": 0.10844897478818893, "logps/chosen": -244.74148559570312, "logps/rejected": -373.93634033203125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.7623990178108215, "rewards/margins": 19.59168243408203, "rewards/rejected": -20.354080200195312, "step": 4765 }, { "epoch": 1.63, "learning_rate": 1.7730606332791486e-07, "logits/chosen": -0.0045950040221214294, "logits/rejected": 0.03436076641082764, "logps/chosen": -248.8250274658203, "logps/rejected": -353.7023010253906, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.29934751987457275, "rewards/margins": 13.935235977172852, "rewards/rejected": -13.635887145996094, "step": 4766 }, { "epoch": 1.63, "learning_rate": 1.7699198300646256e-07, "logits/chosen": -0.04060288518667221, "logits/rejected": 0.0007731514633633196, "logps/chosen": -249.24737548828125, "logps/rejected": -383.2789611816406, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.9058533906936646, "rewards/margins": 17.438135147094727, "rewards/rejected": -18.3439884185791, "step": 4767 }, { "epoch": 1.63, "learning_rate": 1.7667815410260178e-07, "logits/chosen": 0.10205116868019104, "logits/rejected": 0.156916081905365, "logps/chosen": -223.59303283691406, "logps/rejected": -409.1406555175781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.078890085220337, "rewards/margins": 20.42008399963379, "rewards/rejected": -22.498971939086914, "step": 4768 }, { "epoch": 1.63, "learning_rate": 1.7636457671220296e-07, "logits/chosen": -0.05995932221412659, "logits/rejected": -0.009543633088469505, "logps/chosen": -226.79464721679688, "logps/rejected": -386.6043701171875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -2.630220890045166, "rewards/margins": 17.144973754882812, "rewards/rejected": -19.775196075439453, "step": 4769 }, { "epoch": 1.63, "learning_rate": 1.760512509310591e-07, "logits/chosen": 0.04293734207749367, "logits/rejected": 0.0786028578877449, "logps/chosen": -204.2548828125, "logps/rejected": -345.19744873046875, "loss": 0.0315, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8700944185256958, "rewards/margins": 15.680440902709961, "rewards/rejected": -17.550535202026367, "step": 4770 }, { "epoch": 1.63, "learning_rate": 1.7573817685488779e-07, "logits/chosen": 0.11507617682218552, "logits/rejected": 0.15185102820396423, "logps/chosen": -180.66587829589844, "logps/rejected": -311.2133483886719, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.9865071773529053, "rewards/margins": 14.927623748779297, "rewards/rejected": -15.914131164550781, "step": 4771 }, { "epoch": 1.63, "learning_rate": 1.7542535457932849e-07, "logits/chosen": 0.14149834215641022, "logits/rejected": 0.15710076689720154, "logps/chosen": -199.27395629882812, "logps/rejected": -389.0956726074219, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.4754470586776733, "rewards/margins": 18.353025436401367, "rewards/rejected": -19.828472137451172, "step": 4772 }, { "epoch": 1.63, "learning_rate": 1.7511278419994334e-07, "logits/chosen": 0.00330140208825469, "logits/rejected": 0.04921272024512291, "logps/chosen": -163.21388244628906, "logps/rejected": -286.7323303222656, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.6864264011383057, "rewards/margins": 15.300888061523438, "rewards/rejected": -16.987316131591797, "step": 4773 }, { "epoch": 1.63, "learning_rate": 1.7480046581221952e-07, "logits/chosen": 0.09035392850637436, "logits/rejected": 0.12694895267486572, "logps/chosen": -244.8820037841797, "logps/rejected": -344.88507080078125, "loss": 0.0432, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8748656511306763, "rewards/margins": 15.139897346496582, "rewards/rejected": -17.01476287841797, "step": 4774 }, { "epoch": 1.63, "learning_rate": 1.7448839951156514e-07, "logits/chosen": 0.03087536245584488, "logits/rejected": 0.06533605605363846, "logps/chosen": -184.61366271972656, "logps/rejected": -327.4942626953125, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.8984884023666382, "rewards/margins": 14.230042457580566, "rewards/rejected": -16.12853240966797, "step": 4775 }, { "epoch": 1.63, "learning_rate": 1.7417658539331249e-07, "logits/chosen": -0.005533529445528984, "logits/rejected": 0.02423420175909996, "logps/chosen": -178.40150451660156, "logps/rejected": -340.84283447265625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.1109635829925537, "rewards/margins": 16.44951629638672, "rewards/rejected": -17.56048011779785, "step": 4776 }, { "epoch": 1.63, "learning_rate": 1.7386502355271648e-07, "logits/chosen": 0.005182258784770966, "logits/rejected": 0.027007674798369408, "logps/chosen": -160.94769287109375, "logps/rejected": -328.4655456542969, "loss": 0.0621, "rewards/accuracies": 0.9375, "rewards/chosen": -1.448211669921875, "rewards/margins": 15.706933975219727, "rewards/rejected": -17.1551456451416, "step": 4777 }, { "epoch": 1.63, "learning_rate": 1.7355371408495467e-07, "logits/chosen": -0.010592276230454445, "logits/rejected": 0.00664797006174922, "logps/chosen": -190.68606567382812, "logps/rejected": -297.18487548828125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8748821020126343, "rewards/margins": 11.647333145141602, "rewards/rejected": -12.522215843200684, "step": 4778 }, { "epoch": 1.63, "learning_rate": 1.73242657085128e-07, "logits/chosen": -0.06783250719308853, "logits/rejected": -0.025377023965120316, "logps/chosen": -224.2528533935547, "logps/rejected": -375.03240966796875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.368774652481079, "rewards/margins": 18.026334762573242, "rewards/rejected": -19.395109176635742, "step": 4779 }, { "epoch": 1.63, "learning_rate": 1.7293185264826014e-07, "logits/chosen": -0.002034485340118408, "logits/rejected": 0.02122337929904461, "logps/chosen": -177.85984802246094, "logps/rejected": -311.69354248046875, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -1.7349236011505127, "rewards/margins": 14.85725212097168, "rewards/rejected": -16.59217643737793, "step": 4780 }, { "epoch": 1.63, "learning_rate": 1.726213008692977e-07, "logits/chosen": 0.076167531311512, "logits/rejected": 0.09186305105686188, "logps/chosen": -246.2852783203125, "logps/rejected": -384.896484375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.904403805732727, "rewards/margins": 13.83731746673584, "rewards/rejected": -14.741721153259277, "step": 4781 }, { "epoch": 1.63, "learning_rate": 1.7231100184310953e-07, "logits/chosen": 0.008806370198726654, "logits/rejected": 0.03714147210121155, "logps/chosen": -200.9542694091797, "logps/rejected": -270.4725646972656, "loss": 0.0148, "rewards/accuracies": 0.9375, "rewards/chosen": -2.553565502166748, "rewards/margins": 10.018472671508789, "rewards/rejected": -12.572037696838379, "step": 4782 }, { "epoch": 1.63, "learning_rate": 1.720009556644879e-07, "logits/chosen": 0.03707551956176758, "logits/rejected": 0.05026666447520256, "logps/chosen": -162.0886993408203, "logps/rejected": -327.9294128417969, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -0.6194151639938354, "rewards/margins": 16.41407012939453, "rewards/rejected": -17.033485412597656, "step": 4783 }, { "epoch": 1.63, "learning_rate": 1.7169116242814797e-07, "logits/chosen": 0.03604748472571373, "logits/rejected": 0.0874723568558693, "logps/chosen": -215.657958984375, "logps/rejected": -346.1628723144531, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.132826805114746, "rewards/margins": 16.884227752685547, "rewards/rejected": -18.01705551147461, "step": 4784 }, { "epoch": 1.63, "learning_rate": 1.7138162222872655e-07, "logits/chosen": 0.0714632198214531, "logits/rejected": 0.08948151022195816, "logps/chosen": -209.3229217529297, "logps/rejected": -350.1057434082031, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.2652029991149902, "rewards/margins": 15.3941068649292, "rewards/rejected": -17.65930938720703, "step": 4785 }, { "epoch": 1.63, "learning_rate": 1.7107233516078478e-07, "logits/chosen": -0.05291242524981499, "logits/rejected": -0.024227309972047806, "logps/chosen": -251.4431610107422, "logps/rejected": -417.6413269042969, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": -1.5592572689056396, "rewards/margins": 16.25126075744629, "rewards/rejected": -17.810516357421875, "step": 4786 }, { "epoch": 1.63, "learning_rate": 1.7076330131880524e-07, "logits/chosen": -0.09616580605506897, "logits/rejected": -0.07883359491825104, "logps/chosen": -233.3225860595703, "logps/rejected": -399.48382568359375, "loss": 0.0619, "rewards/accuracies": 1.0, "rewards/chosen": -1.5717568397521973, "rewards/margins": 18.1424560546875, "rewards/rejected": -19.714210510253906, "step": 4787 }, { "epoch": 1.63, "learning_rate": 1.704545207971928e-07, "logits/chosen": 0.04907833784818649, "logits/rejected": 0.034977030009031296, "logps/chosen": -184.25582885742188, "logps/rejected": -388.12945556640625, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.9303960800170898, "rewards/margins": 17.033523559570312, "rewards/rejected": -18.96392059326172, "step": 4788 }, { "epoch": 1.63, "learning_rate": 1.7014599369027692e-07, "logits/chosen": -0.003965453244745731, "logits/rejected": 0.029492327943444252, "logps/chosen": -240.60360717773438, "logps/rejected": -430.36041259765625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.9959514141082764, "rewards/margins": 18.03618621826172, "rewards/rejected": -20.032135009765625, "step": 4789 }, { "epoch": 1.63, "learning_rate": 1.6983772009230735e-07, "logits/chosen": 0.06488462537527084, "logits/rejected": 0.07911042124032974, "logps/chosen": -220.32763671875, "logps/rejected": -421.7653503417969, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3290446996688843, "rewards/margins": 20.105270385742188, "rewards/rejected": -21.434314727783203, "step": 4790 }, { "epoch": 1.64, "learning_rate": 1.6952970009745792e-07, "logits/chosen": 0.09408022463321686, "logits/rejected": 0.14140766859054565, "logps/chosen": -146.90670776367188, "logps/rejected": -260.5074768066406, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7288818955421448, "rewards/margins": 13.643888473510742, "rewards/rejected": -14.372770309448242, "step": 4791 }, { "epoch": 1.64, "learning_rate": 1.6922193379982452e-07, "logits/chosen": -0.15102577209472656, "logits/rejected": -0.11041097342967987, "logps/chosen": -258.05755615234375, "logps/rejected": -478.6252746582031, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": 0.47078394889831543, "rewards/margins": 23.013648986816406, "rewards/rejected": -22.542865753173828, "step": 4792 }, { "epoch": 1.64, "learning_rate": 1.6891442129342525e-07, "logits/chosen": -0.022463349625468254, "logits/rejected": 0.016966788098216057, "logps/chosen": -181.02801513671875, "logps/rejected": -255.8943634033203, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010417401790618896, "rewards/margins": 12.5029935836792, "rewards/rejected": -12.504035949707031, "step": 4793 }, { "epoch": 1.64, "learning_rate": 1.6860716267220087e-07, "logits/chosen": 0.08203566074371338, "logits/rejected": 0.12598711252212524, "logps/chosen": -199.24847412109375, "logps/rejected": -345.70953369140625, "loss": 0.0256, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7946152687072754, "rewards/margins": 16.566181182861328, "rewards/rejected": -18.360795974731445, "step": 4794 }, { "epoch": 1.64, "learning_rate": 1.6830015803001497e-07, "logits/chosen": -0.03291702643036842, "logits/rejected": 0.002109321765601635, "logps/chosen": -254.6378173828125, "logps/rejected": -369.3616638183594, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.6842200756072998, "rewards/margins": 16.1121768951416, "rewards/rejected": -16.796396255493164, "step": 4795 }, { "epoch": 1.64, "learning_rate": 1.6799340746065326e-07, "logits/chosen": -0.009819294326007366, "logits/rejected": 0.01497022807598114, "logps/chosen": -218.34811401367188, "logps/rejected": -432.6155700683594, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.2333984375, "rewards/margins": 21.055788040161133, "rewards/rejected": -23.2891845703125, "step": 4796 }, { "epoch": 1.64, "learning_rate": 1.676869110578234e-07, "logits/chosen": 0.051224205642938614, "logits/rejected": 0.0976581946015358, "logps/chosen": -259.9014892578125, "logps/rejected": -318.84271240234375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.7266174554824829, "rewards/margins": 11.720609664916992, "rewards/rejected": -12.447227478027344, "step": 4797 }, { "epoch": 1.64, "learning_rate": 1.6738066891515602e-07, "logits/chosen": -0.007466027978807688, "logits/rejected": 0.025773603469133377, "logps/chosen": -191.90692138671875, "logps/rejected": -319.8220520019531, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.5003571510314941, "rewards/margins": 16.713130950927734, "rewards/rejected": -18.21348762512207, "step": 4798 }, { "epoch": 1.64, "learning_rate": 1.6707468112620416e-07, "logits/chosen": 0.09159187972545624, "logits/rejected": 0.12012408673763275, "logps/chosen": -193.74398803710938, "logps/rejected": -350.5467224121094, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.91947865486145, "rewards/margins": 15.95796012878418, "rewards/rejected": -18.877437591552734, "step": 4799 }, { "epoch": 1.64, "learning_rate": 1.6676894778444205e-07, "logits/chosen": 0.004914640914648771, "logits/rejected": 0.028872136026620865, "logps/chosen": -191.8444061279297, "logps/rejected": -377.22247314453125, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.5423345565795898, "rewards/margins": 19.028764724731445, "rewards/rejected": -20.57110023498535, "step": 4800 }, { "epoch": 1.64, "learning_rate": 1.6646346898326814e-07, "logits/chosen": 0.1222156286239624, "logits/rejected": 0.16048696637153625, "logps/chosen": -218.10678100585938, "logps/rejected": -308.82330322265625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.6632258296012878, "rewards/margins": 15.247339248657227, "rewards/rejected": -15.910564422607422, "step": 4801 }, { "epoch": 1.64, "learning_rate": 1.6615824481600127e-07, "logits/chosen": 0.10522414743900299, "logits/rejected": 0.11040400713682175, "logps/chosen": -159.01287841796875, "logps/rejected": -315.41796875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.345848560333252, "rewards/margins": 15.30963134765625, "rewards/rejected": -17.655479431152344, "step": 4802 }, { "epoch": 1.64, "learning_rate": 1.658532753758829e-07, "logits/chosen": 0.06175782158970833, "logits/rejected": 0.06369665265083313, "logps/chosen": -175.8445281982422, "logps/rejected": -388.2572326660156, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.860242486000061, "rewards/margins": 16.815385818481445, "rewards/rejected": -18.675626754760742, "step": 4803 }, { "epoch": 1.64, "learning_rate": 1.6554856075607793e-07, "logits/chosen": 0.018208475783467293, "logits/rejected": 0.054124634712934494, "logps/chosen": -218.30892944335938, "logps/rejected": -362.9693908691406, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.26065126061439514, "rewards/margins": 20.04778289794922, "rewards/rejected": -20.30843734741211, "step": 4804 }, { "epoch": 1.64, "learning_rate": 1.6524410104967201e-07, "logits/chosen": -0.10328976809978485, "logits/rejected": -0.08118336647748947, "logps/chosen": -230.4549560546875, "logps/rejected": -412.7197265625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.8776729702949524, "rewards/margins": 19.121408462524414, "rewards/rejected": -19.999082565307617, "step": 4805 }, { "epoch": 1.64, "learning_rate": 1.6493989634967276e-07, "logits/chosen": -0.13684645295143127, "logits/rejected": -0.0977267324924469, "logps/chosen": -228.5740509033203, "logps/rejected": -353.2940979003906, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": -2.3042540550231934, "rewards/margins": 14.591804504394531, "rewards/rejected": -16.896060943603516, "step": 4806 }, { "epoch": 1.64, "learning_rate": 1.646359467490117e-07, "logits/chosen": 0.11242136359214783, "logits/rejected": 0.11988380551338196, "logps/chosen": -136.87014770507812, "logps/rejected": -345.11956787109375, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.1615488529205322, "rewards/margins": 19.062543869018555, "rewards/rejected": -20.224092483520508, "step": 4807 }, { "epoch": 1.64, "learning_rate": 1.6433225234054026e-07, "logits/chosen": 0.013643961399793625, "logits/rejected": 0.043803781270980835, "logps/chosen": -180.50833129882812, "logps/rejected": -364.4598388671875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.799930214881897, "rewards/margins": 18.484468460083008, "rewards/rejected": -19.28439712524414, "step": 4808 }, { "epoch": 1.64, "learning_rate": 1.6402881321703332e-07, "logits/chosen": -0.09912512451410294, "logits/rejected": -0.04950860142707825, "logps/chosen": -185.79910278320312, "logps/rejected": -286.9912109375, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -1.0987603664398193, "rewards/margins": 14.833706855773926, "rewards/rejected": -15.932466506958008, "step": 4809 }, { "epoch": 1.64, "learning_rate": 1.6372562947118762e-07, "logits/chosen": 0.015492471866309643, "logits/rejected": 0.08540892601013184, "logps/chosen": -203.91470336914062, "logps/rejected": -351.6017761230469, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.937252998352051, "rewards/margins": 16.57997703552246, "rewards/rejected": -19.517230987548828, "step": 4810 }, { "epoch": 1.64, "learning_rate": 1.6342270119562096e-07, "logits/chosen": 0.0012875847751274705, "logits/rejected": 0.007603779435157776, "logps/chosen": -211.95565795898438, "logps/rejected": -380.9834289550781, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.14386209845542908, "rewards/margins": 16.597490310668945, "rewards/rejected": -16.741352081298828, "step": 4811 }, { "epoch": 1.64, "learning_rate": 1.63120028482874e-07, "logits/chosen": 0.11035824567079544, "logits/rejected": 0.11981363594532013, "logps/chosen": -118.95915222167969, "logps/rejected": -264.7208251953125, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -1.6606807708740234, "rewards/margins": 12.872255325317383, "rewards/rejected": -14.532937049865723, "step": 4812 }, { "epoch": 1.64, "learning_rate": 1.6281761142540918e-07, "logits/chosen": 0.04428894817829132, "logits/rejected": 0.0662410631775856, "logps/chosen": -203.88345336914062, "logps/rejected": -304.49005126953125, "loss": 0.0419, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5738377571105957, "rewards/margins": 10.372756958007812, "rewards/rejected": -12.946595191955566, "step": 4813 }, { "epoch": 1.64, "learning_rate": 1.6251545011561096e-07, "logits/chosen": -0.0017025367124006152, "logits/rejected": 0.027951538562774658, "logps/chosen": -217.2815704345703, "logps/rejected": -449.49017333984375, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -0.5279428362846375, "rewards/margins": 21.605775833129883, "rewards/rejected": -22.13372039794922, "step": 4814 }, { "epoch": 1.64, "learning_rate": 1.6221354464578497e-07, "logits/chosen": 0.02152341604232788, "logits/rejected": 0.029446344822645187, "logps/chosen": -210.64208984375, "logps/rejected": -409.6688537597656, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.7697334289550781, "rewards/margins": 19.0472412109375, "rewards/rejected": -20.816974639892578, "step": 4815 }, { "epoch": 1.64, "learning_rate": 1.619118951081594e-07, "logits/chosen": 0.04085082933306694, "logits/rejected": 0.07726821303367615, "logps/chosen": -227.021484375, "logps/rejected": -293.67620849609375, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.8650325536727905, "rewards/margins": 12.35395622253418, "rewards/rejected": -14.218987464904785, "step": 4816 }, { "epoch": 1.64, "learning_rate": 1.616105015948842e-07, "logits/chosen": 0.057888731360435486, "logits/rejected": 0.09276709705591202, "logps/chosen": -229.21507263183594, "logps/rejected": -353.8423156738281, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -0.1887359470129013, "rewards/margins": 15.480767250061035, "rewards/rejected": -15.669503211975098, "step": 4817 }, { "epoch": 1.64, "learning_rate": 1.6130936419803022e-07, "logits/chosen": -0.09166020154953003, "logits/rejected": -0.07116515189409256, "logps/chosen": -214.2916259765625, "logps/rejected": -311.9954833984375, "loss": 0.0265, "rewards/accuracies": 0.9375, "rewards/chosen": -3.4764022827148438, "rewards/margins": 13.118868827819824, "rewards/rejected": -16.59526824951172, "step": 4818 }, { "epoch": 1.64, "learning_rate": 1.610084830095918e-07, "logits/chosen": 0.07878755033016205, "logits/rejected": 0.1154850497841835, "logps/chosen": -214.88864135742188, "logps/rejected": -389.8491516113281, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1034518480300903, "rewards/margins": 19.582212448120117, "rewards/rejected": -20.6856632232666, "step": 4819 }, { "epoch": 1.65, "learning_rate": 1.6070785812148358e-07, "logits/chosen": 0.0079933637753129, "logits/rejected": 0.029691249132156372, "logps/chosen": -238.37014770507812, "logps/rejected": -435.2682800292969, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.427919864654541, "rewards/margins": 18.155731201171875, "rewards/rejected": -19.583648681640625, "step": 4820 }, { "epoch": 1.65, "learning_rate": 1.6040748962554163e-07, "logits/chosen": -0.0373198539018631, "logits/rejected": 0.006061182357370853, "logps/chosen": -228.230712890625, "logps/rejected": -378.4682312011719, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.503413438796997, "rewards/margins": 18.29431915283203, "rewards/rejected": -20.797733306884766, "step": 4821 }, { "epoch": 1.65, "learning_rate": 1.6010737761352543e-07, "logits/chosen": -0.000492460501845926, "logits/rejected": 0.044018980115652084, "logps/chosen": -258.987548828125, "logps/rejected": -348.5914611816406, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.8114178776741028, "rewards/margins": 13.827582359313965, "rewards/rejected": -14.638999938964844, "step": 4822 }, { "epoch": 1.65, "learning_rate": 1.5980752217711447e-07, "logits/chosen": 0.11194851249456406, "logits/rejected": 0.12123998999595642, "logps/chosen": -190.50975036621094, "logps/rejected": -343.24884033203125, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -2.8727285861968994, "rewards/margins": 15.798848152160645, "rewards/rejected": -18.67157745361328, "step": 4823 }, { "epoch": 1.65, "learning_rate": 1.595079234079104e-07, "logits/chosen": 0.0579957440495491, "logits/rejected": 0.07157473266124725, "logps/chosen": -223.78482055664062, "logps/rejected": -406.092041015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.2145056426525116, "rewards/margins": 19.722015380859375, "rewards/rejected": -19.936525344848633, "step": 4824 }, { "epoch": 1.65, "learning_rate": 1.592085813974371e-07, "logits/chosen": 0.045706313103437424, "logits/rejected": 0.10424260050058365, "logps/chosen": -243.33819580078125, "logps/rejected": -345.9134216308594, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.6814332008361816, "rewards/margins": 16.611787796020508, "rewards/rejected": -17.29322052001953, "step": 4825 }, { "epoch": 1.65, "learning_rate": 1.5890949623713866e-07, "logits/chosen": -0.086673803627491, "logits/rejected": -0.09706497937440872, "logps/chosen": -217.13951110839844, "logps/rejected": -427.39337158203125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.8471286296844482, "rewards/margins": 16.368892669677734, "rewards/rejected": -18.216022491455078, "step": 4826 }, { "epoch": 1.65, "learning_rate": 1.5861066801838197e-07, "logits/chosen": -0.012025312520563602, "logits/rejected": 0.007668890058994293, "logps/chosen": -220.82162475585938, "logps/rejected": -393.8420715332031, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.7348583936691284, "rewards/margins": 17.712814331054688, "rewards/rejected": -18.44767189025879, "step": 4827 }, { "epoch": 1.65, "learning_rate": 1.583120968324546e-07, "logits/chosen": -0.013317590579390526, "logits/rejected": -0.006773855071514845, "logps/chosen": -175.13433837890625, "logps/rejected": -337.19256591796875, "loss": 0.0746, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7380563616752625, "rewards/margins": 16.020679473876953, "rewards/rejected": -16.75873374938965, "step": 4828 }, { "epoch": 1.65, "learning_rate": 1.5801378277056642e-07, "logits/chosen": 0.004938545171171427, "logits/rejected": 0.0397527776658535, "logps/chosen": -258.45697021484375, "logps/rejected": -379.80181884765625, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -1.8005194664001465, "rewards/margins": 17.107526779174805, "rewards/rejected": -18.90804672241211, "step": 4829 }, { "epoch": 1.65, "learning_rate": 1.5771572592384763e-07, "logits/chosen": -0.1545426845550537, "logits/rejected": -0.09002687782049179, "logps/chosen": -270.0484924316406, "logps/rejected": -409.2637023925781, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.40605470538139343, "rewards/margins": 19.766571044921875, "rewards/rejected": -20.172624588012695, "step": 4830 }, { "epoch": 1.65, "learning_rate": 1.5741792638335095e-07, "logits/chosen": 0.07532574981451035, "logits/rejected": 0.10408298671245575, "logps/chosen": -168.95913696289062, "logps/rejected": -368.91986083984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.24082374572753906, "rewards/margins": 20.06890106201172, "rewards/rejected": -20.309724807739258, "step": 4831 }, { "epoch": 1.65, "learning_rate": 1.5712038424004991e-07, "logits/chosen": 0.06096848472952843, "logits/rejected": 0.09176656603813171, "logps/chosen": -207.95423889160156, "logps/rejected": -318.0065002441406, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.34397727251052856, "rewards/margins": 15.592111587524414, "rewards/rejected": -15.936088562011719, "step": 4832 }, { "epoch": 1.65, "learning_rate": 1.5682309958483918e-07, "logits/chosen": 0.08932198584079742, "logits/rejected": 0.13361839950084686, "logps/chosen": -153.54013061523438, "logps/rejected": -264.0976257324219, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.6197872161865234, "rewards/margins": 13.157608985900879, "rewards/rejected": -14.777397155761719, "step": 4833 }, { "epoch": 1.65, "learning_rate": 1.5652607250853577e-07, "logits/chosen": 0.09095309674739838, "logits/rejected": 0.1271079182624817, "logps/chosen": -169.39822387695312, "logps/rejected": -323.062744140625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5046780109405518, "rewards/margins": 15.696625709533691, "rewards/rejected": -17.20130157470703, "step": 4834 }, { "epoch": 1.65, "learning_rate": 1.562293031018771e-07, "logits/chosen": 0.10641493648290634, "logits/rejected": 0.1323673129081726, "logps/chosen": -194.7384796142578, "logps/rejected": -273.659423828125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.2421438694000244, "rewards/margins": 12.855496406555176, "rewards/rejected": -14.097640037536621, "step": 4835 }, { "epoch": 1.65, "learning_rate": 1.5593279145552162e-07, "logits/chosen": 0.04356260597705841, "logits/rejected": 0.08694383502006531, "logps/chosen": -188.4132080078125, "logps/rejected": -314.79803466796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1945370435714722, "rewards/margins": 15.077937126159668, "rewards/rejected": -16.272472381591797, "step": 4836 }, { "epoch": 1.65, "learning_rate": 1.5563653766005037e-07, "logits/chosen": -0.012312267906963825, "logits/rejected": 0.016177063807845116, "logps/chosen": -194.05722045898438, "logps/rejected": -351.669677734375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.625896453857422, "rewards/margins": 14.953139305114746, "rewards/rejected": -17.57903480529785, "step": 4837 }, { "epoch": 1.65, "learning_rate": 1.5534054180596412e-07, "logits/chosen": 0.04972393438220024, "logits/rejected": 0.06731083989143372, "logps/chosen": -213.14630126953125, "logps/rejected": -392.508544921875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150308132171631, "rewards/margins": 17.210895538330078, "rewards/rejected": -16.895864486694336, "step": 4838 }, { "epoch": 1.65, "learning_rate": 1.550448039836858e-07, "logits/chosen": -0.016572782769799232, "logits/rejected": 0.05239709094166756, "logps/chosen": -239.9278564453125, "logps/rejected": -383.8324279785156, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.8551186323165894, "rewards/margins": 18.961820602416992, "rewards/rejected": -20.816940307617188, "step": 4839 }, { "epoch": 1.65, "learning_rate": 1.5474932428355957e-07, "logits/chosen": 0.00233522173948586, "logits/rejected": 0.04241400957107544, "logps/chosen": -213.67239379882812, "logps/rejected": -381.14874267578125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.7175525426864624, "rewards/margins": 18.465911865234375, "rewards/rejected": -19.1834659576416, "step": 4840 }, { "epoch": 1.65, "learning_rate": 1.544541027958497e-07, "logits/chosen": -0.061655089259147644, "logits/rejected": -0.03526420518755913, "logps/chosen": -211.80767822265625, "logps/rejected": -344.9834289550781, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -2.6782736778259277, "rewards/margins": 12.652263641357422, "rewards/rejected": -15.330536842346191, "step": 4841 }, { "epoch": 1.65, "learning_rate": 1.5415913961074268e-07, "logits/chosen": -0.053681548684835434, "logits/rejected": -0.005353632383048534, "logps/chosen": -250.60009765625, "logps/rejected": -380.296875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.3166227340698242, "rewards/margins": 15.50113296508789, "rewards/rejected": -16.81775665283203, "step": 4842 }, { "epoch": 1.65, "learning_rate": 1.5386443481834587e-07, "logits/chosen": -0.06375465542078018, "logits/rejected": -0.04934951290488243, "logps/chosen": -168.52545166015625, "logps/rejected": -360.861083984375, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.2264068126678467, "rewards/margins": 17.059738159179688, "rewards/rejected": -18.286144256591797, "step": 4843 }, { "epoch": 1.65, "learning_rate": 1.535699885086872e-07, "logits/chosen": 0.039251770824193954, "logits/rejected": 0.07412464171648026, "logps/chosen": -181.59474182128906, "logps/rejected": -291.3274841308594, "loss": 0.0609, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1033217906951904, "rewards/margins": 10.723945617675781, "rewards/rejected": -12.82726764678955, "step": 4844 }, { "epoch": 1.65, "learning_rate": 1.5327580077171588e-07, "logits/chosen": -0.05328645557165146, "logits/rejected": -0.030987413600087166, "logps/chosen": -254.78707885742188, "logps/rejected": -436.9042053222656, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.038907527923584, "rewards/margins": 20.23400115966797, "rewards/rejected": -22.27290916442871, "step": 4845 }, { "epoch": 1.65, "learning_rate": 1.5298187169730248e-07, "logits/chosen": 0.0816529393196106, "logits/rejected": 0.10948524624109268, "logps/chosen": -209.5892791748047, "logps/rejected": -357.03411865234375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.3362503051757812, "rewards/margins": 16.066688537597656, "rewards/rejected": -18.40294075012207, "step": 4846 }, { "epoch": 1.65, "learning_rate": 1.526882013752383e-07, "logits/chosen": 0.033244237303733826, "logits/rejected": 0.06299590319395065, "logps/chosen": -160.691650390625, "logps/rejected": -289.9624328613281, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.08824028074741364, "rewards/margins": 15.965112686157227, "rewards/rejected": -16.053354263305664, "step": 4847 }, { "epoch": 1.65, "learning_rate": 1.5239478989523525e-07, "logits/chosen": -0.0003647748671937734, "logits/rejected": 0.022670848295092583, "logps/chosen": -221.54574584960938, "logps/rejected": -482.43402099609375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.2248988151550293, "rewards/margins": 23.312597274780273, "rewards/rejected": -25.537498474121094, "step": 4848 }, { "epoch": 1.65, "learning_rate": 1.5210163734692672e-07, "logits/chosen": -0.068159319460392, "logits/rejected": -0.03285614028573036, "logps/chosen": -248.7535858154297, "logps/rejected": -360.6458435058594, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.495809406042099, "rewards/margins": 14.813667297363281, "rewards/rejected": -15.309475898742676, "step": 4849 }, { "epoch": 1.66, "learning_rate": 1.5180874381986696e-07, "logits/chosen": 0.011280998587608337, "logits/rejected": 0.02840283513069153, "logps/chosen": -235.94549560546875, "logps/rejected": -390.5926208496094, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.247846245765686, "rewards/margins": 15.715340614318848, "rewards/rejected": -16.963186264038086, "step": 4850 }, { "epoch": 1.66, "learning_rate": 1.515161094035301e-07, "logits/chosen": 0.05186636000871658, "logits/rejected": 0.0652969628572464, "logps/chosen": -167.2625274658203, "logps/rejected": -333.967041015625, "loss": 0.0279, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0316412448883057, "rewards/margins": 14.091496467590332, "rewards/rejected": -15.123138427734375, "step": 4851 }, { "epoch": 1.66, "learning_rate": 1.5122373418731304e-07, "logits/chosen": 0.11379779875278473, "logits/rejected": 0.12991370260715485, "logps/chosen": -172.38262939453125, "logps/rejected": -307.2477111816406, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.779175043106079, "rewards/margins": 13.75685977935791, "rewards/rejected": -15.53603458404541, "step": 4852 }, { "epoch": 1.66, "learning_rate": 1.509316182605318e-07, "logits/chosen": -0.030698217451572418, "logits/rejected": -0.011378110386431217, "logps/chosen": -210.2983856201172, "logps/rejected": -420.9255676269531, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.804690957069397, "rewards/margins": 20.65726089477539, "rewards/rejected": -21.461950302124023, "step": 4853 }, { "epoch": 1.66, "learning_rate": 1.5063976171242333e-07, "logits/chosen": 0.007712061982601881, "logits/rejected": 0.025162916630506516, "logps/chosen": -173.84378051757812, "logps/rejected": -322.17657470703125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.1425960063934326, "rewards/margins": 14.25247573852539, "rewards/rejected": -15.395071029663086, "step": 4854 }, { "epoch": 1.66, "learning_rate": 1.5034816463214673e-07, "logits/chosen": 0.1852680742740631, "logits/rejected": 0.1996462196111679, "logps/chosen": -146.85345458984375, "logps/rejected": -279.78692626953125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.610908269882202, "rewards/margins": 13.788008689880371, "rewards/rejected": -16.39891815185547, "step": 4855 }, { "epoch": 1.66, "learning_rate": 1.5005682710878009e-07, "logits/chosen": 0.008478938601911068, "logits/rejected": 0.025358565151691437, "logps/chosen": -231.06407165527344, "logps/rejected": -406.7174377441406, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -1.0391368865966797, "rewards/margins": 18.434301376342773, "rewards/rejected": -19.473438262939453, "step": 4856 }, { "epoch": 1.66, "learning_rate": 1.4976574923132322e-07, "logits/chosen": 0.02578956075012684, "logits/rejected": 0.054812099784612656, "logps/chosen": -224.3797607421875, "logps/rejected": -405.7980041503906, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.32261210680007935, "rewards/margins": 21.346433639526367, "rewards/rejected": -21.66904640197754, "step": 4857 }, { "epoch": 1.66, "learning_rate": 1.494749310886968e-07, "logits/chosen": 0.10013610124588013, "logits/rejected": 0.15149614214897156, "logps/chosen": -226.59580993652344, "logps/rejected": -354.19647216796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.4633582532405853, "rewards/margins": 18.038007736206055, "rewards/rejected": -18.501367568969727, "step": 4858 }, { "epoch": 1.66, "learning_rate": 1.4918437276974126e-07, "logits/chosen": 0.06330537050962448, "logits/rejected": 0.07418952137231827, "logps/chosen": -132.7184600830078, "logps/rejected": -283.13067626953125, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -3.0389158725738525, "rewards/margins": 14.071258544921875, "rewards/rejected": -17.11017608642578, "step": 4859 }, { "epoch": 1.66, "learning_rate": 1.4889407436321822e-07, "logits/chosen": 0.024053391069173813, "logits/rejected": 0.035533297806978226, "logps/chosen": -232.41744995117188, "logps/rejected": -410.95245361328125, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -2.498420476913452, "rewards/margins": 15.442651748657227, "rewards/rejected": -17.941072463989258, "step": 4860 }, { "epoch": 1.66, "learning_rate": 1.4860403595781003e-07, "logits/chosen": -0.06368456780910492, "logits/rejected": -0.04625090956687927, "logps/chosen": -184.3076629638672, "logps/rejected": -358.185791015625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.310848355293274, "rewards/margins": 17.159086227416992, "rewards/rejected": -18.46993637084961, "step": 4861 }, { "epoch": 1.66, "learning_rate": 1.4831425764211947e-07, "logits/chosen": -0.03076552227139473, "logits/rejected": 0.012214500457048416, "logps/chosen": -176.40081787109375, "logps/rejected": -281.424560546875, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.7555038928985596, "rewards/margins": 13.169668197631836, "rewards/rejected": -13.925172805786133, "step": 4862 }, { "epoch": 1.66, "learning_rate": 1.4802473950466944e-07, "logits/chosen": -0.043141257017850876, "logits/rejected": -0.008080846630036831, "logps/chosen": -239.9901123046875, "logps/rejected": -425.11614990234375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.3831045627593994, "rewards/margins": 19.9761905670166, "rewards/rejected": -21.359294891357422, "step": 4863 }, { "epoch": 1.66, "learning_rate": 1.4773548163390404e-07, "logits/chosen": -0.06823445856571198, "logits/rejected": -0.03865879029035568, "logps/chosen": -256.44012451171875, "logps/rejected": -442.42523193359375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.442626476287842, "rewards/margins": 18.841890335083008, "rewards/rejected": -21.284515380859375, "step": 4864 }, { "epoch": 1.66, "learning_rate": 1.4744648411818772e-07, "logits/chosen": 0.16338036954402924, "logits/rejected": 0.17319872975349426, "logps/chosen": -213.29672241210938, "logps/rejected": -387.3903503417969, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.0741822719573975, "rewards/margins": 17.292570114135742, "rewards/rejected": -18.36675453186035, "step": 4865 }, { "epoch": 1.66, "learning_rate": 1.4715774704580453e-07, "logits/chosen": 0.07751496881246567, "logits/rejected": 0.11097940057516098, "logps/chosen": -214.08642578125, "logps/rejected": -368.6437072753906, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.9004125595092773, "rewards/margins": 16.503023147583008, "rewards/rejected": -18.40343475341797, "step": 4866 }, { "epoch": 1.66, "learning_rate": 1.4686927050496067e-07, "logits/chosen": 0.051000937819480896, "logits/rejected": 0.0828695222735405, "logps/chosen": -235.9749755859375, "logps/rejected": -467.7510681152344, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.2699053883552551, "rewards/margins": 21.67233657836914, "rewards/rejected": -21.942241668701172, "step": 4867 }, { "epoch": 1.66, "learning_rate": 1.465810545837811e-07, "logits/chosen": -0.02038988657295704, "logits/rejected": 0.012334566563367844, "logps/chosen": -196.64462280273438, "logps/rejected": -365.65582275390625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7818254232406616, "rewards/margins": 18.6408748626709, "rewards/rejected": -19.422700881958008, "step": 4868 }, { "epoch": 1.66, "learning_rate": 1.4629309937031154e-07, "logits/chosen": 0.13708560168743134, "logits/rejected": 0.1473361849784851, "logps/chosen": -100.38853454589844, "logps/rejected": -263.1715087890625, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -1.7401492595672607, "rewards/margins": 14.66356086730957, "rewards/rejected": -16.403709411621094, "step": 4869 }, { "epoch": 1.66, "learning_rate": 1.4600540495251912e-07, "logits/chosen": -0.06609781086444855, "logits/rejected": -0.025545038282871246, "logps/chosen": -276.90899658203125, "logps/rejected": -462.3263244628906, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.1020430326461792, "rewards/margins": 21.535715103149414, "rewards/rejected": -22.637760162353516, "step": 4870 }, { "epoch": 1.66, "learning_rate": 1.4571797141828979e-07, "logits/chosen": 0.1140214204788208, "logits/rejected": 0.14017453789710999, "logps/chosen": -224.56002807617188, "logps/rejected": -361.9488525390625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.198439598083496, "rewards/margins": 14.922708511352539, "rewards/rejected": -16.12114715576172, "step": 4871 }, { "epoch": 1.66, "learning_rate": 1.4543079885543097e-07, "logits/chosen": 0.09660984575748444, "logits/rejected": 0.14660438895225525, "logps/chosen": -211.82223510742188, "logps/rejected": -321.1305847167969, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.232520580291748, "rewards/margins": 13.087817192077637, "rewards/rejected": -15.32033920288086, "step": 4872 }, { "epoch": 1.66, "learning_rate": 1.4514388735167006e-07, "logits/chosen": 0.03563258424401283, "logits/rejected": 0.06733637303113937, "logps/chosen": -216.60321044921875, "logps/rejected": -351.0926208496094, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.7735044956207275, "rewards/margins": 15.381318092346191, "rewards/rejected": -18.154823303222656, "step": 4873 }, { "epoch": 1.66, "learning_rate": 1.448572369946539e-07, "logits/chosen": -0.007357233203947544, "logits/rejected": 0.04255016893148422, "logps/chosen": -254.54698181152344, "logps/rejected": -416.122802734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.43011736869812, "rewards/margins": 17.604408264160156, "rewards/rejected": -20.034526824951172, "step": 4874 }, { "epoch": 1.66, "learning_rate": 1.4457084787195073e-07, "logits/chosen": 0.052417952567338943, "logits/rejected": 0.06489697098731995, "logps/chosen": -164.15415954589844, "logps/rejected": -333.21185302734375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.844285249710083, "rewards/margins": 15.539105415344238, "rewards/rejected": -18.38338851928711, "step": 4875 }, { "epoch": 1.66, "learning_rate": 1.4428472007104832e-07, "logits/chosen": 0.04781166836619377, "logits/rejected": 0.08084914833307266, "logps/chosen": -259.60247802734375, "logps/rejected": -371.8026123046875, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.1035611629486084, "rewards/margins": 14.749957084655762, "rewards/rejected": -16.853519439697266, "step": 4876 }, { "epoch": 1.66, "learning_rate": 1.4399885367935506e-07, "logits/chosen": 0.05179944634437561, "logits/rejected": 0.07345840334892273, "logps/chosen": -193.93016052246094, "logps/rejected": -358.6689453125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.0064759254455566, "rewards/margins": 15.764429092407227, "rewards/rejected": -16.770906448364258, "step": 4877 }, { "epoch": 1.66, "learning_rate": 1.4371324878419855e-07, "logits/chosen": -0.0088008688762784, "logits/rejected": 0.035904839634895325, "logps/chosen": -157.79466247558594, "logps/rejected": -437.50469970703125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.469557523727417, "rewards/margins": 23.73353385925293, "rewards/rejected": -26.203092575073242, "step": 4878 }, { "epoch": 1.67, "learning_rate": 1.434279054728277e-07, "logits/chosen": 0.026200417429208755, "logits/rejected": 0.043293893337249756, "logps/chosen": -181.80252075195312, "logps/rejected": -306.2244873046875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.8286329507827759, "rewards/margins": 13.042181968688965, "rewards/rejected": -14.87081527709961, "step": 4879 }, { "epoch": 1.67, "learning_rate": 1.4314282383241095e-07, "logits/chosen": 0.117487333714962, "logits/rejected": 0.15531013906002045, "logps/chosen": -188.3287353515625, "logps/rejected": -366.6141052246094, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.0753164291381836, "rewards/margins": 19.5375919342041, "rewards/rejected": -21.612911224365234, "step": 4880 }, { "epoch": 1.67, "learning_rate": 1.4285800395003634e-07, "logits/chosen": -0.023337751626968384, "logits/rejected": -0.01406277809292078, "logps/chosen": -161.5330352783203, "logps/rejected": -383.5641784667969, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -2.194688081741333, "rewards/margins": 18.690277099609375, "rewards/rejected": -20.884963989257812, "step": 4881 }, { "epoch": 1.67, "learning_rate": 1.425734459127128e-07, "logits/chosen": -0.08793173730373383, "logits/rejected": -0.040541134774684906, "logps/chosen": -241.94253540039062, "logps/rejected": -327.7285461425781, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.5384715795516968, "rewards/margins": 14.449286460876465, "rewards/rejected": -15.98775863647461, "step": 4882 }, { "epoch": 1.67, "learning_rate": 1.4228914980736896e-07, "logits/chosen": -0.027739115059375763, "logits/rejected": 0.01761730946600437, "logps/chosen": -227.30133056640625, "logps/rejected": -369.9300537109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.5629246234893799, "rewards/margins": 17.21089744567871, "rewards/rejected": -17.773822784423828, "step": 4883 }, { "epoch": 1.67, "learning_rate": 1.4200511572085273e-07, "logits/chosen": 0.01211397536098957, "logits/rejected": 0.029476674273610115, "logps/chosen": -208.7637481689453, "logps/rejected": -425.74957275390625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.30134865641593933, "rewards/margins": 22.068294525146484, "rewards/rejected": -22.369640350341797, "step": 4884 }, { "epoch": 1.67, "learning_rate": 1.417213437399335e-07, "logits/chosen": 0.05343683809041977, "logits/rejected": 0.09440283477306366, "logps/chosen": -210.2640838623047, "logps/rejected": -373.6365966796875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.7274761199951172, "rewards/margins": 18.10367774963379, "rewards/rejected": -19.83115577697754, "step": 4885 }, { "epoch": 1.67, "learning_rate": 1.4143783395129905e-07, "logits/chosen": -0.03252909332513809, "logits/rejected": 0.0009279122459702194, "logps/chosen": -235.67677307128906, "logps/rejected": -455.22296142578125, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -2.0283315181732178, "rewards/margins": 18.374221801757812, "rewards/rejected": -20.402555465698242, "step": 4886 }, { "epoch": 1.67, "learning_rate": 1.41154586441558e-07, "logits/chosen": -0.005058441776782274, "logits/rejected": 0.045431122183799744, "logps/chosen": -235.5931396484375, "logps/rejected": -395.03509521484375, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.7281808853149414, "rewards/margins": 17.386857986450195, "rewards/rejected": -19.115039825439453, "step": 4887 }, { "epoch": 1.67, "learning_rate": 1.4087160129723853e-07, "logits/chosen": 0.0331757552921772, "logits/rejected": 0.04296070337295532, "logps/chosen": -189.55331420898438, "logps/rejected": -391.943603515625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.971423864364624, "rewards/margins": 16.847427368164062, "rewards/rejected": -19.818851470947266, "step": 4888 }, { "epoch": 1.67, "learning_rate": 1.405888786047885e-07, "logits/chosen": 0.08046796917915344, "logits/rejected": 0.10460818558931351, "logps/chosen": -239.82484436035156, "logps/rejected": -390.58349609375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -3.021904706954956, "rewards/margins": 16.62546157836914, "rewards/rejected": -19.64736557006836, "step": 4889 }, { "epoch": 1.67, "learning_rate": 1.403064184505759e-07, "logits/chosen": -0.04467424005270004, "logits/rejected": -0.028176501393318176, "logps/chosen": -242.01348876953125, "logps/rejected": -376.82928466796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6856897473335266, "rewards/margins": 16.90216064453125, "rewards/rejected": -17.587848663330078, "step": 4890 }, { "epoch": 1.67, "learning_rate": 1.4002422092088862e-07, "logits/chosen": 0.11074143648147583, "logits/rejected": 0.11839722841978073, "logps/chosen": -149.00082397460938, "logps/rejected": -252.02110290527344, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.8225717544555664, "rewards/margins": 11.4226655960083, "rewards/rejected": -13.24523639678955, "step": 4891 }, { "epoch": 1.67, "learning_rate": 1.3974228610193373e-07, "logits/chosen": -0.062283892184495926, "logits/rejected": -0.055896274745464325, "logps/chosen": -216.3626708984375, "logps/rejected": -405.31573486328125, "loss": 0.0463, "rewards/accuracies": 0.9375, "rewards/chosen": -3.253791332244873, "rewards/margins": 16.163501739501953, "rewards/rejected": -19.417293548583984, "step": 4892 }, { "epoch": 1.67, "learning_rate": 1.3946061407983866e-07, "logits/chosen": -0.04148253798484802, "logits/rejected": -0.010230782441794872, "logps/chosen": -257.7579345703125, "logps/rejected": -439.90692138671875, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -1.9287140369415283, "rewards/margins": 19.367830276489258, "rewards/rejected": -21.296545028686523, "step": 4893 }, { "epoch": 1.67, "learning_rate": 1.3917920494065026e-07, "logits/chosen": 0.10585232824087143, "logits/rejected": 0.13526375591754913, "logps/chosen": -195.85018920898438, "logps/rejected": -368.3408508300781, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.5850119590759277, "rewards/margins": 16.294153213500977, "rewards/rejected": -17.87916374206543, "step": 4894 }, { "epoch": 1.67, "learning_rate": 1.3889805877033544e-07, "logits/chosen": -0.06737624108791351, "logits/rejected": -0.01839720457792282, "logps/chosen": -250.17820739746094, "logps/rejected": -479.23956298828125, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -2.281003475189209, "rewards/margins": 23.171140670776367, "rewards/rejected": -25.4521484375, "step": 4895 }, { "epoch": 1.67, "learning_rate": 1.3861717565477992e-07, "logits/chosen": -0.007613467518240213, "logits/rejected": 0.02001655288040638, "logps/chosen": -199.51638793945312, "logps/rejected": -340.56390380859375, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -1.0337742567062378, "rewards/margins": 15.04117488861084, "rewards/rejected": -16.074949264526367, "step": 4896 }, { "epoch": 1.67, "learning_rate": 1.3833655567978987e-07, "logits/chosen": -0.03422674164175987, "logits/rejected": 0.002693495247513056, "logps/chosen": -273.98822021484375, "logps/rejected": -419.40106201171875, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -1.5953576564788818, "rewards/margins": 17.644933700561523, "rewards/rejected": -19.240291595458984, "step": 4897 }, { "epoch": 1.67, "learning_rate": 1.380561989310911e-07, "logits/chosen": -0.06147059053182602, "logits/rejected": -0.01861109398305416, "logps/chosen": -265.5656433105469, "logps/rejected": -407.2082214355469, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.5145822763442993, "rewards/margins": 18.938549041748047, "rewards/rejected": -20.453128814697266, "step": 4898 }, { "epoch": 1.67, "learning_rate": 1.3777610549432794e-07, "logits/chosen": 0.09389352798461914, "logits/rejected": 0.11343218386173248, "logps/chosen": -141.97317504882812, "logps/rejected": -280.1515808105469, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -1.2338429689407349, "rewards/margins": 13.196878433227539, "rewards/rejected": -14.430721282958984, "step": 4899 }, { "epoch": 1.67, "learning_rate": 1.3749627545506614e-07, "logits/chosen": 0.007192033808678389, "logits/rejected": 0.06693103909492493, "logps/chosen": -250.91827392578125, "logps/rejected": -360.6895446777344, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.1425693035125732, "rewards/margins": 16.930564880371094, "rewards/rejected": -19.07313346862793, "step": 4900 }, { "epoch": 1.67, "learning_rate": 1.3721670889878913e-07, "logits/chosen": 0.05676383897662163, "logits/rejected": 0.0755702406167984, "logps/chosen": -178.9878692626953, "logps/rejected": -330.2199401855469, "loss": 0.0523, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2440695762634277, "rewards/margins": 14.249258995056152, "rewards/rejected": -16.493328094482422, "step": 4901 }, { "epoch": 1.67, "learning_rate": 1.3693740591090097e-07, "logits/chosen": 0.04515812173485756, "logits/rejected": 0.06954003870487213, "logps/chosen": -222.96209716796875, "logps/rejected": -407.26263427734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.052856683731079, "rewards/margins": 17.984922409057617, "rewards/rejected": -20.03778076171875, "step": 4902 }, { "epoch": 1.67, "learning_rate": 1.3665836657672492e-07, "logits/chosen": 0.07811512798070908, "logits/rejected": 0.08301788568496704, "logps/chosen": -170.46578979492188, "logps/rejected": -330.7551574707031, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.4391216039657593, "rewards/margins": 14.168627738952637, "rewards/rejected": -15.60775089263916, "step": 4903 }, { "epoch": 1.67, "learning_rate": 1.363795909815032e-07, "logits/chosen": -0.08980212360620499, "logits/rejected": -0.033518847078084946, "logps/chosen": -249.865478515625, "logps/rejected": -388.53228759765625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.1343963146209717, "rewards/margins": 18.43335723876953, "rewards/rejected": -20.567752838134766, "step": 4904 }, { "epoch": 1.67, "learning_rate": 1.3610107921039838e-07, "logits/chosen": 0.09325123578310013, "logits/rejected": 0.11534631997346878, "logps/chosen": -207.6671600341797, "logps/rejected": -293.1350402832031, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.8917031288146973, "rewards/margins": 12.105071067810059, "rewards/rejected": -13.996774673461914, "step": 4905 }, { "epoch": 1.67, "learning_rate": 1.3582283134849194e-07, "logits/chosen": 0.0628698468208313, "logits/rejected": 0.04100242629647255, "logps/chosen": -186.3186798095703, "logps/rejected": -387.2511291503906, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -2.5682616233825684, "rewards/margins": 14.282347679138184, "rewards/rejected": -16.850608825683594, "step": 4906 }, { "epoch": 1.67, "learning_rate": 1.355448474807843e-07, "logits/chosen": 0.012435232289135456, "logits/rejected": 0.062189336866140366, "logps/chosen": -211.22232055664062, "logps/rejected": -357.6808776855469, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -2.5289974212646484, "rewards/margins": 16.494579315185547, "rewards/rejected": -19.023576736450195, "step": 4907 }, { "epoch": 1.68, "learning_rate": 1.3526712769219617e-07, "logits/chosen": 0.09774257987737656, "logits/rejected": 0.13456392288208008, "logps/chosen": -233.32192993164062, "logps/rejected": -307.52227783203125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.565435528755188, "rewards/margins": 13.741758346557617, "rewards/rejected": -15.307193756103516, "step": 4908 }, { "epoch": 1.68, "learning_rate": 1.3498967206756683e-07, "logits/chosen": 0.05268290266394615, "logits/rejected": 0.09068766236305237, "logps/chosen": -185.96620178222656, "logps/rejected": -298.53289794921875, "loss": 0.0472, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4257993698120117, "rewards/margins": 13.702543258666992, "rewards/rejected": -15.12834358215332, "step": 4909 }, { "epoch": 1.68, "learning_rate": 1.3471248069165565e-07, "logits/chosen": 0.009589537046849728, "logits/rejected": 0.010045116767287254, "logps/chosen": -191.63800048828125, "logps/rejected": -391.6286315917969, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.228745937347412, "rewards/margins": 16.36309814453125, "rewards/rejected": -18.591842651367188, "step": 4910 }, { "epoch": 1.68, "learning_rate": 1.3443555364914005e-07, "logits/chosen": -0.06875143945217133, "logits/rejected": -0.021418675780296326, "logps/chosen": -266.94671630859375, "logps/rejected": -396.80975341796875, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -1.9134875535964966, "rewards/margins": 19.333276748657227, "rewards/rejected": -21.246763229370117, "step": 4911 }, { "epoch": 1.68, "learning_rate": 1.3415889102461775e-07, "logits/chosen": -0.05715295299887657, "logits/rejected": -0.005064732860773802, "logps/chosen": -209.0209197998047, "logps/rejected": -320.7208251953125, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -1.60189950466156, "rewards/margins": 16.431554794311523, "rewards/rejected": -18.0334529876709, "step": 4912 }, { "epoch": 1.68, "learning_rate": 1.3388249290260566e-07, "logits/chosen": 0.055567387491464615, "logits/rejected": 0.06356681883335114, "logps/chosen": -230.6560516357422, "logps/rejected": -399.75347900390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.3852977752685547, "rewards/margins": 16.922136306762695, "rewards/rejected": -19.30743408203125, "step": 4913 }, { "epoch": 1.68, "learning_rate": 1.3360635936753884e-07, "logits/chosen": -0.06584326922893524, "logits/rejected": -0.03494084253907204, "logps/chosen": -205.0557098388672, "logps/rejected": -372.2132873535156, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": -2.051335096359253, "rewards/margins": 17.425878524780273, "rewards/rejected": -19.47721290588379, "step": 4914 }, { "epoch": 1.68, "learning_rate": 1.333304905037731e-07, "logits/chosen": 0.005845942068845034, "logits/rejected": 0.021709011867642403, "logps/chosen": -234.43414306640625, "logps/rejected": -383.86572265625, "loss": 0.0704, "rewards/accuracies": 1.0, "rewards/chosen": -0.690987229347229, "rewards/margins": 16.785234451293945, "rewards/rejected": -17.476221084594727, "step": 4915 }, { "epoch": 1.68, "learning_rate": 1.3305488639558205e-07, "logits/chosen": 0.07741416990756989, "logits/rejected": 0.10188179463148117, "logps/chosen": -194.73292541503906, "logps/rejected": -317.738037109375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.2776602506637573, "rewards/margins": 14.03866958618164, "rewards/rejected": -15.316329956054688, "step": 4916 }, { "epoch": 1.68, "learning_rate": 1.327795471271591e-07, "logits/chosen": -0.016439521685242653, "logits/rejected": -0.014922496862709522, "logps/chosen": -223.96038818359375, "logps/rejected": -403.2737731933594, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.9242324233055115, "rewards/margins": 17.00718116760254, "rewards/rejected": -17.931411743164062, "step": 4917 }, { "epoch": 1.68, "learning_rate": 1.3250447278261688e-07, "logits/chosen": 0.08746340870857239, "logits/rejected": 0.10713813453912735, "logps/chosen": -157.20758056640625, "logps/rejected": -316.6732482910156, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.8528751134872437, "rewards/margins": 15.506781578063965, "rewards/rejected": -17.359657287597656, "step": 4918 }, { "epoch": 1.68, "learning_rate": 1.3222966344598618e-07, "logits/chosen": 0.11749495565891266, "logits/rejected": 0.15025939047336578, "logps/chosen": -168.78628540039062, "logps/rejected": -299.3243408203125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.832998752593994, "rewards/margins": 13.011874198913574, "rewards/rejected": -15.84487247467041, "step": 4919 }, { "epoch": 1.68, "learning_rate": 1.3195511920121795e-07, "logits/chosen": 0.17879052460193634, "logits/rejected": 0.20340053737163544, "logps/chosen": -206.63442993164062, "logps/rejected": -315.6321105957031, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.0072336941957473755, "rewards/margins": 14.551836013793945, "rewards/rejected": -14.559069633483887, "step": 4920 }, { "epoch": 1.68, "learning_rate": 1.316808401321816e-07, "logits/chosen": -0.040778279304504395, "logits/rejected": -0.01800931803882122, "logps/chosen": -234.8511199951172, "logps/rejected": -337.4211120605469, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.3645767569541931, "rewards/margins": 14.46522045135498, "rewards/rejected": -14.829797744750977, "step": 4921 }, { "epoch": 1.68, "learning_rate": 1.3140682632266543e-07, "logits/chosen": 0.06449993699789047, "logits/rejected": 0.07056983560323715, "logps/chosen": -140.61160278320312, "logps/rejected": -280.3511657714844, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.1887335777282715, "rewards/margins": 12.808234214782715, "rewards/rejected": -14.996967315673828, "step": 4922 }, { "epoch": 1.68, "learning_rate": 1.3113307785637694e-07, "logits/chosen": 0.040511589497327805, "logits/rejected": 0.08193224668502808, "logps/chosen": -258.3876953125, "logps/rejected": -373.8473205566406, "loss": 0.0308, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2190535068511963, "rewards/margins": 18.22132110595703, "rewards/rejected": -19.44037628173828, "step": 4923 }, { "epoch": 1.68, "learning_rate": 1.3085959481694264e-07, "logits/chosen": -0.06154903769493103, "logits/rejected": -0.0012628688709810376, "logps/chosen": -192.92958068847656, "logps/rejected": -323.69952392578125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.7661858797073364, "rewards/margins": 17.686975479125977, "rewards/rejected": -18.453163146972656, "step": 4924 }, { "epoch": 1.68, "learning_rate": 1.3058637728790766e-07, "logits/chosen": 0.026395253837108612, "logits/rejected": 0.05210717394948006, "logps/chosen": -188.94618225097656, "logps/rejected": -321.8620300292969, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.4003695249557495, "rewards/margins": 14.600057601928711, "rewards/rejected": -16.00042724609375, "step": 4925 }, { "epoch": 1.68, "learning_rate": 1.3031342535273614e-07, "logits/chosen": 0.11811448633670807, "logits/rejected": 0.11603166162967682, "logps/chosen": -159.8470001220703, "logps/rejected": -358.1246032714844, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -2.4966070652008057, "rewards/margins": 17.38776969909668, "rewards/rejected": -19.884374618530273, "step": 4926 }, { "epoch": 1.68, "learning_rate": 1.3004073909481128e-07, "logits/chosen": -0.057231355458498, "logits/rejected": -0.0189583171159029, "logps/chosen": -272.01519775390625, "logps/rejected": -468.40716552734375, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -2.6764075756073, "rewards/margins": 18.250770568847656, "rewards/rejected": -20.92717933654785, "step": 4927 }, { "epoch": 1.68, "learning_rate": 1.297683185974352e-07, "logits/chosen": 0.018752148374915123, "logits/rejected": 0.0211586095392704, "logps/chosen": -213.54603576660156, "logps/rejected": -364.7466735839844, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -2.688326835632324, "rewards/margins": 13.547112464904785, "rewards/rejected": -16.23543930053711, "step": 4928 }, { "epoch": 1.68, "learning_rate": 1.29496163943828e-07, "logits/chosen": -0.06051609292626381, "logits/rejected": -0.028102358803153038, "logps/chosen": -221.9303436279297, "logps/rejected": -373.86297607421875, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.7121238708496094, "rewards/margins": 15.808439254760742, "rewards/rejected": -16.52056121826172, "step": 4929 }, { "epoch": 1.68, "learning_rate": 1.2922427521712965e-07, "logits/chosen": -0.05098912492394447, "logits/rejected": -0.03436729684472084, "logps/chosen": -192.94390869140625, "logps/rejected": -363.1521911621094, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.8227514028549194, "rewards/margins": 14.844186782836914, "rewards/rejected": -16.66693687438965, "step": 4930 }, { "epoch": 1.68, "learning_rate": 1.2895265250039812e-07, "logits/chosen": -0.07823967188596725, "logits/rejected": -0.04569870978593826, "logps/chosen": -248.84683227539062, "logps/rejected": -449.4044189453125, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -1.94606614112854, "rewards/margins": 19.12286376953125, "rewards/rejected": -21.068931579589844, "step": 4931 }, { "epoch": 1.68, "learning_rate": 1.286812958766106e-07, "logits/chosen": 0.07637712359428406, "logits/rejected": 0.12317607551813126, "logps/chosen": -177.86575317382812, "logps/rejected": -321.1329040527344, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.3225929737091064, "rewards/margins": 15.900514602661133, "rewards/rejected": -17.223108291625977, "step": 4932 }, { "epoch": 1.68, "learning_rate": 1.2841020542866289e-07, "logits/chosen": 0.004667616914957762, "logits/rejected": 0.0384872667491436, "logps/chosen": -229.36038208007812, "logps/rejected": -335.8895568847656, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.9914483428001404, "rewards/margins": 14.835611343383789, "rewards/rejected": -15.827058792114258, "step": 4933 }, { "epoch": 1.68, "learning_rate": 1.2813938123936906e-07, "logits/chosen": -0.015976084396243095, "logits/rejected": -0.005086306016892195, "logps/chosen": -241.7430419921875, "logps/rejected": -392.629638671875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.4885318279266357, "rewards/margins": 14.503210067749023, "rewards/rejected": -15.991742134094238, "step": 4934 }, { "epoch": 1.68, "learning_rate": 1.2786882339146232e-07, "logits/chosen": 0.06723075360059738, "logits/rejected": 0.07895581424236298, "logps/chosen": -216.16732788085938, "logps/rejected": -368.0450744628906, "loss": 0.04, "rewards/accuracies": 0.9375, "rewards/chosen": -0.37446558475494385, "rewards/margins": 16.051013946533203, "rewards/rejected": -16.42548179626465, "step": 4935 }, { "epoch": 1.68, "learning_rate": 1.275985319675945e-07, "logits/chosen": -0.003393962513655424, "logits/rejected": -0.002231396036222577, "logps/chosen": -175.90994262695312, "logps/rejected": -340.3436584472656, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.7712552547454834, "rewards/margins": 13.581539154052734, "rewards/rejected": -16.35279655456543, "step": 4936 }, { "epoch": 1.68, "learning_rate": 1.273285070503356e-07, "logits/chosen": 0.10094719380140305, "logits/rejected": 0.11633172631263733, "logps/chosen": -181.80026245117188, "logps/rejected": -323.6420593261719, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.0234051942825317, "rewards/margins": 14.01185131072998, "rewards/rejected": -15.035256385803223, "step": 4937 }, { "epoch": 1.69, "learning_rate": 1.270587487221747e-07, "logits/chosen": 0.16028939187526703, "logits/rejected": 0.1897471398115158, "logps/chosen": -199.37977600097656, "logps/rejected": -331.34478759765625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.4617873430252075, "rewards/margins": 14.417365074157715, "rewards/rejected": -15.879154205322266, "step": 4938 }, { "epoch": 1.69, "learning_rate": 1.2678925706551934e-07, "logits/chosen": 0.03085043840110302, "logits/rejected": 0.0734793022274971, "logps/chosen": -229.0662841796875, "logps/rejected": -404.2540283203125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.963428020477295, "rewards/margins": 19.697998046875, "rewards/rejected": -21.661426544189453, "step": 4939 }, { "epoch": 1.69, "learning_rate": 1.2652003216269524e-07, "logits/chosen": -0.0813838541507721, "logits/rejected": -0.053701478987932205, "logps/chosen": -187.2885284423828, "logps/rejected": -390.85833740234375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.0715748071670532, "rewards/margins": 19.147668838500977, "rewards/rejected": -20.219242095947266, "step": 4940 }, { "epoch": 1.69, "learning_rate": 1.2625107409594693e-07, "logits/chosen": 0.02206951007246971, "logits/rejected": 0.08302227407693863, "logps/chosen": -188.84811401367188, "logps/rejected": -433.4013366699219, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.8812832832336426, "rewards/margins": 24.25885581970215, "rewards/rejected": -27.1401424407959, "step": 4941 }, { "epoch": 1.69, "learning_rate": 1.2598238294743756e-07, "logits/chosen": 0.05722471699118614, "logits/rejected": 0.07452461123466492, "logps/chosen": -227.126708984375, "logps/rejected": -401.4735412597656, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -2.0029683113098145, "rewards/margins": 18.151287078857422, "rewards/rejected": -20.154254913330078, "step": 4942 }, { "epoch": 1.69, "learning_rate": 1.2571395879924874e-07, "logits/chosen": 0.11291362345218658, "logits/rejected": 0.1477450579404831, "logps/chosen": -206.4801788330078, "logps/rejected": -317.94830322265625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.038407210260629654, "rewards/margins": 14.710042953491211, "rewards/rejected": -14.748449325561523, "step": 4943 }, { "epoch": 1.69, "learning_rate": 1.2544580173337983e-07, "logits/chosen": 0.057115569710731506, "logits/rejected": 0.08754601329565048, "logps/chosen": -198.93504333496094, "logps/rejected": -370.65252685546875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.845935344696045, "rewards/margins": 15.681537628173828, "rewards/rejected": -18.52747344970703, "step": 4944 }, { "epoch": 1.69, "learning_rate": 1.251779118317494e-07, "logits/chosen": 0.01740836724638939, "logits/rejected": 0.027656376361846924, "logps/chosen": -194.77073669433594, "logps/rejected": -430.0989990234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8719077110290527, "rewards/margins": 21.187040328979492, "rewards/rejected": -22.058948516845703, "step": 4945 }, { "epoch": 1.69, "learning_rate": 1.2491028917619406e-07, "logits/chosen": -0.06468077749013901, "logits/rejected": -0.015102644450962543, "logps/chosen": -231.46990966796875, "logps/rejected": -347.0489807128906, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.6085952520370483, "rewards/margins": 15.414621353149414, "rewards/rejected": -17.023216247558594, "step": 4946 }, { "epoch": 1.69, "learning_rate": 1.2464293384846891e-07, "logits/chosen": 0.07696866244077682, "logits/rejected": 0.11349242925643921, "logps/chosen": -206.9933624267578, "logps/rejected": -372.8106384277344, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.4753599166870117, "rewards/margins": 18.238780975341797, "rewards/rejected": -20.714139938354492, "step": 4947 }, { "epoch": 1.69, "learning_rate": 1.2437584593024752e-07, "logits/chosen": 0.041254572570323944, "logits/rejected": 0.044330138713121414, "logps/chosen": -232.08016967773438, "logps/rejected": -409.2835388183594, "loss": 0.0165, "rewards/accuracies": 0.9375, "rewards/chosen": -1.679789662361145, "rewards/margins": 16.55063819885254, "rewards/rejected": -18.230426788330078, "step": 4948 }, { "epoch": 1.69, "learning_rate": 1.2410902550312108e-07, "logits/chosen": 0.060980647802352905, "logits/rejected": 0.07605531811714172, "logps/chosen": -227.45396423339844, "logps/rejected": -354.54510498046875, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -2.7528247833251953, "rewards/margins": 13.65074348449707, "rewards/rejected": -16.403568267822266, "step": 4949 }, { "epoch": 1.69, "learning_rate": 1.238424726485997e-07, "logits/chosen": 0.056464601308107376, "logits/rejected": 0.10741273313760757, "logps/chosen": -193.9424591064453, "logps/rejected": -392.60595703125, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -1.8378357887268066, "rewards/margins": 19.457714080810547, "rewards/rejected": -21.295551300048828, "step": 4950 }, { "epoch": 1.69, "learning_rate": 1.2357618744811204e-07, "logits/chosen": -0.0310040432959795, "logits/rejected": -0.014282816089689732, "logps/chosen": -202.61741638183594, "logps/rejected": -343.64605712890625, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": 0.007723554968833923, "rewards/margins": 14.206348419189453, "rewards/rejected": -14.198625564575195, "step": 4951 }, { "epoch": 1.69, "learning_rate": 1.2331016998300393e-07, "logits/chosen": 0.0904906690120697, "logits/rejected": 0.09811028093099594, "logps/chosen": -100.1245346069336, "logps/rejected": -233.21934509277344, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.702418565750122, "rewards/margins": 11.509940147399902, "rewards/rejected": -13.212358474731445, "step": 4952 }, { "epoch": 1.69, "learning_rate": 1.230444203345403e-07, "logits/chosen": 0.047381795942783356, "logits/rejected": 0.08582058548927307, "logps/chosen": -216.77767944335938, "logps/rejected": -349.7394104003906, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.17132923007011414, "rewards/margins": 17.974166870117188, "rewards/rejected": -18.145496368408203, "step": 4953 }, { "epoch": 1.69, "learning_rate": 1.227789385839042e-07, "logits/chosen": -0.039426419883966446, "logits/rejected": -0.005376662593334913, "logps/chosen": -186.8323211669922, "logps/rejected": -271.56646728515625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.1178194284439087, "rewards/margins": 13.351983070373535, "rewards/rejected": -14.469803810119629, "step": 4954 }, { "epoch": 1.69, "learning_rate": 1.2251372481219624e-07, "logits/chosen": 0.011342152021825314, "logits/rejected": 0.04152914136648178, "logps/chosen": -167.95111083984375, "logps/rejected": -333.4641418457031, "loss": 0.0479, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0970678329467773, "rewards/margins": 16.90522003173828, "rewards/rejected": -19.002288818359375, "step": 4955 }, { "epoch": 1.69, "learning_rate": 1.2224877910043584e-07, "logits/chosen": -0.04278893396258354, "logits/rejected": -0.0012702259700745344, "logps/chosen": -214.48434448242188, "logps/rejected": -398.2063293457031, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.6318087577819824, "rewards/margins": 18.66422462463379, "rewards/rejected": -20.296035766601562, "step": 4956 }, { "epoch": 1.69, "learning_rate": 1.2198410152956006e-07, "logits/chosen": 0.00994599424302578, "logits/rejected": 0.027118440717458725, "logps/chosen": -239.98562622070312, "logps/rejected": -395.8971862792969, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -2.47525691986084, "rewards/margins": 17.05181121826172, "rewards/rejected": -19.527067184448242, "step": 4957 }, { "epoch": 1.69, "learning_rate": 1.2171969218042477e-07, "logits/chosen": 0.12898385524749756, "logits/rejected": 0.14100879430770874, "logps/chosen": -151.64202880859375, "logps/rejected": -344.669921875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.1509170532226562, "rewards/margins": 16.93037986755371, "rewards/rejected": -19.081295013427734, "step": 4958 }, { "epoch": 1.69, "learning_rate": 1.2145555113380268e-07, "logits/chosen": 0.026150261983275414, "logits/rejected": 0.07948184013366699, "logps/chosen": -234.09707641601562, "logps/rejected": -364.999267578125, "loss": 0.078, "rewards/accuracies": 0.9375, "rewards/chosen": -2.824626922607422, "rewards/margins": 16.2574462890625, "rewards/rejected": -19.082073211669922, "step": 4959 }, { "epoch": 1.69, "learning_rate": 1.2119167847038547e-07, "logits/chosen": 0.06925420463085175, "logits/rejected": 0.11604713648557663, "logps/chosen": -191.8343963623047, "logps/rejected": -328.17535400390625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -2.6537952423095703, "rewards/margins": 15.868654251098633, "rewards/rejected": -18.522451400756836, "step": 4960 }, { "epoch": 1.69, "learning_rate": 1.209280742707828e-07, "logits/chosen": 0.057226914912462234, "logits/rejected": 0.06655944883823395, "logps/chosen": -208.63902282714844, "logps/rejected": -393.4547119140625, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": 0.029531754553318024, "rewards/margins": 15.544770240783691, "rewards/rejected": -15.515237808227539, "step": 4961 }, { "epoch": 1.69, "learning_rate": 1.2066473861552207e-07, "logits/chosen": -0.04480818286538124, "logits/rejected": -0.02538878656923771, "logps/chosen": -174.27333068847656, "logps/rejected": -318.4112243652344, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -1.572036623954773, "rewards/margins": 15.094913482666016, "rewards/rejected": -16.666948318481445, "step": 4962 }, { "epoch": 1.69, "learning_rate": 1.2040167158504843e-07, "logits/chosen": 0.033771585673093796, "logits/rejected": 0.04836754500865936, "logps/chosen": -213.5906219482422, "logps/rejected": -412.8548583984375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.3775196075439453, "rewards/margins": 17.8962345123291, "rewards/rejected": -20.27375602722168, "step": 4963 }, { "epoch": 1.69, "learning_rate": 1.201388732597255e-07, "logits/chosen": 0.10347498208284378, "logits/rejected": 0.11975888907909393, "logps/chosen": -188.86642456054688, "logps/rejected": -357.217041015625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.4599480926990509, "rewards/margins": 18.069385528564453, "rewards/rejected": -18.529333114624023, "step": 4964 }, { "epoch": 1.69, "learning_rate": 1.1987634371983436e-07, "logits/chosen": 0.03050420992076397, "logits/rejected": 0.04167008399963379, "logps/chosen": -192.41661071777344, "logps/rejected": -378.23651123046875, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -2.097572088241577, "rewards/margins": 15.41416072845459, "rewards/rejected": -17.51173210144043, "step": 4965 }, { "epoch": 1.69, "learning_rate": 1.196140830455744e-07, "logits/chosen": -0.05295715481042862, "logits/rejected": -0.0012999160680919886, "logps/chosen": -284.9163818359375, "logps/rejected": -447.9773254394531, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.576780319213867, "rewards/margins": 17.96433448791504, "rewards/rejected": -21.541114807128906, "step": 4966 }, { "epoch": 1.7, "learning_rate": 1.1935209131706236e-07, "logits/chosen": 0.11660049855709076, "logits/rejected": 0.1440742313861847, "logps/chosen": -181.03414916992188, "logps/rejected": -298.3271484375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.1978917121887207, "rewards/margins": 14.060508728027344, "rewards/rejected": -16.25840187072754, "step": 4967 }, { "epoch": 1.7, "learning_rate": 1.190903686143332e-07, "logits/chosen": 0.025823071599006653, "logits/rejected": 0.04646037146449089, "logps/chosen": -256.13067626953125, "logps/rejected": -399.18231201171875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.2246813774108887, "rewards/margins": 17.319547653198242, "rewards/rejected": -18.54422950744629, "step": 4968 }, { "epoch": 1.7, "learning_rate": 1.1882891501733971e-07, "logits/chosen": -0.0392138697206974, "logits/rejected": -0.010429663583636284, "logps/chosen": -199.08389282226562, "logps/rejected": -368.9422607421875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 1.2946186065673828, "rewards/margins": 18.32277488708496, "rewards/rejected": -17.02815818786621, "step": 4969 }, { "epoch": 1.7, "learning_rate": 1.1856773060595216e-07, "logits/chosen": 0.09320325404405594, "logits/rejected": 0.13852740824222565, "logps/chosen": -209.28799438476562, "logps/rejected": -297.9168701171875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.312023639678955, "rewards/margins": 14.455522537231445, "rewards/rejected": -15.767544746398926, "step": 4970 }, { "epoch": 1.7, "learning_rate": 1.183068154599588e-07, "logits/chosen": 0.014874059706926346, "logits/rejected": 0.034167565405368805, "logps/chosen": -257.0851135253906, "logps/rejected": -447.1258544921875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.241032361984253, "rewards/margins": 18.146215438842773, "rewards/rejected": -19.387248992919922, "step": 4971 }, { "epoch": 1.7, "learning_rate": 1.18046169659066e-07, "logits/chosen": 0.11337766796350479, "logits/rejected": 0.1331721991300583, "logps/chosen": -190.38612365722656, "logps/rejected": -338.076416015625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.66542911529541, "rewards/margins": 13.466425895690918, "rewards/rejected": -16.131853103637695, "step": 4972 }, { "epoch": 1.7, "learning_rate": 1.1778579328289684e-07, "logits/chosen": 0.013893768191337585, "logits/rejected": 0.03473397716879845, "logps/chosen": -226.35923767089844, "logps/rejected": -420.9965515136719, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.295487642288208, "rewards/margins": 18.00397491455078, "rewards/rejected": -19.299463272094727, "step": 4973 }, { "epoch": 1.7, "learning_rate": 1.1752568641099303e-07, "logits/chosen": -0.04139585793018341, "logits/rejected": 0.007010127883404493, "logps/chosen": -221.86793518066406, "logps/rejected": -287.8868408203125, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.0371761322021484, "rewards/margins": 10.178879737854004, "rewards/rejected": -11.216055870056152, "step": 4974 }, { "epoch": 1.7, "learning_rate": 1.1726584912281367e-07, "logits/chosen": 0.11416260898113251, "logits/rejected": 0.14270196855068207, "logps/chosen": -193.87405395507812, "logps/rejected": -312.6754455566406, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.3904210329055786, "rewards/margins": 16.01991844177246, "rewards/rejected": -17.41033935546875, "step": 4975 }, { "epoch": 1.7, "learning_rate": 1.1700628149773539e-07, "logits/chosen": 0.03004375472664833, "logits/rejected": 0.048794008791446686, "logps/chosen": -203.6266326904297, "logps/rejected": -371.618408203125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.8662324547767639, "rewards/margins": 17.76875877380371, "rewards/rejected": -18.634992599487305, "step": 4976 }, { "epoch": 1.7, "learning_rate": 1.1674698361505264e-07, "logits/chosen": 0.1401086151599884, "logits/rejected": 0.15695959329605103, "logps/chosen": -136.20236206054688, "logps/rejected": -268.4532470703125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.5563837885856628, "rewards/margins": 14.372037887573242, "rewards/rejected": -14.928422927856445, "step": 4977 }, { "epoch": 1.7, "learning_rate": 1.1648795555397716e-07, "logits/chosen": 0.00293611828237772, "logits/rejected": 0.016065970063209534, "logps/chosen": -244.42335510253906, "logps/rejected": -434.654296875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.675897479057312, "rewards/margins": 18.048484802246094, "rewards/rejected": -19.724380493164062, "step": 4978 }, { "epoch": 1.7, "learning_rate": 1.1622919739363846e-07, "logits/chosen": 0.03697021305561066, "logits/rejected": 0.07007384300231934, "logps/chosen": -242.883544921875, "logps/rejected": -402.79461669921875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.7037479877471924, "rewards/margins": 16.51323890686035, "rewards/rejected": -19.216983795166016, "step": 4979 }, { "epoch": 1.7, "learning_rate": 1.1597070921308361e-07, "logits/chosen": 0.013246381655335426, "logits/rejected": 0.047364797443151474, "logps/chosen": -279.59759521484375, "logps/rejected": -472.7248840332031, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.6799689531326294, "rewards/margins": 20.816783905029297, "rewards/rejected": -21.496753692626953, "step": 4980 }, { "epoch": 1.7, "learning_rate": 1.1571249109127745e-07, "logits/chosen": 0.015368977561593056, "logits/rejected": 0.07492789626121521, "logps/chosen": -260.7297668457031, "logps/rejected": -329.94561767578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0738641023635864, "rewards/margins": 15.137868881225586, "rewards/rejected": -16.211734771728516, "step": 4981 }, { "epoch": 1.7, "learning_rate": 1.1545454310710167e-07, "logits/chosen": -0.011475588195025921, "logits/rejected": -0.012235668487846851, "logps/chosen": -193.97181701660156, "logps/rejected": -389.3436279296875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.9168412089347839, "rewards/margins": 16.5802059173584, "rewards/rejected": -17.497047424316406, "step": 4982 }, { "epoch": 1.7, "learning_rate": 1.1519686533935591e-07, "logits/chosen": -0.02810591273009777, "logits/rejected": -0.02408961020410061, "logps/chosen": -257.47222900390625, "logps/rejected": -434.5091857910156, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.9448405504226685, "rewards/margins": 17.640453338623047, "rewards/rejected": -19.585294723510742, "step": 4983 }, { "epoch": 1.7, "learning_rate": 1.1493945786675751e-07, "logits/chosen": 0.023794785141944885, "logits/rejected": 0.0497489869594574, "logps/chosen": -178.86788940429688, "logps/rejected": -354.57574462890625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.5880107879638672, "rewards/margins": 15.220235824584961, "rewards/rejected": -16.808246612548828, "step": 4984 }, { "epoch": 1.7, "learning_rate": 1.1468232076794038e-07, "logits/chosen": 0.09199375659227371, "logits/rejected": 0.11989298462867737, "logps/chosen": -225.4359588623047, "logps/rejected": -388.9150695800781, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.7457618713378906, "rewards/margins": 16.027095794677734, "rewards/rejected": -18.772857666015625, "step": 4985 }, { "epoch": 1.7, "learning_rate": 1.1442545412145665e-07, "logits/chosen": -0.025455253198742867, "logits/rejected": -0.007589337881654501, "logps/chosen": -175.34107971191406, "logps/rejected": -410.89715576171875, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.4288065433502197, "rewards/margins": 20.512683868408203, "rewards/rejected": -21.941492080688477, "step": 4986 }, { "epoch": 1.7, "learning_rate": 1.1416885800577569e-07, "logits/chosen": 0.01028897613286972, "logits/rejected": 0.034873925149440765, "logps/chosen": -201.87368774414062, "logps/rejected": -380.1904602050781, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.8785988688468933, "rewards/margins": 15.773674964904785, "rewards/rejected": -16.65227508544922, "step": 4987 }, { "epoch": 1.7, "learning_rate": 1.1391253249928368e-07, "logits/chosen": 0.09975681453943253, "logits/rejected": 0.12427838891744614, "logps/chosen": -196.0396728515625, "logps/rejected": -385.8531494140625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.8202879428863525, "rewards/margins": 18.156782150268555, "rewards/rejected": -19.97707176208496, "step": 4988 }, { "epoch": 1.7, "learning_rate": 1.1365647768028463e-07, "logits/chosen": -0.0008950766641646624, "logits/rejected": 0.01193267572671175, "logps/chosen": -180.8700408935547, "logps/rejected": -374.0771484375, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.6739205121994019, "rewards/margins": 16.074481964111328, "rewards/rejected": -17.748403549194336, "step": 4989 }, { "epoch": 1.7, "learning_rate": 1.1340069362699988e-07, "logits/chosen": 0.09501654654741287, "logits/rejected": 0.11435016244649887, "logps/chosen": -255.6090850830078, "logps/rejected": -482.9376525878906, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -0.007920831441879272, "rewards/margins": 21.358013153076172, "rewards/rejected": -21.365934371948242, "step": 4990 }, { "epoch": 1.7, "learning_rate": 1.1314518041756794e-07, "logits/chosen": 0.13119997084140778, "logits/rejected": 0.1543433517217636, "logps/chosen": -158.96302795410156, "logps/rejected": -255.57174682617188, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.8291559219360352, "rewards/margins": 11.094396591186523, "rewards/rejected": -12.923552513122559, "step": 4991 }, { "epoch": 1.7, "learning_rate": 1.1288993813004466e-07, "logits/chosen": 0.0804409459233284, "logits/rejected": 0.1103387176990509, "logps/chosen": -226.64813232421875, "logps/rejected": -494.1419982910156, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -1.7599010467529297, "rewards/margins": 24.193845748901367, "rewards/rejected": -25.953746795654297, "step": 4992 }, { "epoch": 1.7, "learning_rate": 1.126349668424027e-07, "logits/chosen": 0.059867069125175476, "logits/rejected": 0.08786585181951523, "logps/chosen": -186.5253448486328, "logps/rejected": -352.5797119140625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7123156189918518, "rewards/margins": 17.036584854125977, "rewards/rejected": -17.7489013671875, "step": 4993 }, { "epoch": 1.7, "learning_rate": 1.1238026663253252e-07, "logits/chosen": 0.14112576842308044, "logits/rejected": 0.15047286450862885, "logps/chosen": -105.56388092041016, "logps/rejected": -238.1844024658203, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9890964031219482, "rewards/margins": 13.026455879211426, "rewards/rejected": -14.015552520751953, "step": 4994 }, { "epoch": 1.7, "learning_rate": 1.1212583757824145e-07, "logits/chosen": -0.011640961281955242, "logits/rejected": 0.025978688150644302, "logps/chosen": -205.54835510253906, "logps/rejected": -314.32073974609375, "loss": 0.0196, "rewards/accuracies": 0.9375, "rewards/chosen": -1.967777132987976, "rewards/margins": 14.937551498413086, "rewards/rejected": -16.905330657958984, "step": 4995 }, { "epoch": 1.71, "learning_rate": 1.118716797572542e-07, "logits/chosen": -0.0941697433590889, "logits/rejected": -0.07815767824649811, "logps/chosen": -264.2307434082031, "logps/rejected": -427.3927001953125, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -3.1424903869628906, "rewards/margins": 16.79534149169922, "rewards/rejected": -19.937829971313477, "step": 4996 }, { "epoch": 1.71, "learning_rate": 1.1161779324721221e-07, "logits/chosen": 0.13707154989242554, "logits/rejected": 0.15881387889385223, "logps/chosen": -155.93826293945312, "logps/rejected": -252.17970275878906, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.1124427318573, "rewards/margins": 11.457881927490234, "rewards/rejected": -13.57032585144043, "step": 4997 }, { "epoch": 1.71, "learning_rate": 1.1136417812567445e-07, "logits/chosen": -0.009482410736382008, "logits/rejected": 0.0037270996253937483, "logps/chosen": -211.66123962402344, "logps/rejected": -404.1763610839844, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -1.4898872375488281, "rewards/margins": 18.009178161621094, "rewards/rejected": -19.499065399169922, "step": 4998 }, { "epoch": 1.71, "learning_rate": 1.1111083447011704e-07, "logits/chosen": 0.07945755869150162, "logits/rejected": 0.10852692276239395, "logps/chosen": -203.43272399902344, "logps/rejected": -411.37579345703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.06768462061882019, "rewards/margins": 20.87759780883789, "rewards/rejected": -20.945283889770508, "step": 4999 }, { "epoch": 1.71, "learning_rate": 1.1085776235793243e-07, "logits/chosen": 0.08069780468940735, "logits/rejected": 0.09716950356960297, "logps/chosen": -223.15740966796875, "logps/rejected": -383.138671875, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": -2.061779499053955, "rewards/margins": 13.72504997253418, "rewards/rejected": -15.786829948425293, "step": 5000 }, { "epoch": 1.71, "learning_rate": 1.106049618664311e-07, "logits/chosen": 0.009637159295380116, "logits/rejected": 0.041392210870981216, "logps/chosen": -182.1318817138672, "logps/rejected": -347.8125915527344, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.4328049123287201, "rewards/margins": 18.639305114746094, "rewards/rejected": -19.072111129760742, "step": 5001 }, { "epoch": 1.71, "learning_rate": 1.1035243307284026e-07, "logits/chosen": -0.046548161655664444, "logits/rejected": 0.010833547450602055, "logps/chosen": -248.29226684570312, "logps/rejected": -363.02093505859375, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 0.7878381013870239, "rewards/margins": 18.78240203857422, "rewards/rejected": -17.99456214904785, "step": 5002 }, { "epoch": 1.71, "learning_rate": 1.1010017605430333e-07, "logits/chosen": -0.0329759381711483, "logits/rejected": 0.0016607342986389995, "logps/chosen": -257.3186950683594, "logps/rejected": -349.3643493652344, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.0320019721984863, "rewards/margins": 14.33489990234375, "rewards/rejected": -15.366903305053711, "step": 5003 }, { "epoch": 1.71, "learning_rate": 1.0984819088788189e-07, "logits/chosen": -0.01859920658171177, "logits/rejected": 0.003821600927039981, "logps/chosen": -168.62149047851562, "logps/rejected": -343.31915283203125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.3643531799316406, "rewards/margins": 17.810590744018555, "rewards/rejected": -17.446239471435547, "step": 5004 }, { "epoch": 1.71, "learning_rate": 1.0959647765055391e-07, "logits/chosen": -0.011164906434714794, "logits/rejected": 0.00031425582710653543, "logps/chosen": -208.78744506835938, "logps/rejected": -381.6729431152344, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.9000455141067505, "rewards/margins": 16.321025848388672, "rewards/rejected": -18.221071243286133, "step": 5005 }, { "epoch": 1.71, "learning_rate": 1.09345036419214e-07, "logits/chosen": 0.07489456981420517, "logits/rejected": 0.09477992355823517, "logps/chosen": -246.39231872558594, "logps/rejected": -383.3836669921875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.8452164530754089, "rewards/margins": 15.665283203125, "rewards/rejected": -14.820066452026367, "step": 5006 }, { "epoch": 1.71, "learning_rate": 1.0909386727067404e-07, "logits/chosen": -0.011010930873453617, "logits/rejected": 0.003651941893622279, "logps/chosen": -236.94137573242188, "logps/rejected": -440.1562194824219, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.4379968643188477, "rewards/margins": 19.001510620117188, "rewards/rejected": -21.43950843811035, "step": 5007 }, { "epoch": 1.71, "learning_rate": 1.0884297028166301e-07, "logits/chosen": -0.0024127967189997435, "logits/rejected": 0.03254389762878418, "logps/chosen": -192.01112365722656, "logps/rejected": -363.3278503417969, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.3488187789916992, "rewards/margins": 17.65118980407715, "rewards/rejected": -19.00000762939453, "step": 5008 }, { "epoch": 1.71, "learning_rate": 1.0859234552882623e-07, "logits/chosen": 0.06007888540625572, "logits/rejected": 0.07329972833395004, "logps/chosen": -190.55397033691406, "logps/rejected": -370.47406005859375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.337251663208008, "rewards/margins": 16.96769142150879, "rewards/rejected": -19.30494499206543, "step": 5009 }, { "epoch": 1.71, "learning_rate": 1.0834199308872638e-07, "logits/chosen": 0.0020841695368289948, "logits/rejected": 0.020781150087714195, "logps/chosen": -193.94407653808594, "logps/rejected": -321.4688720703125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.4513994455337524, "rewards/margins": 13.178422927856445, "rewards/rejected": -14.62982177734375, "step": 5010 }, { "epoch": 1.71, "learning_rate": 1.0809191303784238e-07, "logits/chosen": 0.06265769153833389, "logits/rejected": 0.11285987496376038, "logps/chosen": -239.9720916748047, "logps/rejected": -375.03369140625, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -1.9955600500106812, "rewards/margins": 18.055313110351562, "rewards/rejected": -20.050872802734375, "step": 5011 }, { "epoch": 1.71, "learning_rate": 1.0784210545257033e-07, "logits/chosen": 0.036109574139118195, "logits/rejected": 0.07288007438182831, "logps/chosen": -207.32449340820312, "logps/rejected": -348.270751953125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.2469899654388428, "rewards/margins": 16.117515563964844, "rewards/rejected": -17.364505767822266, "step": 5012 }, { "epoch": 1.71, "learning_rate": 1.0759257040922298e-07, "logits/chosen": -0.0032808587420731783, "logits/rejected": 0.02232540026307106, "logps/chosen": -170.85275268554688, "logps/rejected": -318.069091796875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.9305604696273804, "rewards/margins": 13.887325286865234, "rewards/rejected": -15.817888259887695, "step": 5013 }, { "epoch": 1.71, "learning_rate": 1.0734330798403002e-07, "logits/chosen": -0.03226914256811142, "logits/rejected": 0.006745911203324795, "logps/chosen": -276.7178039550781, "logps/rejected": -444.7326965332031, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -0.6693087816238403, "rewards/margins": 17.631118774414062, "rewards/rejected": -18.30042839050293, "step": 5014 }, { "epoch": 1.71, "learning_rate": 1.0709431825313742e-07, "logits/chosen": 0.03127645328640938, "logits/rejected": 0.06660280376672745, "logps/chosen": -213.2014617919922, "logps/rejected": -355.0628662109375, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.1922142505645752, "rewards/margins": 17.02528953552246, "rewards/rejected": -18.217506408691406, "step": 5015 }, { "epoch": 1.71, "learning_rate": 1.0684560129260822e-07, "logits/chosen": 0.0045991577208042145, "logits/rejected": 0.08001101762056351, "logps/chosen": -227.6422882080078, "logps/rejected": -298.24554443359375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.4664397239685059, "rewards/margins": 13.24647331237793, "rewards/rejected": -14.712913513183594, "step": 5016 }, { "epoch": 1.71, "learning_rate": 1.0659715717842221e-07, "logits/chosen": 0.04098216071724892, "logits/rejected": 0.08536078035831451, "logps/chosen": -221.32937622070312, "logps/rejected": -336.82464599609375, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": -2.169389486312866, "rewards/margins": 13.52546501159668, "rewards/rejected": -15.694855690002441, "step": 5017 }, { "epoch": 1.71, "learning_rate": 1.0634898598647524e-07, "logits/chosen": -0.04880157858133316, "logits/rejected": -0.012853337451815605, "logps/chosen": -222.25070190429688, "logps/rejected": -346.9593505859375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -3.2453553676605225, "rewards/margins": 15.07050609588623, "rewards/rejected": -18.315860748291016, "step": 5018 }, { "epoch": 1.71, "learning_rate": 1.0610108779258042e-07, "logits/chosen": -0.01800217479467392, "logits/rejected": 0.02162901870906353, "logps/chosen": -272.3271179199219, "logps/rejected": -413.3308410644531, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.1764309406280518, "rewards/margins": 16.01660919189453, "rewards/rejected": -19.19304084777832, "step": 5019 }, { "epoch": 1.71, "learning_rate": 1.0585346267246742e-07, "logits/chosen": 0.1059669628739357, "logits/rejected": 0.14132308959960938, "logps/chosen": -152.84378051757812, "logps/rejected": -278.8438720703125, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -1.9149982929229736, "rewards/margins": 15.381452560424805, "rewards/rejected": -17.296451568603516, "step": 5020 }, { "epoch": 1.71, "learning_rate": 1.0560611070178182e-07, "logits/chosen": 0.04087693244218826, "logits/rejected": 0.056093063205480576, "logps/chosen": -162.96810913085938, "logps/rejected": -371.1133728027344, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.2259907722473145, "rewards/margins": 18.026229858398438, "rewards/rejected": -20.252220153808594, "step": 5021 }, { "epoch": 1.71, "learning_rate": 1.0535903195608664e-07, "logits/chosen": 0.08109200745820999, "logits/rejected": 0.09532494097948074, "logps/chosen": -171.3522491455078, "logps/rejected": -302.1216735839844, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.6876180171966553, "rewards/margins": 15.362242698669434, "rewards/rejected": -16.04986000061035, "step": 5022 }, { "epoch": 1.71, "learning_rate": 1.0511222651086072e-07, "logits/chosen": 0.07712295651435852, "logits/rejected": 0.10005324333906174, "logps/chosen": -193.39022827148438, "logps/rejected": -369.1640930175781, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.8476052284240723, "rewards/margins": 16.919889450073242, "rewards/rejected": -19.767494201660156, "step": 5023 }, { "epoch": 1.71, "learning_rate": 1.0486569444149996e-07, "logits/chosen": 0.11154107004404068, "logits/rejected": 0.11878849565982819, "logps/chosen": -227.77346801757812, "logps/rejected": -417.715087890625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.0106637477874756, "rewards/margins": 17.03022003173828, "rewards/rejected": -19.04088592529297, "step": 5024 }, { "epoch": 1.72, "learning_rate": 1.0461943582331656e-07, "logits/chosen": -0.03635711967945099, "logits/rejected": -0.027747076004743576, "logps/chosen": -214.307861328125, "logps/rejected": -358.4782409667969, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.9558056592941284, "rewards/margins": 15.354813575744629, "rewards/rejected": -17.31061553955078, "step": 5025 }, { "epoch": 1.72, "learning_rate": 1.0437345073153892e-07, "logits/chosen": 0.08527574688196182, "logits/rejected": 0.09182107448577881, "logps/chosen": -187.82801818847656, "logps/rejected": -360.1655578613281, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.2505555152893066, "rewards/margins": 16.796724319458008, "rewards/rejected": -20.047283172607422, "step": 5026 }, { "epoch": 1.72, "learning_rate": 1.0412773924131202e-07, "logits/chosen": -0.0030923604499548674, "logits/rejected": 0.015337405726313591, "logps/chosen": -211.2440185546875, "logps/rejected": -393.6571350097656, "loss": 0.0474, "rewards/accuracies": 1.0, "rewards/chosen": -3.07490611076355, "rewards/margins": 18.403762817382812, "rewards/rejected": -21.478666305541992, "step": 5027 }, { "epoch": 1.72, "learning_rate": 1.0388230142769749e-07, "logits/chosen": 0.129779651761055, "logits/rejected": 0.1703169196844101, "logps/chosen": -230.94847106933594, "logps/rejected": -340.1997985839844, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -2.3069710731506348, "rewards/margins": 13.28455924987793, "rewards/rejected": -15.591529846191406, "step": 5028 }, { "epoch": 1.72, "learning_rate": 1.0363713736567348e-07, "logits/chosen": -0.03491005674004555, "logits/rejected": 0.014275526627898216, "logps/chosen": -195.3875274658203, "logps/rejected": -342.02569580078125, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -1.7681944370269775, "rewards/margins": 18.243453979492188, "rewards/rejected": -20.011646270751953, "step": 5029 }, { "epoch": 1.72, "learning_rate": 1.0339224713013362e-07, "logits/chosen": 0.0793643519282341, "logits/rejected": 0.096086785197258, "logps/chosen": -181.6991729736328, "logps/rejected": -308.3258972167969, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.4153943061828613, "rewards/margins": 12.431448936462402, "rewards/rejected": -14.846844673156738, "step": 5030 }, { "epoch": 1.72, "learning_rate": 1.0314763079588895e-07, "logits/chosen": 0.0216338150203228, "logits/rejected": 0.03595828264951706, "logps/chosen": -178.80984497070312, "logps/rejected": -349.2168273925781, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -1.1779263019561768, "rewards/margins": 17.02592658996582, "rewards/rejected": -18.2038516998291, "step": 5031 }, { "epoch": 1.72, "learning_rate": 1.0290328843766627e-07, "logits/chosen": 0.0870196670293808, "logits/rejected": 0.13725045323371887, "logps/chosen": -227.16116333007812, "logps/rejected": -305.310791015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.26327502727508545, "rewards/margins": 15.416818618774414, "rewards/rejected": -15.680092811584473, "step": 5032 }, { "epoch": 1.72, "learning_rate": 1.0265922013010864e-07, "logits/chosen": 0.018486689776182175, "logits/rejected": 0.039280377328395844, "logps/chosen": -244.36669921875, "logps/rejected": -382.6087341308594, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.08540590107440948, "rewards/margins": 15.21280288696289, "rewards/rejected": -15.127398490905762, "step": 5033 }, { "epoch": 1.72, "learning_rate": 1.0241542594777575e-07, "logits/chosen": -0.02362031675875187, "logits/rejected": -0.012641044333577156, "logps/chosen": -262.1811828613281, "logps/rejected": -435.90338134765625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.585216999053955, "rewards/margins": 17.435976028442383, "rewards/rejected": -21.021194458007812, "step": 5034 }, { "epoch": 1.72, "learning_rate": 1.0217190596514336e-07, "logits/chosen": 0.10674396902322769, "logits/rejected": 0.13702471554279327, "logps/chosen": -158.3661346435547, "logps/rejected": -264.857421875, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -2.262918472290039, "rewards/margins": 11.851393699645996, "rewards/rejected": -14.114312171936035, "step": 5035 }, { "epoch": 1.72, "learning_rate": 1.019286602566033e-07, "logits/chosen": -0.04845697060227394, "logits/rejected": -0.03307642042636871, "logps/chosen": -255.3197021484375, "logps/rejected": -471.37908935546875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.610508680343628, "rewards/margins": 20.028715133666992, "rewards/rejected": -21.639225006103516, "step": 5036 }, { "epoch": 1.72, "learning_rate": 1.0168568889646389e-07, "logits/chosen": 0.05898648127913475, "logits/rejected": 0.1032809242606163, "logps/chosen": -202.59031677246094, "logps/rejected": -350.67626953125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.4105515778064728, "rewards/margins": 19.144832611083984, "rewards/rejected": -19.555383682250977, "step": 5037 }, { "epoch": 1.72, "learning_rate": 1.0144299195894967e-07, "logits/chosen": 0.12334149330854416, "logits/rejected": 0.15050022304058075, "logps/chosen": -189.1818084716797, "logps/rejected": -290.25732421875, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.9755284786224365, "rewards/margins": 12.67307186126709, "rewards/rejected": -13.648602485656738, "step": 5038 }, { "epoch": 1.72, "learning_rate": 1.0120056951820067e-07, "logits/chosen": 0.014794771559536457, "logits/rejected": 0.042064860463142395, "logps/chosen": -205.46575927734375, "logps/rejected": -410.8870544433594, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -2.0027832984924316, "rewards/margins": 18.52570343017578, "rewards/rejected": -20.528486251831055, "step": 5039 }, { "epoch": 1.72, "learning_rate": 1.0095842164827429e-07, "logits/chosen": 0.07526427507400513, "logits/rejected": 0.10362488776445389, "logps/chosen": -246.0583038330078, "logps/rejected": -403.21197509765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1290462017059326, "rewards/margins": 16.074565887451172, "rewards/rejected": -17.20361328125, "step": 5040 }, { "epoch": 1.72, "learning_rate": 1.0071654842314292e-07, "logits/chosen": 0.07965227961540222, "logits/rejected": 0.11593544483184814, "logps/chosen": -226.32113647460938, "logps/rejected": -376.3629150390625, "loss": 0.0148, "rewards/accuracies": 0.9375, "rewards/chosen": -1.616178035736084, "rewards/margins": 15.44039535522461, "rewards/rejected": -17.05657196044922, "step": 5041 }, { "epoch": 1.72, "learning_rate": 1.004749499166957e-07, "logits/chosen": 0.18071776628494263, "logits/rejected": 0.22080068290233612, "logps/chosen": -175.3578338623047, "logps/rejected": -298.0948791503906, "loss": 0.0808, "rewards/accuracies": 1.0, "rewards/chosen": 0.3273327052593231, "rewards/margins": 15.327942848205566, "rewards/rejected": -15.0006103515625, "step": 5042 }, { "epoch": 1.72, "learning_rate": 1.0023362620273767e-07, "logits/chosen": 0.038755714893341064, "logits/rejected": 0.11895084381103516, "logps/chosen": -255.71392822265625, "logps/rejected": -297.0461730957031, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.5832263231277466, "rewards/margins": 13.735097885131836, "rewards/rejected": -15.31832504272461, "step": 5043 }, { "epoch": 1.72, "learning_rate": 9.999257735498956e-08, "logits/chosen": -0.00045166793279349804, "logits/rejected": 0.04588274285197258, "logps/chosen": -213.05123901367188, "logps/rejected": -359.0757141113281, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.18035951256752014, "rewards/margins": 18.023515701293945, "rewards/rejected": -18.203876495361328, "step": 5044 }, { "epoch": 1.72, "learning_rate": 9.975180344708878e-08, "logits/chosen": 0.004832355305552483, "logits/rejected": 0.016476280987262726, "logps/chosen": -246.39866638183594, "logps/rejected": -383.28564453125, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.322714924812317, "rewards/margins": 16.81780242919922, "rewards/rejected": -18.140518188476562, "step": 5045 }, { "epoch": 1.72, "learning_rate": 9.951130455258827e-08, "logits/chosen": 0.08555946499109268, "logits/rejected": 0.15297280251979828, "logps/chosen": -183.1527862548828, "logps/rejected": -257.87701416015625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.6515552997589111, "rewards/margins": 13.955648422241211, "rewards/rejected": -15.60720443725586, "step": 5046 }, { "epoch": 1.72, "learning_rate": 9.92710807449575e-08, "logits/chosen": 0.06034495681524277, "logits/rejected": 0.05854879319667816, "logps/chosen": -183.3802947998047, "logps/rejected": -373.89642333984375, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -1.299681305885315, "rewards/margins": 16.59685707092285, "rewards/rejected": -17.89653778076172, "step": 5047 }, { "epoch": 1.72, "learning_rate": 9.903113209758096e-08, "logits/chosen": 0.07030341029167175, "logits/rejected": 0.0903584361076355, "logps/chosen": -208.56710815429688, "logps/rejected": -405.2861022949219, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -0.9717689752578735, "rewards/margins": 18.54672622680664, "rewards/rejected": -19.51849365234375, "step": 5048 }, { "epoch": 1.72, "learning_rate": 9.879145868375993e-08, "logits/chosen": 0.045851461589336395, "logits/rejected": 0.08181874454021454, "logps/chosen": -163.75343322753906, "logps/rejected": -379.5631103515625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.9614005088806152, "rewards/margins": 18.34637451171875, "rewards/rejected": -20.307775497436523, "step": 5049 }, { "epoch": 1.72, "learning_rate": 9.85520605767115e-08, "logits/chosen": 0.06559766829013824, "logits/rejected": 0.09469541162252426, "logps/chosen": -214.0741729736328, "logps/rejected": -333.944091796875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5129204988479614, "rewards/margins": 14.610721588134766, "rewards/rejected": -15.123641967773438, "step": 5050 }, { "epoch": 1.72, "learning_rate": 9.831293784956818e-08, "logits/chosen": 0.08990051597356796, "logits/rejected": 0.15394878387451172, "logps/chosen": -157.34852600097656, "logps/rejected": -226.2081298828125, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -1.0456641912460327, "rewards/margins": 13.063728332519531, "rewards/rejected": -14.109392166137695, "step": 5051 }, { "epoch": 1.72, "learning_rate": 9.807409057537874e-08, "logits/chosen": 0.1409875750541687, "logits/rejected": 0.1633004993200302, "logps/chosen": -263.9258117675781, "logps/rejected": -406.1534729003906, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.2262110710144043, "rewards/margins": 17.609573364257812, "rewards/rejected": -19.835784912109375, "step": 5052 }, { "epoch": 1.72, "learning_rate": 9.783551882710794e-08, "logits/chosen": 0.11196856200695038, "logits/rejected": 0.15353655815124512, "logps/chosen": -136.21441650390625, "logps/rejected": -278.27685546875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.0100595951080322, "rewards/margins": 15.410244941711426, "rewards/rejected": -16.420305252075195, "step": 5053 }, { "epoch": 1.72, "learning_rate": 9.759722267763571e-08, "logits/chosen": 0.06573484092950821, "logits/rejected": 0.11014656722545624, "logps/chosen": -262.42071533203125, "logps/rejected": -362.48162841796875, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": 0.20694410800933838, "rewards/margins": 16.377216339111328, "rewards/rejected": -16.170272827148438, "step": 5054 }, { "epoch": 1.73, "learning_rate": 9.73592021997588e-08, "logits/chosen": 0.0885874480009079, "logits/rejected": 0.16160480678081512, "logps/chosen": -223.4499969482422, "logps/rejected": -291.576171875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.3786691427230835, "rewards/margins": 14.899947166442871, "rewards/rejected": -16.278615951538086, "step": 5055 }, { "epoch": 1.73, "learning_rate": 9.712145746618871e-08, "logits/chosen": 0.12372040748596191, "logits/rejected": 0.13354183733463287, "logps/chosen": -137.16127014160156, "logps/rejected": -290.661865234375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.2504948377609253, "rewards/margins": 12.254253387451172, "rewards/rejected": -13.50474739074707, "step": 5056 }, { "epoch": 1.73, "learning_rate": 9.688398854955349e-08, "logits/chosen": 0.14671117067337036, "logits/rejected": 0.16475267708301544, "logps/chosen": -154.18081665039062, "logps/rejected": -291.04327392578125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.6292660236358643, "rewards/margins": 13.99774169921875, "rewards/rejected": -16.62700653076172, "step": 5057 }, { "epoch": 1.73, "learning_rate": 9.664679552239663e-08, "logits/chosen": -0.12514938414096832, "logits/rejected": -0.09273792803287506, "logps/chosen": -229.08412170410156, "logps/rejected": -367.50103759765625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -2.5097546577453613, "rewards/margins": 14.997678756713867, "rewards/rejected": -17.50743293762207, "step": 5058 }, { "epoch": 1.73, "learning_rate": 9.640987845717696e-08, "logits/chosen": 0.010308509692549706, "logits/rejected": 0.03638262301683426, "logps/chosen": -215.64028930664062, "logps/rejected": -378.4047546386719, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.6162073612213135, "rewards/margins": 17.269365310668945, "rewards/rejected": -17.885570526123047, "step": 5059 }, { "epoch": 1.73, "learning_rate": 9.617323742626959e-08, "logits/chosen": 0.023715710267424583, "logits/rejected": 0.05352044105529785, "logps/chosen": -208.07525634765625, "logps/rejected": -336.4693603515625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -2.1039609909057617, "rewards/margins": 15.526830673217773, "rewards/rejected": -17.63079261779785, "step": 5060 }, { "epoch": 1.73, "learning_rate": 9.593687250196514e-08, "logits/chosen": 0.015782641246914864, "logits/rejected": 0.033702488988637924, "logps/chosen": -191.3901824951172, "logps/rejected": -369.91217041015625, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.9611329436302185, "rewards/margins": 17.91403579711914, "rewards/rejected": -18.87516975402832, "step": 5061 }, { "epoch": 1.73, "learning_rate": 9.570078375647006e-08, "logits/chosen": -0.03930405154824257, "logits/rejected": -0.0016079738270491362, "logps/chosen": -221.1540985107422, "logps/rejected": -371.7340393066406, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.8559720516204834, "rewards/margins": 16.376588821411133, "rewards/rejected": -18.232563018798828, "step": 5062 }, { "epoch": 1.73, "learning_rate": 9.546497126190567e-08, "logits/chosen": 0.03900941088795662, "logits/rejected": 0.06542733311653137, "logps/chosen": -179.47335815429688, "logps/rejected": -370.8576965332031, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -2.4875848293304443, "rewards/margins": 16.629953384399414, "rewards/rejected": -19.117538452148438, "step": 5063 }, { "epoch": 1.73, "learning_rate": 9.522943509030968e-08, "logits/chosen": 0.01576935313642025, "logits/rejected": 0.056417617946863174, "logps/chosen": -234.9794464111328, "logps/rejected": -466.0345764160156, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7312312126159668, "rewards/margins": 23.375572204589844, "rewards/rejected": -24.106801986694336, "step": 5064 }, { "epoch": 1.73, "learning_rate": 9.49941753136354e-08, "logits/chosen": 0.11444931477308273, "logits/rejected": 0.13537739217281342, "logps/chosen": -126.66349029541016, "logps/rejected": -286.05206298828125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.916849970817566, "rewards/margins": 14.769231796264648, "rewards/rejected": -16.686080932617188, "step": 5065 }, { "epoch": 1.73, "learning_rate": 9.475919200375105e-08, "logits/chosen": 0.07872698456048965, "logits/rejected": 0.09177862852811813, "logps/chosen": -197.3937225341797, "logps/rejected": -416.01080322265625, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -2.597128391265869, "rewards/margins": 18.335906982421875, "rewards/rejected": -20.93303871154785, "step": 5066 }, { "epoch": 1.73, "learning_rate": 9.452448523244083e-08, "logits/chosen": 0.05081220343708992, "logits/rejected": 0.06905917078256607, "logps/chosen": -256.3483581542969, "logps/rejected": -437.79461669921875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.5023729801177979, "rewards/margins": 17.453571319580078, "rewards/rejected": -18.955942153930664, "step": 5067 }, { "epoch": 1.73, "learning_rate": 9.429005507140486e-08, "logits/chosen": 0.11643614619970322, "logits/rejected": 0.11829808354377747, "logps/chosen": -103.66561889648438, "logps/rejected": -258.2964782714844, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.8138954639434814, "rewards/margins": 12.48137092590332, "rewards/rejected": -14.295266151428223, "step": 5068 }, { "epoch": 1.73, "learning_rate": 9.405590159225751e-08, "logits/chosen": 0.027426211163401604, "logits/rejected": 0.08498641103506088, "logps/chosen": -252.7471923828125, "logps/rejected": -294.9692077636719, "loss": 0.0201, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5501612424850464, "rewards/margins": 13.235244750976562, "rewards/rejected": -13.785406112670898, "step": 5069 }, { "epoch": 1.73, "learning_rate": 9.382202486653035e-08, "logits/chosen": -0.02198687195777893, "logits/rejected": -0.006074891425669193, "logps/chosen": -209.5478515625, "logps/rejected": -432.27764892578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4774564504623413, "rewards/margins": 19.600086212158203, "rewards/rejected": -21.077539443969727, "step": 5070 }, { "epoch": 1.73, "learning_rate": 9.358842496566888e-08, "logits/chosen": 0.0072044674307107925, "logits/rejected": 0.06420332938432693, "logps/chosen": -211.2360076904297, "logps/rejected": -400.5838928222656, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -1.7771884202957153, "rewards/margins": 20.779226303100586, "rewards/rejected": -22.556415557861328, "step": 5071 }, { "epoch": 1.73, "learning_rate": 9.33551019610348e-08, "logits/chosen": 0.024907181039452553, "logits/rejected": 0.040737904608249664, "logps/chosen": -248.16580200195312, "logps/rejected": -445.4861755371094, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5575581192970276, "rewards/margins": 18.722667694091797, "rewards/rejected": -19.28022575378418, "step": 5072 }, { "epoch": 1.73, "learning_rate": 9.312205592390532e-08, "logits/chosen": 0.065687395632267, "logits/rejected": 0.08120177686214447, "logps/chosen": -199.53814697265625, "logps/rejected": -338.7520751953125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.5078994035720825, "rewards/margins": 16.11063575744629, "rewards/rejected": -17.618534088134766, "step": 5073 }, { "epoch": 1.73, "learning_rate": 9.288928692547238e-08, "logits/chosen": 0.12889768183231354, "logits/rejected": 0.11336375772953033, "logps/chosen": -155.8789520263672, "logps/rejected": -337.1629638671875, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.997766137123108, "rewards/margins": 14.77656078338623, "rewards/rejected": -16.77432632446289, "step": 5074 }, { "epoch": 1.73, "learning_rate": 9.265679503684387e-08, "logits/chosen": 0.059715427458286285, "logits/rejected": 0.06522911787033081, "logps/chosen": -180.22076416015625, "logps/rejected": -403.1043395996094, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -0.8904327154159546, "rewards/margins": 20.466840744018555, "rewards/rejected": -21.35727310180664, "step": 5075 }, { "epoch": 1.73, "learning_rate": 9.24245803290431e-08, "logits/chosen": -0.08369775116443634, "logits/rejected": -0.05073944106698036, "logps/chosen": -261.9951477050781, "logps/rejected": -440.5828552246094, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3587985038757324, "rewards/margins": 20.343168258666992, "rewards/rejected": -21.70196533203125, "step": 5076 }, { "epoch": 1.73, "learning_rate": 9.219264287300799e-08, "logits/chosen": 0.054964788258075714, "logits/rejected": 0.09514503180980682, "logps/chosen": -204.3741455078125, "logps/rejected": -331.7640075683594, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": -1.8311357498168945, "rewards/margins": 15.242598533630371, "rewards/rejected": -17.0737361907959, "step": 5077 }, { "epoch": 1.73, "learning_rate": 9.19609827395924e-08, "logits/chosen": -0.08077946305274963, "logits/rejected": -0.05121030658483505, "logps/chosen": -199.60748291015625, "logps/rejected": -381.0510559082031, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.1322834491729736, "rewards/margins": 17.47271728515625, "rewards/rejected": -18.60500144958496, "step": 5078 }, { "epoch": 1.73, "learning_rate": 9.172959999956543e-08, "logits/chosen": 0.007515008095651865, "logits/rejected": 0.04486991837620735, "logps/chosen": -227.8666229248047, "logps/rejected": -436.8674621582031, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.7328170537948608, "rewards/margins": 22.787405014038086, "rewards/rejected": -23.52022361755371, "step": 5079 }, { "epoch": 1.73, "learning_rate": 9.149849472361148e-08, "logits/chosen": -0.07346382737159729, "logits/rejected": -0.019429266452789307, "logps/chosen": -239.80181884765625, "logps/rejected": -326.37567138671875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.2342175245285034, "rewards/margins": 15.630956649780273, "rewards/rejected": -16.865175247192383, "step": 5080 }, { "epoch": 1.73, "learning_rate": 9.126766698232957e-08, "logits/chosen": 0.03773997724056244, "logits/rejected": 0.10267765074968338, "logps/chosen": -233.11407470703125, "logps/rejected": -367.9890441894531, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.9252539277076721, "rewards/margins": 19.60862922668457, "rewards/rejected": -20.53388214111328, "step": 5081 }, { "epoch": 1.73, "learning_rate": 9.103711684623461e-08, "logits/chosen": 0.0116538405418396, "logits/rejected": 0.03785672038793564, "logps/chosen": -237.8506622314453, "logps/rejected": -460.316162109375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.022569179534912, "rewards/margins": 22.366666793823242, "rewards/rejected": -24.389236450195312, "step": 5082 }, { "epoch": 1.73, "learning_rate": 9.08068443857567e-08, "logits/chosen": 0.03705558925867081, "logits/rejected": 0.0785556435585022, "logps/chosen": -184.7154541015625, "logps/rejected": -315.7728271484375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.72434401512146, "rewards/margins": 15.053633689880371, "rewards/rejected": -16.777976989746094, "step": 5083 }, { "epoch": 1.74, "learning_rate": 9.057684967124035e-08, "logits/chosen": 0.07799714803695679, "logits/rejected": 0.0778064951300621, "logps/chosen": -232.87242126464844, "logps/rejected": -433.2536926269531, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.3477423191070557, "rewards/margins": 17.482189178466797, "rewards/rejected": -19.829931259155273, "step": 5084 }, { "epoch": 1.74, "learning_rate": 9.034713277294636e-08, "logits/chosen": -0.020385710522532463, "logits/rejected": 0.019976673647761345, "logps/chosen": -219.30426025390625, "logps/rejected": -326.4358825683594, "loss": 0.0313, "rewards/accuracies": 0.9375, "rewards/chosen": -1.971583366394043, "rewards/margins": 13.874515533447266, "rewards/rejected": -15.846097946166992, "step": 5085 }, { "epoch": 1.74, "learning_rate": 9.011769376104994e-08, "logits/chosen": -0.008310592733323574, "logits/rejected": 0.03662148490548134, "logps/chosen": -216.9853515625, "logps/rejected": -395.79119873046875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.9788614511489868, "rewards/margins": 19.58631706237793, "rewards/rejected": -20.56517791748047, "step": 5086 }, { "epoch": 1.74, "learning_rate": 8.988853270564101e-08, "logits/chosen": -0.03586849942803383, "logits/rejected": -0.011447801254689693, "logps/chosen": -221.28176879882812, "logps/rejected": -391.6768798828125, "loss": 0.0847, "rewards/accuracies": 1.0, "rewards/chosen": -0.4210006594657898, "rewards/margins": 17.579891204833984, "rewards/rejected": -18.000892639160156, "step": 5087 }, { "epoch": 1.74, "learning_rate": 8.96596496767259e-08, "logits/chosen": 0.08233743160963058, "logits/rejected": 0.10427012294530869, "logps/chosen": -191.14688110351562, "logps/rejected": -323.4033508300781, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -1.0136464834213257, "rewards/margins": 14.598550796508789, "rewards/rejected": -15.612197875976562, "step": 5088 }, { "epoch": 1.74, "learning_rate": 8.94310447442247e-08, "logits/chosen": 0.030255233868956566, "logits/rejected": 0.0373518131673336, "logps/chosen": -173.90640258789062, "logps/rejected": -396.9974060058594, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -1.9023613929748535, "rewards/margins": 18.047849655151367, "rewards/rejected": -19.950210571289062, "step": 5089 }, { "epoch": 1.74, "learning_rate": 8.920271797797319e-08, "logits/chosen": 0.03036332316696644, "logits/rejected": 0.06364265084266663, "logps/chosen": -243.40774536132812, "logps/rejected": -393.93035888671875, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -2.457359552383423, "rewards/margins": 15.185934066772461, "rewards/rejected": -17.643293380737305, "step": 5090 }, { "epoch": 1.74, "learning_rate": 8.897466944772224e-08, "logits/chosen": 0.013020072132349014, "logits/rejected": 0.051113005727529526, "logps/chosen": -273.8795166015625, "logps/rejected": -353.54241943359375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.263631820678711, "rewards/margins": 14.237377166748047, "rewards/rejected": -15.501009941101074, "step": 5091 }, { "epoch": 1.74, "learning_rate": 8.874689922313716e-08, "logits/chosen": -0.027286797761917114, "logits/rejected": 0.011644219048321247, "logps/chosen": -222.775390625, "logps/rejected": -334.40142822265625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.260141134262085, "rewards/margins": 16.077516555786133, "rewards/rejected": -17.337657928466797, "step": 5092 }, { "epoch": 1.74, "learning_rate": 8.85194073737987e-08, "logits/chosen": 0.11339260637760162, "logits/rejected": 0.16602341830730438, "logps/chosen": -227.6938018798828, "logps/rejected": -295.31915283203125, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.612127423286438, "rewards/margins": 14.748296737670898, "rewards/rejected": -15.360424041748047, "step": 5093 }, { "epoch": 1.74, "learning_rate": 8.829219396920274e-08, "logits/chosen": 0.02949810028076172, "logits/rejected": 0.027878234162926674, "logps/chosen": -228.940673828125, "logps/rejected": -474.1557312011719, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 0.00973394513130188, "rewards/margins": 21.419212341308594, "rewards/rejected": -21.40947723388672, "step": 5094 }, { "epoch": 1.74, "learning_rate": 8.806525907875972e-08, "logits/chosen": 0.007218701299279928, "logits/rejected": 0.041991282254457474, "logps/chosen": -224.1925811767578, "logps/rejected": -364.7989501953125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.011550188064575, "rewards/margins": 16.595142364501953, "rewards/rejected": -18.606689453125, "step": 5095 }, { "epoch": 1.74, "learning_rate": 8.7838602771795e-08, "logits/chosen": 0.09421258419752121, "logits/rejected": 0.11792667210102081, "logps/chosen": -203.74571228027344, "logps/rejected": -312.3279113769531, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.6059858798980713, "rewards/margins": 12.9033784866333, "rewards/rejected": -14.509363174438477, "step": 5096 }, { "epoch": 1.74, "learning_rate": 8.761222511754896e-08, "logits/chosen": 0.0322754867374897, "logits/rejected": 0.06647935509681702, "logps/chosen": -212.30892944335938, "logps/rejected": -382.03936767578125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.7530883550643921, "rewards/margins": 18.43812370300293, "rewards/rejected": -19.191211700439453, "step": 5097 }, { "epoch": 1.74, "learning_rate": 8.738612618517716e-08, "logits/chosen": 0.03676441311836243, "logits/rejected": 0.06598737835884094, "logps/chosen": -191.67587280273438, "logps/rejected": -293.04742431640625, "loss": 0.0467, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2513017654418945, "rewards/margins": 12.081619262695312, "rewards/rejected": -14.332921981811523, "step": 5098 }, { "epoch": 1.74, "learning_rate": 8.716030604374914e-08, "logits/chosen": 0.09116470068693161, "logits/rejected": 0.13515347242355347, "logps/chosen": -276.2053527832031, "logps/rejected": -430.30499267578125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -3.8929662704467773, "rewards/margins": 18.27545166015625, "rewards/rejected": -22.168420791625977, "step": 5099 }, { "epoch": 1.74, "learning_rate": 8.693476476225037e-08, "logits/chosen": 0.13003359735012054, "logits/rejected": 0.1686689555644989, "logps/chosen": -158.32437133789062, "logps/rejected": -244.12745666503906, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.1931958198547363, "rewards/margins": 11.206716537475586, "rewards/rejected": -12.399910926818848, "step": 5100 }, { "epoch": 1.74, "learning_rate": 8.670950240958053e-08, "logits/chosen": 0.16242985427379608, "logits/rejected": 0.20211568474769592, "logps/chosen": -157.61257934570312, "logps/rejected": -259.4184265136719, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -2.1086013317108154, "rewards/margins": 14.303115844726562, "rewards/rejected": -16.411718368530273, "step": 5101 }, { "epoch": 1.74, "learning_rate": 8.648451905455357e-08, "logits/chosen": 0.05134330689907074, "logits/rejected": 0.08818565309047699, "logps/chosen": -172.29989624023438, "logps/rejected": -300.7616882324219, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -2.3018665313720703, "rewards/margins": 15.143707275390625, "rewards/rejected": -17.445573806762695, "step": 5102 }, { "epoch": 1.74, "learning_rate": 8.625981476589972e-08, "logits/chosen": -0.009769576601684093, "logits/rejected": 0.033743780106306076, "logps/chosen": -177.24725341796875, "logps/rejected": -270.4341735839844, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -1.6821584701538086, "rewards/margins": 12.91484260559082, "rewards/rejected": -14.597001075744629, "step": 5103 }, { "epoch": 1.74, "learning_rate": 8.60353896122623e-08, "logits/chosen": 0.07585236430168152, "logits/rejected": 0.11717540770769119, "logps/chosen": -213.045166015625, "logps/rejected": -317.0289306640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1850286722183228, "rewards/margins": 14.345867156982422, "rewards/rejected": -15.53089714050293, "step": 5104 }, { "epoch": 1.74, "learning_rate": 8.581124366220039e-08, "logits/chosen": 0.14635759592056274, "logits/rejected": 0.16797567903995514, "logps/chosen": -203.56280517578125, "logps/rejected": -360.6851806640625, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": -3.2565791606903076, "rewards/margins": 16.18897819519043, "rewards/rejected": -19.445556640625, "step": 5105 }, { "epoch": 1.74, "learning_rate": 8.55873769841876e-08, "logits/chosen": -0.021523553878068924, "logits/rejected": -0.01020602136850357, "logps/chosen": -190.35313415527344, "logps/rejected": -390.496826171875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.9562742710113525, "rewards/margins": 19.396728515625, "rewards/rejected": -21.353004455566406, "step": 5106 }, { "epoch": 1.74, "learning_rate": 8.536378964661183e-08, "logits/chosen": -0.11706636846065521, "logits/rejected": -0.1036432534456253, "logps/chosen": -255.35284423828125, "logps/rejected": -465.67449951171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.144442081451416, "rewards/margins": 19.745832443237305, "rewards/rejected": -21.89027214050293, "step": 5107 }, { "epoch": 1.74, "learning_rate": 8.514048171777611e-08, "logits/chosen": 0.08876868337392807, "logits/rejected": 0.1241491362452507, "logps/chosen": -192.63888549804688, "logps/rejected": -364.9735412597656, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.5092636346817017, "rewards/margins": 18.982698440551758, "rewards/rejected": -20.491966247558594, "step": 5108 }, { "epoch": 1.74, "learning_rate": 8.491745326589784e-08, "logits/chosen": 0.1392267942428589, "logits/rejected": 0.15380439162254333, "logps/chosen": -182.1984405517578, "logps/rejected": -324.77203369140625, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.162087917327881, "rewards/margins": 14.937904357910156, "rewards/rejected": -17.099990844726562, "step": 5109 }, { "epoch": 1.74, "learning_rate": 8.469470435910931e-08, "logits/chosen": 0.05192249268293381, "logits/rejected": 0.0721982941031456, "logps/chosen": -210.6704559326172, "logps/rejected": -370.3733215332031, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.563000202178955, "rewards/margins": 17.620941162109375, "rewards/rejected": -19.183942794799805, "step": 5110 }, { "epoch": 1.74, "learning_rate": 8.447223506545698e-08, "logits/chosen": 0.020349552854895592, "logits/rejected": 0.033828865736722946, "logps/chosen": -214.1536865234375, "logps/rejected": -407.4190979003906, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -0.9987099170684814, "rewards/margins": 18.503055572509766, "rewards/rejected": -19.501766204833984, "step": 5111 }, { "epoch": 1.74, "learning_rate": 8.425004545290226e-08, "logits/chosen": 0.15643355250358582, "logits/rejected": 0.17756199836730957, "logps/chosen": -148.97833251953125, "logps/rejected": -329.94488525390625, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/chosen": -2.1778690814971924, "rewards/margins": 16.46179962158203, "rewards/rejected": -18.639667510986328, "step": 5112 }, { "epoch": 1.75, "learning_rate": 8.40281355893212e-08, "logits/chosen": -0.09679602086544037, "logits/rejected": -0.0754123255610466, "logps/chosen": -234.71214294433594, "logps/rejected": -432.3983154296875, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -1.0262395143508911, "rewards/margins": 17.31618309020996, "rewards/rejected": -18.342422485351562, "step": 5113 }, { "epoch": 1.75, "learning_rate": 8.38065055425039e-08, "logits/chosen": 0.09111547470092773, "logits/rejected": 0.11173483729362488, "logps/chosen": -189.12295532226562, "logps/rejected": -405.7366027832031, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.6950287818908691, "rewards/margins": 18.60744857788086, "rewards/rejected": -20.302478790283203, "step": 5114 }, { "epoch": 1.75, "learning_rate": 8.358515538015542e-08, "logits/chosen": 0.0493110716342926, "logits/rejected": 0.07652437686920166, "logps/chosen": -205.8289031982422, "logps/rejected": -312.6707458496094, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -1.046141266822815, "rewards/margins": 14.037022590637207, "rewards/rejected": -15.083165168762207, "step": 5115 }, { "epoch": 1.75, "learning_rate": 8.336408516989535e-08, "logits/chosen": 0.03324661776423454, "logits/rejected": 0.08033908903598785, "logps/chosen": -226.16268920898438, "logps/rejected": -353.7611083984375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.345522880554199, "rewards/margins": 16.65306854248047, "rewards/rejected": -18.998592376708984, "step": 5116 }, { "epoch": 1.75, "learning_rate": 8.314329497925699e-08, "logits/chosen": 0.002879692707210779, "logits/rejected": 0.03695166856050491, "logps/chosen": -199.3798828125, "logps/rejected": -330.1274108886719, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.3785332441329956, "rewards/margins": 14.169631004333496, "rewards/rejected": -15.548165321350098, "step": 5117 }, { "epoch": 1.75, "learning_rate": 8.292278487568949e-08, "logits/chosen": -0.018659882247447968, "logits/rejected": -0.000676955096423626, "logps/chosen": -211.85337829589844, "logps/rejected": -407.9978942871094, "loss": 0.0134, "rewards/accuracies": 0.9375, "rewards/chosen": -2.016960620880127, "rewards/margins": 19.304513931274414, "rewards/rejected": -21.321474075317383, "step": 5118 }, { "epoch": 1.75, "learning_rate": 8.270255492655532e-08, "logits/chosen": -0.09428678452968597, "logits/rejected": -0.08249364048242569, "logps/chosen": -240.34767150878906, "logps/rejected": -361.92486572265625, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -0.6002631783485413, "rewards/margins": 16.04262924194336, "rewards/rejected": -16.642892837524414, "step": 5119 }, { "epoch": 1.75, "learning_rate": 8.24826051991312e-08, "logits/chosen": 0.023886652663350105, "logits/rejected": 0.05515976995229721, "logps/chosen": -193.036376953125, "logps/rejected": -347.14117431640625, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.7862660884857178, "rewards/margins": 17.688356399536133, "rewards/rejected": -19.47462272644043, "step": 5120 }, { "epoch": 1.75, "learning_rate": 8.226293576060961e-08, "logits/chosen": 0.034224044531583786, "logits/rejected": 0.08494358509778976, "logps/chosen": -157.190673828125, "logps/rejected": -263.3592224121094, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.7591601610183716, "rewards/margins": 13.694364547729492, "rewards/rejected": -14.453524589538574, "step": 5121 }, { "epoch": 1.75, "learning_rate": 8.204354667809588e-08, "logits/chosen": 0.08864426612854004, "logits/rejected": 0.08662880957126617, "logps/chosen": -201.47789001464844, "logps/rejected": -402.80010986328125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.553715944290161, "rewards/margins": 17.126358032226562, "rewards/rejected": -19.680072784423828, "step": 5122 }, { "epoch": 1.75, "learning_rate": 8.18244380186105e-08, "logits/chosen": -0.0801212340593338, "logits/rejected": -0.02849770523607731, "logps/chosen": -186.61358642578125, "logps/rejected": -319.657470703125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.2750273942947388, "rewards/margins": 15.605820655822754, "rewards/rejected": -16.880847930908203, "step": 5123 }, { "epoch": 1.75, "learning_rate": 8.160560984908849e-08, "logits/chosen": 0.11101022362709045, "logits/rejected": 0.12151272594928741, "logps/chosen": -208.17544555664062, "logps/rejected": -339.0042724609375, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.586662769317627, "rewards/margins": 14.835504531860352, "rewards/rejected": -16.422164916992188, "step": 5124 }, { "epoch": 1.75, "learning_rate": 8.138706223637825e-08, "logits/chosen": 0.08856550604104996, "logits/rejected": 0.12591639161109924, "logps/chosen": -227.91485595703125, "logps/rejected": -439.1705627441406, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.108968496322632, "rewards/margins": 21.779294967651367, "rewards/rejected": -23.888263702392578, "step": 5125 }, { "epoch": 1.75, "learning_rate": 8.116879524724352e-08, "logits/chosen": 0.15262256562709808, "logits/rejected": 0.17205634713172913, "logps/chosen": -174.90267944335938, "logps/rejected": -298.638427734375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.168875217437744, "rewards/margins": 14.007759094238281, "rewards/rejected": -16.176633834838867, "step": 5126 }, { "epoch": 1.75, "learning_rate": 8.095080894836159e-08, "logits/chosen": -0.0199959184974432, "logits/rejected": 0.011194526217877865, "logps/chosen": -206.84381103515625, "logps/rejected": -340.111572265625, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -1.8671598434448242, "rewards/margins": 16.01405143737793, "rewards/rejected": -17.88121223449707, "step": 5127 }, { "epoch": 1.75, "learning_rate": 8.073310340632455e-08, "logits/chosen": 0.04955563321709633, "logits/rejected": 0.08491447567939758, "logps/chosen": -226.7174835205078, "logps/rejected": -398.21868896484375, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": -0.3146018981933594, "rewards/margins": 18.064619064331055, "rewards/rejected": -18.379222869873047, "step": 5128 }, { "epoch": 1.75, "learning_rate": 8.051567868763809e-08, "logits/chosen": 0.1025240421295166, "logits/rejected": 0.11602434515953064, "logps/chosen": -173.69886779785156, "logps/rejected": -368.8392333984375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.2489607334136963, "rewards/margins": 15.395126342773438, "rewards/rejected": -17.644086837768555, "step": 5129 }, { "epoch": 1.75, "learning_rate": 8.029853485872262e-08, "logits/chosen": -0.021548878401517868, "logits/rejected": 0.01449582539498806, "logps/chosen": -164.14016723632812, "logps/rejected": -286.3479309082031, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.1572306156158447, "rewards/margins": 15.076253890991211, "rewards/rejected": -16.233484268188477, "step": 5130 }, { "epoch": 1.75, "learning_rate": 8.008167198591287e-08, "logits/chosen": 0.20048309862613678, "logits/rejected": 0.22562015056610107, "logps/chosen": -198.22772216796875, "logps/rejected": -345.9683837890625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.995476245880127, "rewards/margins": 16.312002182006836, "rewards/rejected": -18.307479858398438, "step": 5131 }, { "epoch": 1.75, "learning_rate": 7.986509013545673e-08, "logits/chosen": -0.03055305778980255, "logits/rejected": -0.025585340335965157, "logps/chosen": -198.91610717773438, "logps/rejected": -406.5946350097656, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.8490034341812134, "rewards/margins": 17.467514038085938, "rewards/rejected": -18.31652069091797, "step": 5132 }, { "epoch": 1.75, "learning_rate": 7.964878937351771e-08, "logits/chosen": 0.0061285230331122875, "logits/rejected": 0.03393568471074104, "logps/chosen": -200.85792541503906, "logps/rejected": -376.079345703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2092373371124268, "rewards/margins": 17.7064151763916, "rewards/rejected": -18.915653228759766, "step": 5133 }, { "epoch": 1.75, "learning_rate": 7.943276976617252e-08, "logits/chosen": 0.05481573939323425, "logits/rejected": 0.10183735936880112, "logps/chosen": -163.420654296875, "logps/rejected": -325.15826416015625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.060453414916992, "rewards/margins": 17.477027893066406, "rewards/rejected": -19.5374813079834, "step": 5134 }, { "epoch": 1.75, "learning_rate": 7.921703137941171e-08, "logits/chosen": -0.03860097378492355, "logits/rejected": 0.02660970203578472, "logps/chosen": -258.2950134277344, "logps/rejected": -336.071533203125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.8985883593559265, "rewards/margins": 13.883179664611816, "rewards/rejected": -14.781766891479492, "step": 5135 }, { "epoch": 1.75, "learning_rate": 7.9001574279141e-08, "logits/chosen": -0.018231166526675224, "logits/rejected": 0.017973436042666435, "logps/chosen": -193.0157470703125, "logps/rejected": -334.0723571777344, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -1.6455495357513428, "rewards/margins": 16.027023315429688, "rewards/rejected": -17.67257308959961, "step": 5136 }, { "epoch": 1.75, "learning_rate": 7.878639853117918e-08, "logits/chosen": 0.06419859826564789, "logits/rejected": 0.08718863129615784, "logps/chosen": -154.6171112060547, "logps/rejected": -333.04083251953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.880926489830017, "rewards/margins": 15.496305465698242, "rewards/rejected": -17.37723159790039, "step": 5137 }, { "epoch": 1.75, "learning_rate": 7.857150420125958e-08, "logits/chosen": -0.03496229648590088, "logits/rejected": -3.3647924283286557e-05, "logps/chosen": -223.14639282226562, "logps/rejected": -372.913818359375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.9664971828460693, "rewards/margins": 16.466310501098633, "rewards/rejected": -18.43280792236328, "step": 5138 }, { "epoch": 1.75, "learning_rate": 7.835689135502966e-08, "logits/chosen": 0.09293662011623383, "logits/rejected": 0.13470502197742462, "logps/chosen": -193.8961181640625, "logps/rejected": -305.9588317871094, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.0333261489868164, "rewards/margins": 15.296019554138184, "rewards/rejected": -17.329345703125, "step": 5139 }, { "epoch": 1.75, "learning_rate": 7.81425600580502e-08, "logits/chosen": -0.015684155747294426, "logits/rejected": 0.004744320642203093, "logps/chosen": -196.0205841064453, "logps/rejected": -380.43890380859375, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": -0.3890802562236786, "rewards/margins": 18.122634887695312, "rewards/rejected": -18.511714935302734, "step": 5140 }, { "epoch": 1.75, "learning_rate": 7.792851037579684e-08, "logits/chosen": -0.05318200960755348, "logits/rejected": -0.016302987933158875, "logps/chosen": -242.1343994140625, "logps/rejected": -394.0423278808594, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -3.465208053588867, "rewards/margins": 15.041522979736328, "rewards/rejected": -18.506731033325195, "step": 5141 }, { "epoch": 1.75, "learning_rate": 7.77147423736586e-08, "logits/chosen": 0.06749558448791504, "logits/rejected": 0.09999383240938187, "logps/chosen": -212.24862670898438, "logps/rejected": -400.63818359375, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -1.5942776203155518, "rewards/margins": 20.612972259521484, "rewards/rejected": -22.20724868774414, "step": 5142 }, { "epoch": 1.76, "learning_rate": 7.750125611693903e-08, "logits/chosen": 0.14612407982349396, "logits/rejected": 0.15036413073539734, "logps/chosen": -166.399658203125, "logps/rejected": -326.13812255859375, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -2.337451696395874, "rewards/margins": 14.722966194152832, "rewards/rejected": -17.0604190826416, "step": 5143 }, { "epoch": 1.76, "learning_rate": 7.728805167085462e-08, "logits/chosen": 0.13369007408618927, "logits/rejected": 0.1642712652683258, "logps/chosen": -239.28372192382812, "logps/rejected": -380.84600830078125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.131535053253174, "rewards/margins": 18.158428192138672, "rewards/rejected": -20.289962768554688, "step": 5144 }, { "epoch": 1.76, "learning_rate": 7.707512910053681e-08, "logits/chosen": 0.07705662399530411, "logits/rejected": 0.09991776943206787, "logps/chosen": -159.1663055419922, "logps/rejected": -336.2572326660156, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.4860215187072754, "rewards/margins": 15.944034576416016, "rewards/rejected": -17.430057525634766, "step": 5145 }, { "epoch": 1.76, "learning_rate": 7.686248847103072e-08, "logits/chosen": -0.008405512198805809, "logits/rejected": 0.0009220248321071267, "logps/chosen": -220.75991821289062, "logps/rejected": -365.50225830078125, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -1.8939619064331055, "rewards/margins": 13.653338432312012, "rewards/rejected": -15.547301292419434, "step": 5146 }, { "epoch": 1.76, "learning_rate": 7.665012984729436e-08, "logits/chosen": 0.11869470775127411, "logits/rejected": 0.12850597500801086, "logps/chosen": -159.7560577392578, "logps/rejected": -356.2348937988281, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -2.0420122146606445, "rewards/margins": 17.24570083618164, "rewards/rejected": -19.2877140045166, "step": 5147 }, { "epoch": 1.76, "learning_rate": 7.643805329420116e-08, "logits/chosen": 0.0502745546400547, "logits/rejected": 0.08300159871578217, "logps/chosen": -191.08908081054688, "logps/rejected": -364.4103088378906, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.0050811767578125, "rewards/margins": 16.663225173950195, "rewards/rejected": -19.668306350708008, "step": 5148 }, { "epoch": 1.76, "learning_rate": 7.622625887653722e-08, "logits/chosen": 0.062290336936712265, "logits/rejected": 0.1187523677945137, "logps/chosen": -200.93179321289062, "logps/rejected": -357.14410400390625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -2.40142822265625, "rewards/margins": 15.709486961364746, "rewards/rejected": -18.110916137695312, "step": 5149 }, { "epoch": 1.76, "learning_rate": 7.601474665900254e-08, "logits/chosen": 0.044359978288412094, "logits/rejected": 0.06576194614171982, "logps/chosen": -241.85086059570312, "logps/rejected": -404.7196350097656, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.682394027709961, "rewards/margins": 17.613313674926758, "rewards/rejected": -20.29570770263672, "step": 5150 }, { "epoch": 1.76, "learning_rate": 7.580351670621177e-08, "logits/chosen": 0.1373748481273651, "logits/rejected": 0.16464152932167053, "logps/chosen": -178.66221618652344, "logps/rejected": -307.9463195800781, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.5610644817352295, "rewards/margins": 14.329912185668945, "rewards/rejected": -16.890975952148438, "step": 5151 }, { "epoch": 1.76, "learning_rate": 7.559256908269251e-08, "logits/chosen": -0.02460428513586521, "logits/rejected": 0.030886536464095116, "logps/chosen": -248.52059936523438, "logps/rejected": -395.8726501464844, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.7442644834518433, "rewards/margins": 18.879318237304688, "rewards/rejected": -19.623584747314453, "step": 5152 }, { "epoch": 1.76, "learning_rate": 7.538190385288601e-08, "logits/chosen": 0.05350271984934807, "logits/rejected": 0.07930655032396317, "logps/chosen": -181.18206787109375, "logps/rejected": -349.255126953125, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -2.0523314476013184, "rewards/margins": 17.050067901611328, "rewards/rejected": -19.102399826049805, "step": 5153 }, { "epoch": 1.76, "learning_rate": 7.51715210811481e-08, "logits/chosen": 0.006658974103629589, "logits/rejected": 0.04897112399339676, "logps/chosen": -244.06268310546875, "logps/rejected": -372.77423095703125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.6969282627105713, "rewards/margins": 14.448908805847168, "rewards/rejected": -16.145837783813477, "step": 5154 }, { "epoch": 1.76, "learning_rate": 7.49614208317475e-08, "logits/chosen": 0.08871597051620483, "logits/rejected": 0.0943770557641983, "logps/chosen": -177.86376953125, "logps/rejected": -383.14923095703125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.8506776094436646, "rewards/margins": 16.958755493164062, "rewards/rejected": -17.809431076049805, "step": 5155 }, { "epoch": 1.76, "learning_rate": 7.475160316886697e-08, "logits/chosen": 0.1270788609981537, "logits/rejected": 0.13556918501853943, "logps/chosen": -173.74124145507812, "logps/rejected": -345.73095703125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.1544337272644043, "rewards/margins": 16.671741485595703, "rewards/rejected": -17.826175689697266, "step": 5156 }, { "epoch": 1.76, "learning_rate": 7.454206815660313e-08, "logits/chosen": 0.10826026648283005, "logits/rejected": 0.11463933438062668, "logps/chosen": -98.2496109008789, "logps/rejected": -269.2711486816406, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.7638182640075684, "rewards/margins": 13.289627075195312, "rewards/rejected": -15.053445816040039, "step": 5157 }, { "epoch": 1.76, "learning_rate": 7.433281585896579e-08, "logits/chosen": 0.0003864783502649516, "logits/rejected": 0.037812814116477966, "logps/chosen": -206.92837524414062, "logps/rejected": -294.098388671875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.645492672920227, "rewards/margins": 12.652020454406738, "rewards/rejected": -14.297513961791992, "step": 5158 }, { "epoch": 1.76, "learning_rate": 7.412384633987855e-08, "logits/chosen": -0.01312018558382988, "logits/rejected": 0.03393004834651947, "logps/chosen": -211.59068298339844, "logps/rejected": -380.4349060058594, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.9630095958709717, "rewards/margins": 17.594219207763672, "rewards/rejected": -19.557228088378906, "step": 5159 }, { "epoch": 1.76, "learning_rate": 7.391515966317908e-08, "logits/chosen": -0.00470458110794425, "logits/rejected": 0.02762330137193203, "logps/chosen": -186.9833526611328, "logps/rejected": -392.4883117675781, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.381277084350586, "rewards/margins": 19.650060653686523, "rewards/rejected": -22.031335830688477, "step": 5160 }, { "epoch": 1.76, "learning_rate": 7.37067558926181e-08, "logits/chosen": 0.14178480207920074, "logits/rejected": 0.17154884338378906, "logps/chosen": -181.27587890625, "logps/rejected": -335.42864990234375, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -0.4105011224746704, "rewards/margins": 16.346107482910156, "rewards/rejected": -16.756607055664062, "step": 5161 }, { "epoch": 1.76, "learning_rate": 7.349863509185994e-08, "logits/chosen": 0.08038544654846191, "logits/rejected": 0.10716396570205688, "logps/chosen": -159.99002075195312, "logps/rejected": -246.15234375, "loss": 0.0953, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2277696132659912, "rewards/margins": 12.008097648620605, "rewards/rejected": -13.23586654663086, "step": 5162 }, { "epoch": 1.76, "learning_rate": 7.329079732448285e-08, "logits/chosen": 0.07888990640640259, "logits/rejected": 0.1333460956811905, "logps/chosen": -209.86724853515625, "logps/rejected": -311.9598083496094, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.015255734324455261, "rewards/margins": 16.222970962524414, "rewards/rejected": -16.23822593688965, "step": 5163 }, { "epoch": 1.76, "learning_rate": 7.308324265397836e-08, "logits/chosen": 0.026808202266693115, "logits/rejected": 0.054096769541502, "logps/chosen": -165.43089294433594, "logps/rejected": -309.65606689453125, "loss": 0.0856, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0031964778900146, "rewards/margins": 16.381986618041992, "rewards/rejected": -18.385181427001953, "step": 5164 }, { "epoch": 1.76, "learning_rate": 7.287597114375122e-08, "logits/chosen": 0.12068916112184525, "logits/rejected": 0.1554710865020752, "logps/chosen": -168.22381591796875, "logps/rejected": -334.8138427734375, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -2.1067886352539062, "rewards/margins": 15.362003326416016, "rewards/rejected": -17.468793869018555, "step": 5165 }, { "epoch": 1.76, "learning_rate": 7.266898285712065e-08, "logits/chosen": 0.09583033621311188, "logits/rejected": 0.12261854857206345, "logps/chosen": -196.27691650390625, "logps/rejected": -386.4571838378906, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.9155739545822144, "rewards/margins": 18.51584815979004, "rewards/rejected": -20.43142318725586, "step": 5166 }, { "epoch": 1.76, "learning_rate": 7.24622778573184e-08, "logits/chosen": -0.09084054827690125, "logits/rejected": -0.04507193714380264, "logps/chosen": -252.2180633544922, "logps/rejected": -405.909912109375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.399402379989624, "rewards/margins": 18.51025390625, "rewards/rejected": -19.909658432006836, "step": 5167 }, { "epoch": 1.76, "learning_rate": 7.225585620748952e-08, "logits/chosen": 0.03795705735683441, "logits/rejected": 0.05167228728532791, "logps/chosen": -208.77130126953125, "logps/rejected": -358.0230407714844, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.6147816181182861, "rewards/margins": 16.065555572509766, "rewards/rejected": -16.68033790588379, "step": 5168 }, { "epoch": 1.76, "learning_rate": 7.204971797069381e-08, "logits/chosen": -0.01509908027946949, "logits/rejected": -0.0026684352196753025, "logps/chosen": -190.50662231445312, "logps/rejected": -326.62274169921875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -2.677590847015381, "rewards/margins": 13.3389892578125, "rewards/rejected": -16.016578674316406, "step": 5169 }, { "epoch": 1.76, "learning_rate": 7.18438632099031e-08, "logits/chosen": 0.056914474815130234, "logits/rejected": 0.10105016827583313, "logps/chosen": -185.2465057373047, "logps/rejected": -253.68972778320312, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.6399907469749451, "rewards/margins": 12.886808395385742, "rewards/rejected": -13.526798248291016, "step": 5170 }, { "epoch": 1.76, "learning_rate": 7.163829198800342e-08, "logits/chosen": 0.08217640221118927, "logits/rejected": 0.10525578260421753, "logps/chosen": -204.1332244873047, "logps/rejected": -346.96612548828125, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -2.212198495864868, "rewards/margins": 14.069625854492188, "rewards/rejected": -16.281824111938477, "step": 5171 }, { "epoch": 1.77, "learning_rate": 7.143300436779398e-08, "logits/chosen": 0.1274452656507492, "logits/rejected": 0.17059877514839172, "logps/chosen": -185.1603240966797, "logps/rejected": -276.0481262207031, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.4342767298221588, "rewards/margins": 16.61989974975586, "rewards/rejected": -17.054176330566406, "step": 5172 }, { "epoch": 1.77, "learning_rate": 7.122800041198696e-08, "logits/chosen": 0.025033554062247276, "logits/rejected": 0.043607935309410095, "logps/chosen": -193.99073791503906, "logps/rejected": -316.64544677734375, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -2.7444558143615723, "rewards/margins": 11.893771171569824, "rewards/rejected": -14.638227462768555, "step": 5173 }, { "epoch": 1.77, "learning_rate": 7.102328018320858e-08, "logits/chosen": 0.16125385463237762, "logits/rejected": 0.18112722039222717, "logps/chosen": -156.26905822753906, "logps/rejected": -289.7328186035156, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.6142443418502808, "rewards/margins": 14.014310836791992, "rewards/rejected": -15.628556251525879, "step": 5174 }, { "epoch": 1.77, "learning_rate": 7.081884374399793e-08, "logits/chosen": 0.11007826030254364, "logits/rejected": 0.14423629641532898, "logps/chosen": -249.8936004638672, "logps/rejected": -427.27996826171875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.6973209381103516, "rewards/margins": 19.119205474853516, "rewards/rejected": -21.8165283203125, "step": 5175 }, { "epoch": 1.77, "learning_rate": 7.061469115680763e-08, "logits/chosen": -0.0008445812272839248, "logits/rejected": 0.027055123820900917, "logps/chosen": -219.92221069335938, "logps/rejected": -311.8632507324219, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.2233420610427856, "rewards/margins": 12.736154556274414, "rewards/rejected": -13.959497451782227, "step": 5176 }, { "epoch": 1.77, "learning_rate": 7.041082248400332e-08, "logits/chosen": 0.0773792639374733, "logits/rejected": 0.09968987107276917, "logps/chosen": -155.48635864257812, "logps/rejected": -297.572265625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.7216034531593323, "rewards/margins": 14.888195991516113, "rewards/rejected": -15.609798431396484, "step": 5177 }, { "epoch": 1.77, "learning_rate": 7.020723778786408e-08, "logits/chosen": 0.06632344424724579, "logits/rejected": 0.07558244466781616, "logps/chosen": -167.45089721679688, "logps/rejected": -340.599609375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.315556287765503, "rewards/margins": 15.334869384765625, "rewards/rejected": -18.650426864624023, "step": 5178 }, { "epoch": 1.77, "learning_rate": 7.000393713058239e-08, "logits/chosen": 0.07523173093795776, "logits/rejected": 0.08978405594825745, "logps/chosen": -219.29489135742188, "logps/rejected": -333.6370849609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.1719396114349365, "rewards/margins": 12.82925796508789, "rewards/rejected": -15.001198768615723, "step": 5179 }, { "epoch": 1.77, "learning_rate": 6.980092057426346e-08, "logits/chosen": 0.15785148739814758, "logits/rejected": 0.18578359484672546, "logps/chosen": -131.81385803222656, "logps/rejected": -249.6925048828125, "loss": 0.0443, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2189557552337646, "rewards/margins": 12.266642570495605, "rewards/rejected": -13.485597610473633, "step": 5180 }, { "epoch": 1.77, "learning_rate": 6.959818818092644e-08, "logits/chosen": 0.0887049064040184, "logits/rejected": 0.12684927880764008, "logps/chosen": -171.2271728515625, "logps/rejected": -276.27276611328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9255505800247192, "rewards/margins": 13.37614917755127, "rewards/rejected": -14.301698684692383, "step": 5181 }, { "epoch": 1.77, "learning_rate": 6.939574001250326e-08, "logits/chosen": -0.0033093492966145277, "logits/rejected": 0.023821432143449783, "logps/chosen": -248.74310302734375, "logps/rejected": -382.61846923828125, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.1491981744766235, "rewards/margins": 17.925201416015625, "rewards/rejected": -19.07440185546875, "step": 5182 }, { "epoch": 1.77, "learning_rate": 6.919357613083843e-08, "logits/chosen": 0.07124576717615128, "logits/rejected": 0.07609699666500092, "logps/chosen": -177.19615173339844, "logps/rejected": -373.8804626464844, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.3281161785125732, "rewards/margins": 16.255279541015625, "rewards/rejected": -18.583396911621094, "step": 5183 }, { "epoch": 1.77, "learning_rate": 6.899169659769111e-08, "logits/chosen": -0.07562848180532455, "logits/rejected": -0.07477119565010071, "logps/chosen": -189.5461883544922, "logps/rejected": -350.4856262207031, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -2.7155678272247314, "rewards/margins": 13.198908805847168, "rewards/rejected": -15.91447639465332, "step": 5184 }, { "epoch": 1.77, "learning_rate": 6.879010147473207e-08, "logits/chosen": 0.05594709515571594, "logits/rejected": 0.10115575045347214, "logps/chosen": -243.06182861328125, "logps/rejected": -383.2682189941406, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.6477179527282715, "rewards/margins": 15.981411933898926, "rewards/rejected": -17.629131317138672, "step": 5185 }, { "epoch": 1.77, "learning_rate": 6.858879082354607e-08, "logits/chosen": 0.01384699996560812, "logits/rejected": 0.04879436269402504, "logps/chosen": -224.2961883544922, "logps/rejected": -442.7537841796875, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -2.227668523788452, "rewards/margins": 22.96649169921875, "rewards/rejected": -25.19416046142578, "step": 5186 }, { "epoch": 1.77, "learning_rate": 6.83877647056309e-08, "logits/chosen": 0.06966276466846466, "logits/rejected": 0.09115290641784668, "logps/chosen": -152.13023376464844, "logps/rejected": -274.9575500488281, "loss": 0.0281, "rewards/accuracies": 0.875, "rewards/chosen": -2.2865664958953857, "rewards/margins": 13.541561126708984, "rewards/rejected": -15.82812786102295, "step": 5187 }, { "epoch": 1.77, "learning_rate": 6.818702318239689e-08, "logits/chosen": 0.10884351283311844, "logits/rejected": 0.15209810435771942, "logps/chosen": -216.0651397705078, "logps/rejected": -381.37994384765625, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0550477504730225, "rewards/margins": 20.342121124267578, "rewards/rejected": -21.397171020507812, "step": 5188 }, { "epoch": 1.77, "learning_rate": 6.798656631516798e-08, "logits/chosen": 0.05857183784246445, "logits/rejected": 0.08839050680398941, "logps/chosen": -229.46078491210938, "logps/rejected": -385.9967041015625, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.0942133441567421, "rewards/margins": 17.188940048217773, "rewards/rejected": -17.283153533935547, "step": 5189 }, { "epoch": 1.77, "learning_rate": 6.778639416518128e-08, "logits/chosen": 0.10976102203130722, "logits/rejected": 0.11452250182628632, "logps/chosen": -270.379638671875, "logps/rejected": -456.6953430175781, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -1.3629992008209229, "rewards/margins": 17.40700340270996, "rewards/rejected": -18.770002365112305, "step": 5190 }, { "epoch": 1.77, "learning_rate": 6.758650679358602e-08, "logits/chosen": 0.1311890035867691, "logits/rejected": 0.1708403378725052, "logps/chosen": -195.58404541015625, "logps/rejected": -433.9349670410156, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -1.8130548000335693, "rewards/margins": 21.174718856811523, "rewards/rejected": -22.98777198791504, "step": 5191 }, { "epoch": 1.77, "learning_rate": 6.738690426144544e-08, "logits/chosen": -0.009744175709784031, "logits/rejected": 0.03217291831970215, "logps/chosen": -233.88621520996094, "logps/rejected": -368.48291015625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.0760396718978882, "rewards/margins": 19.08148765563965, "rewards/rejected": -20.15752601623535, "step": 5192 }, { "epoch": 1.77, "learning_rate": 6.718758662973522e-08, "logits/chosen": 0.004013579338788986, "logits/rejected": 0.0463031530380249, "logps/chosen": -266.5675354003906, "logps/rejected": -434.67120361328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.7979979515075684, "rewards/margins": 20.482410430908203, "rewards/rejected": -23.280406951904297, "step": 5193 }, { "epoch": 1.77, "learning_rate": 6.698855395934433e-08, "logits/chosen": 0.0072839390486478806, "logits/rejected": 0.05295165628194809, "logps/chosen": -246.76182556152344, "logps/rejected": -439.996826171875, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.6344635486602783, "rewards/margins": 21.6691837310791, "rewards/rejected": -23.30364990234375, "step": 5194 }, { "epoch": 1.77, "learning_rate": 6.678980631107423e-08, "logits/chosen": 0.04005806893110275, "logits/rejected": 0.07138340175151825, "logps/chosen": -197.44366455078125, "logps/rejected": -389.0831298828125, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.910624384880066, "rewards/margins": 18.033519744873047, "rewards/rejected": -19.94414710998535, "step": 5195 }, { "epoch": 1.77, "learning_rate": 6.659134374563968e-08, "logits/chosen": 0.06019260361790657, "logits/rejected": 0.08734498918056488, "logps/chosen": -224.19000244140625, "logps/rejected": -414.7317810058594, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.8602126836776733, "rewards/margins": 19.95724105834961, "rewards/rejected": -20.817453384399414, "step": 5196 }, { "epoch": 1.77, "learning_rate": 6.639316632366853e-08, "logits/chosen": -0.05263660103082657, "logits/rejected": -0.030608048662543297, "logps/chosen": -255.3739013671875, "logps/rejected": -472.53759765625, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -2.88995623588562, "rewards/margins": 20.00415802001953, "rewards/rejected": -22.89411163330078, "step": 5197 }, { "epoch": 1.77, "learning_rate": 6.619527410570069e-08, "logits/chosen": -0.0009969630045816302, "logits/rejected": 0.0153886079788208, "logps/chosen": -235.56085205078125, "logps/rejected": -376.9425354003906, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -3.6478726863861084, "rewards/margins": 12.788872718811035, "rewards/rejected": -16.436744689941406, "step": 5198 }, { "epoch": 1.77, "learning_rate": 6.599766715219013e-08, "logits/chosen": -0.00038164437864907086, "logits/rejected": 0.05854232236742973, "logps/chosen": -240.3184356689453, "logps/rejected": -361.9424743652344, "loss": 0.0209, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5157915353775024, "rewards/margins": 16.83452606201172, "rewards/rejected": -18.350317001342773, "step": 5199 }, { "epoch": 1.77, "learning_rate": 6.580034552350267e-08, "logits/chosen": 0.10432083904743195, "logits/rejected": 0.09787122905254364, "logps/chosen": -134.29220581054688, "logps/rejected": -300.7757568359375, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -3.023345947265625, "rewards/margins": 12.567131042480469, "rewards/rejected": -15.590476036071777, "step": 5200 }, { "epoch": 1.78, "learning_rate": 6.56033092799172e-08, "logits/chosen": 0.020460180938243866, "logits/rejected": 0.05873269587755203, "logps/chosen": -210.83656311035156, "logps/rejected": -332.1536560058594, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6567361950874329, "rewards/margins": 16.61164665222168, "rewards/rejected": -17.26838493347168, "step": 5201 }, { "epoch": 1.78, "learning_rate": 6.540655848162601e-08, "logits/chosen": 0.12001429498195648, "logits/rejected": 0.11726740747690201, "logps/chosen": -172.00071716308594, "logps/rejected": -330.97515869140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0064468383789062, "rewards/margins": 15.376214981079102, "rewards/rejected": -16.382661819458008, "step": 5202 }, { "epoch": 1.78, "learning_rate": 6.521009318873349e-08, "logits/chosen": 0.0038557874504476786, "logits/rejected": 0.04823089763522148, "logps/chosen": -201.43983459472656, "logps/rejected": -375.8304443359375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.216400384902954, "rewards/margins": 17.260801315307617, "rewards/rejected": -19.477201461791992, "step": 5203 }, { "epoch": 1.78, "learning_rate": 6.501391346125706e-08, "logits/chosen": 0.18380460143089294, "logits/rejected": 0.18696968257427216, "logps/chosen": -152.0424041748047, "logps/rejected": -246.423095703125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.6614664793014526, "rewards/margins": 9.877223014831543, "rewards/rejected": -11.538688659667969, "step": 5204 }, { "epoch": 1.78, "learning_rate": 6.481801935912711e-08, "logits/chosen": 0.021890392526984215, "logits/rejected": 0.03123626671731472, "logps/chosen": -235.8914794921875, "logps/rejected": -434.669677734375, "loss": 0.024, "rewards/accuracies": 0.9375, "rewards/chosen": -3.449371337890625, "rewards/margins": 17.760343551635742, "rewards/rejected": -21.209712982177734, "step": 5205 }, { "epoch": 1.78, "learning_rate": 6.462241094218634e-08, "logits/chosen": 0.06839273124933243, "logits/rejected": 0.1033685952425003, "logps/chosen": -165.960205078125, "logps/rejected": -295.5236511230469, "loss": 0.0224, "rewards/accuracies": 0.9375, "rewards/chosen": -1.389865517616272, "rewards/margins": 15.145090103149414, "rewards/rejected": -16.534955978393555, "step": 5206 }, { "epoch": 1.78, "learning_rate": 6.44270882701905e-08, "logits/chosen": 0.02748519368469715, "logits/rejected": 0.07520341128110886, "logps/chosen": -207.38267517089844, "logps/rejected": -369.93701171875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.7453824281692505, "rewards/margins": 19.717090606689453, "rewards/rejected": -20.462472915649414, "step": 5207 }, { "epoch": 1.78, "learning_rate": 6.423205140280796e-08, "logits/chosen": 0.07471171766519547, "logits/rejected": 0.11370830982923508, "logps/chosen": -184.4375762939453, "logps/rejected": -339.0945739746094, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.9523106813430786, "rewards/margins": 16.88324546813965, "rewards/rejected": -18.83555793762207, "step": 5208 }, { "epoch": 1.78, "learning_rate": 6.403730039962008e-08, "logits/chosen": 0.07134651392698288, "logits/rejected": 0.10758500546216965, "logps/chosen": -168.60610961914062, "logps/rejected": -262.9142761230469, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -3.0722649097442627, "rewards/margins": 12.203777313232422, "rewards/rejected": -15.276042938232422, "step": 5209 }, { "epoch": 1.78, "learning_rate": 6.384283532012014e-08, "logits/chosen": 0.08964251726865768, "logits/rejected": 0.11781299114227295, "logps/chosen": -204.0269012451172, "logps/rejected": -311.54046630859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.038614273071289, "rewards/margins": 14.356205940246582, "rewards/rejected": -16.394821166992188, "step": 5210 }, { "epoch": 1.78, "learning_rate": 6.364865622371474e-08, "logits/chosen": 0.07335497438907623, "logits/rejected": 0.08973819762468338, "logps/chosen": -221.59024047851562, "logps/rejected": -342.92083740234375, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.7918200492858887, "rewards/margins": 12.696053504943848, "rewards/rejected": -13.487874984741211, "step": 5211 }, { "epoch": 1.78, "learning_rate": 6.34547631697232e-08, "logits/chosen": 0.1275675892829895, "logits/rejected": 0.14318633079528809, "logps/chosen": -137.971923828125, "logps/rejected": -289.6379699707031, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.8385632038116455, "rewards/margins": 14.194724082946777, "rewards/rejected": -17.033288955688477, "step": 5212 }, { "epoch": 1.78, "learning_rate": 6.326115621737648e-08, "logits/chosen": -0.06379767507314682, "logits/rejected": -0.001287476159632206, "logps/chosen": -194.4500732421875, "logps/rejected": -378.8905334472656, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.538637638092041, "rewards/margins": 18.62294578552246, "rewards/rejected": -21.161582946777344, "step": 5213 }, { "epoch": 1.78, "learning_rate": 6.306783542581961e-08, "logits/chosen": 0.05580669268965721, "logits/rejected": 0.07444392144680023, "logps/chosen": -247.3232879638672, "logps/rejected": -376.904052734375, "loss": 0.0184, "rewards/accuracies": 0.9375, "rewards/chosen": -1.130280613899231, "rewards/margins": 15.624760627746582, "rewards/rejected": -16.755043029785156, "step": 5214 }, { "epoch": 1.78, "learning_rate": 6.287480085410913e-08, "logits/chosen": 0.00954971183091402, "logits/rejected": 0.03375544771552086, "logps/chosen": -221.14967346191406, "logps/rejected": -318.990234375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5582880973815918, "rewards/margins": 13.17872142791748, "rewards/rejected": -13.73701000213623, "step": 5215 }, { "epoch": 1.78, "learning_rate": 6.268205256121394e-08, "logits/chosen": -0.0019750690553337336, "logits/rejected": 0.03780009597539902, "logps/chosen": -199.6941680908203, "logps/rejected": -306.5055847167969, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.0809839963912964, "rewards/margins": 14.099004745483398, "rewards/rejected": -15.179987907409668, "step": 5216 }, { "epoch": 1.78, "learning_rate": 6.248959060601689e-08, "logits/chosen": 0.04677998647093773, "logits/rejected": 0.08825209736824036, "logps/chosen": -276.08831787109375, "logps/rejected": -436.1196594238281, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.11425277590751648, "rewards/margins": 20.247472763061523, "rewards/rejected": -20.361722946166992, "step": 5217 }, { "epoch": 1.78, "learning_rate": 6.229741504731168e-08, "logits/chosen": 0.12909334897994995, "logits/rejected": 0.1370026022195816, "logps/chosen": -147.9228057861328, "logps/rejected": -379.4875793457031, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.4126640558242798, "rewards/margins": 19.171884536743164, "rewards/rejected": -20.584548950195312, "step": 5218 }, { "epoch": 1.78, "learning_rate": 6.210552594380569e-08, "logits/chosen": 0.06109689176082611, "logits/rejected": 0.07017010450363159, "logps/chosen": -184.82369995117188, "logps/rejected": -404.42474365234375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.720036268234253, "rewards/margins": 19.40521812438965, "rewards/rejected": -21.125253677368164, "step": 5219 }, { "epoch": 1.78, "learning_rate": 6.191392335411838e-08, "logits/chosen": 0.0764869898557663, "logits/rejected": 0.09720920026302338, "logps/chosen": -160.9084014892578, "logps/rejected": -303.6615905761719, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.8367917537689209, "rewards/margins": 14.882064819335938, "rewards/rejected": -15.718855857849121, "step": 5220 }, { "epoch": 1.78, "learning_rate": 6.172260733678159e-08, "logits/chosen": 0.030974965542554855, "logits/rejected": 0.08524391800165176, "logps/chosen": -183.3015899658203, "logps/rejected": -282.92596435546875, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -0.03619520366191864, "rewards/margins": 16.354124069213867, "rewards/rejected": -16.390317916870117, "step": 5221 }, { "epoch": 1.78, "learning_rate": 6.153157795023956e-08, "logits/chosen": 0.04747548699378967, "logits/rejected": 0.09744910895824432, "logps/chosen": -211.29531860351562, "logps/rejected": -268.46966552734375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.7376550436019897, "rewards/margins": 12.013577461242676, "rewards/rejected": -13.751233100891113, "step": 5222 }, { "epoch": 1.78, "learning_rate": 6.13408352528495e-08, "logits/chosen": 0.03243644908070564, "logits/rejected": 0.06404529511928558, "logps/chosen": -182.95594787597656, "logps/rejected": -308.2129211425781, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.68172025680542, "rewards/margins": 13.810120582580566, "rewards/rejected": -15.491841316223145, "step": 5223 }, { "epoch": 1.78, "learning_rate": 6.115037930288059e-08, "logits/chosen": 0.005590818356722593, "logits/rejected": 0.057363416999578476, "logps/chosen": -246.9388427734375, "logps/rejected": -371.2186584472656, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.24214637279510498, "rewards/margins": 18.428178787231445, "rewards/rejected": -18.670326232910156, "step": 5224 }, { "epoch": 1.78, "learning_rate": 6.096021015851416e-08, "logits/chosen": 0.048369988799095154, "logits/rejected": 0.06439898163080215, "logps/chosen": -149.0895233154297, "logps/rejected": -279.0314636230469, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.7864066362380981, "rewards/margins": 12.304333686828613, "rewards/rejected": -14.090740203857422, "step": 5225 }, { "epoch": 1.78, "learning_rate": 6.077032787784453e-08, "logits/chosen": 0.07560906559228897, "logits/rejected": 0.10517623275518417, "logps/chosen": -122.42594146728516, "logps/rejected": -242.35092163085938, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.5428848266601562, "rewards/margins": 14.092996597290039, "rewards/rejected": -15.635882377624512, "step": 5226 }, { "epoch": 1.78, "learning_rate": 6.05807325188783e-08, "logits/chosen": 0.009012066759169102, "logits/rejected": 0.016737665981054306, "logps/chosen": -176.6181640625, "logps/rejected": -343.4758605957031, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -1.6445045471191406, "rewards/margins": 17.410293579101562, "rewards/rejected": -19.054798126220703, "step": 5227 }, { "epoch": 1.78, "learning_rate": 6.039142413953379e-08, "logits/chosen": 0.1437082141637802, "logits/rejected": 0.17889580130577087, "logps/chosen": -192.4337921142578, "logps/rejected": -381.00189208984375, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -1.7457081079483032, "rewards/margins": 18.597267150878906, "rewards/rejected": -20.342975616455078, "step": 5228 }, { "epoch": 1.78, "learning_rate": 6.020240279764232e-08, "logits/chosen": 0.06792144477367401, "logits/rejected": 0.09138995409011841, "logps/chosen": -211.9699249267578, "logps/rejected": -421.9873046875, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -1.6562963724136353, "rewards/margins": 19.298757553100586, "rewards/rejected": -20.95505142211914, "step": 5229 }, { "epoch": 1.78, "learning_rate": 6.001366855094747e-08, "logits/chosen": -0.021427955478429794, "logits/rejected": 0.01679880917072296, "logps/chosen": -213.44992065429688, "logps/rejected": -461.4771728515625, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -2.0579187870025635, "rewards/margins": 22.896116256713867, "rewards/rejected": -24.954036712646484, "step": 5230 }, { "epoch": 1.79, "learning_rate": 5.982522145710456e-08, "logits/chosen": -0.13779625296592712, "logits/rejected": -0.09982344508171082, "logps/chosen": -179.9855499267578, "logps/rejected": -302.84625244140625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.2188512086868286, "rewards/margins": 15.028390884399414, "rewards/rejected": -16.24724006652832, "step": 5231 }, { "epoch": 1.79, "learning_rate": 5.963706157368198e-08, "logits/chosen": 0.09448499232530594, "logits/rejected": 0.1243864893913269, "logps/chosen": -193.36724853515625, "logps/rejected": -309.47357177734375, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.23859716951847076, "rewards/margins": 14.411243438720703, "rewards/rejected": -14.649840354919434, "step": 5232 }, { "epoch": 1.79, "learning_rate": 5.944918895815976e-08, "logits/chosen": 0.05137288197875023, "logits/rejected": 0.10321810096502304, "logps/chosen": -165.92002868652344, "logps/rejected": -267.64617919921875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.4142186641693115, "rewards/margins": 13.944559097290039, "rewards/rejected": -15.358776092529297, "step": 5233 }, { "epoch": 1.79, "learning_rate": 5.926160366793031e-08, "logits/chosen": 0.04538505896925926, "logits/rejected": 0.0736866444349289, "logps/chosen": -256.2775573730469, "logps/rejected": -400.8305969238281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9942991733551025, "rewards/margins": 17.929784774780273, "rewards/rejected": -18.924083709716797, "step": 5234 }, { "epoch": 1.79, "learning_rate": 5.9074305760298704e-08, "logits/chosen": 0.013630771078169346, "logits/rejected": 0.06209651753306389, "logps/chosen": -203.73947143554688, "logps/rejected": -415.86846923828125, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -0.6826696395874023, "rewards/margins": 20.250545501708984, "rewards/rejected": -20.93321418762207, "step": 5235 }, { "epoch": 1.79, "learning_rate": 5.8887295292481484e-08, "logits/chosen": 0.013890781439840794, "logits/rejected": 0.030774682760238647, "logps/chosen": -255.04116821289062, "logps/rejected": -456.0337829589844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.0760374069213867, "rewards/margins": 19.073974609375, "rewards/rejected": -20.150012969970703, "step": 5236 }, { "epoch": 1.79, "learning_rate": 5.8700572321607855e-08, "logits/chosen": 0.026453427970409393, "logits/rejected": 0.07615748047828674, "logps/chosen": -220.73204040527344, "logps/rejected": -371.12127685546875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.7660872936248779, "rewards/margins": 20.10930633544922, "rewards/rejected": -20.87539291381836, "step": 5237 }, { "epoch": 1.79, "learning_rate": 5.851413690471929e-08, "logits/chosen": 0.0886550173163414, "logits/rejected": 0.10287630558013916, "logps/chosen": -160.53318786621094, "logps/rejected": -327.39697265625, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.0835697650909424, "rewards/margins": 16.1912784576416, "rewards/rejected": -17.27484703063965, "step": 5238 }, { "epoch": 1.79, "learning_rate": 5.8327989098769016e-08, "logits/chosen": 0.059064093977212906, "logits/rejected": 0.09928102791309357, "logps/chosen": -211.0238800048828, "logps/rejected": -346.196044921875, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -2.1836791038513184, "rewards/margins": 15.670660972595215, "rewards/rejected": -17.854339599609375, "step": 5239 }, { "epoch": 1.79, "learning_rate": 5.814212896062276e-08, "logits/chosen": 0.10791296511888504, "logits/rejected": 0.13066139817237854, "logps/chosen": -197.83538818359375, "logps/rejected": -334.96356201171875, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -1.8857916593551636, "rewards/margins": 16.259906768798828, "rewards/rejected": -18.14569854736328, "step": 5240 }, { "epoch": 1.79, "learning_rate": 5.795655654705811e-08, "logits/chosen": 0.06046295911073685, "logits/rejected": 0.07213041931390762, "logps/chosen": -177.72975158691406, "logps/rejected": -298.9306945800781, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.5931828022003174, "rewards/margins": 12.79849624633789, "rewards/rejected": -14.391679763793945, "step": 5241 }, { "epoch": 1.79, "learning_rate": 5.777127191476516e-08, "logits/chosen": -0.006635562051087618, "logits/rejected": 0.018789267167448997, "logps/chosen": -260.75421142578125, "logps/rejected": -457.5325622558594, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.1742794513702393, "rewards/margins": 19.681987762451172, "rewards/rejected": -20.85626792907715, "step": 5242 }, { "epoch": 1.79, "learning_rate": 5.758627512034542e-08, "logits/chosen": 0.09301448613405228, "logits/rejected": 0.11820582300424576, "logps/chosen": -211.3658905029297, "logps/rejected": -367.090576171875, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -3.060199499130249, "rewards/margins": 16.04812240600586, "rewards/rejected": -19.108322143554688, "step": 5243 }, { "epoch": 1.79, "learning_rate": 5.7401566220313006e-08, "logits/chosen": 0.11533305048942566, "logits/rejected": 0.14823509752750397, "logps/chosen": -163.5730743408203, "logps/rejected": -303.270263671875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -3.106440544128418, "rewards/margins": 13.603778839111328, "rewards/rejected": -16.71021842956543, "step": 5244 }, { "epoch": 1.79, "learning_rate": 5.721714527109411e-08, "logits/chosen": 0.10855118185281754, "logits/rejected": 0.11204417049884796, "logps/chosen": -151.471435546875, "logps/rejected": -336.0478210449219, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.9277145862579346, "rewards/margins": 15.723148345947266, "rewards/rejected": -17.650863647460938, "step": 5245 }, { "epoch": 1.79, "learning_rate": 5.703301232902635e-08, "logits/chosen": -0.00576297240331769, "logits/rejected": 0.02900688163936138, "logps/chosen": -214.578857421875, "logps/rejected": -385.50836181640625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.435270071029663, "rewards/margins": 18.07233238220215, "rewards/rejected": -19.50760269165039, "step": 5246 }, { "epoch": 1.79, "learning_rate": 5.68491674503605e-08, "logits/chosen": 0.1318497657775879, "logits/rejected": 0.14246822893619537, "logps/chosen": -222.75796508789062, "logps/rejected": -397.1021423339844, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.7362465858459473, "rewards/margins": 17.60089683532715, "rewards/rejected": -19.337141036987305, "step": 5247 }, { "epoch": 1.79, "learning_rate": 5.666561069125797e-08, "logits/chosen": 0.06385941058397293, "logits/rejected": 0.11020975559949875, "logps/chosen": -228.5546112060547, "logps/rejected": -303.16986083984375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -2.2964391708374023, "rewards/margins": 13.911046028137207, "rewards/rejected": -16.20748519897461, "step": 5248 }, { "epoch": 1.79, "learning_rate": 5.648234210779312e-08, "logits/chosen": 0.08871838450431824, "logits/rejected": 0.13679179549217224, "logps/chosen": -249.18283081054688, "logps/rejected": -407.2679443359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.0558665990829468, "rewards/margins": 19.023515701293945, "rewards/rejected": -20.079381942749023, "step": 5249 }, { "epoch": 1.79, "learning_rate": 5.629936175595207e-08, "logits/chosen": 0.025894025340676308, "logits/rejected": 0.05740707740187645, "logps/chosen": -199.44044494628906, "logps/rejected": -301.291259765625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.139168381690979, "rewards/margins": 13.634790420532227, "rewards/rejected": -14.773959159851074, "step": 5250 }, { "epoch": 1.79, "learning_rate": 5.611666969163242e-08, "logits/chosen": -0.04317121207714081, "logits/rejected": 0.0012213721638545394, "logps/chosen": -191.86610412597656, "logps/rejected": -356.26361083984375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.5370938777923584, "rewards/margins": 19.16685676574707, "rewards/rejected": -20.70395278930664, "step": 5251 }, { "epoch": 1.79, "learning_rate": 5.5934265970644434e-08, "logits/chosen": 0.10239122807979584, "logits/rejected": 0.12901169061660767, "logps/chosen": -205.09951782226562, "logps/rejected": -381.740966796875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.954761505126953, "rewards/margins": 16.331111907958984, "rewards/rejected": -19.285871505737305, "step": 5252 }, { "epoch": 1.79, "learning_rate": 5.575215064870986e-08, "logits/chosen": 0.06558822840452194, "logits/rejected": 0.08647504448890686, "logps/chosen": -214.57447814941406, "logps/rejected": -419.4477233886719, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -1.9726637601852417, "rewards/margins": 19.566591262817383, "rewards/rejected": -21.539255142211914, "step": 5253 }, { "epoch": 1.79, "learning_rate": 5.5570323781462315e-08, "logits/chosen": 0.04421018809080124, "logits/rejected": 0.07143417000770569, "logps/chosen": -208.1598358154297, "logps/rejected": -372.8926086425781, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.5453731417655945, "rewards/margins": 18.489465713500977, "rewards/rejected": -19.034841537475586, "step": 5254 }, { "epoch": 1.79, "learning_rate": 5.538878542444736e-08, "logits/chosen": 0.049638137221336365, "logits/rejected": 0.07184064388275146, "logps/chosen": -159.65545654296875, "logps/rejected": -304.1874694824219, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.8140300512313843, "rewards/margins": 13.665377616882324, "rewards/rejected": -15.479408264160156, "step": 5255 }, { "epoch": 1.79, "learning_rate": 5.5207535633122527e-08, "logits/chosen": 0.16027316451072693, "logits/rejected": 0.1927952915430069, "logps/chosen": -208.8376007080078, "logps/rejected": -300.3117370605469, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.0876986980438232, "rewards/margins": 13.58580493927002, "rewards/rejected": -14.673501968383789, "step": 5256 }, { "epoch": 1.79, "learning_rate": 5.50265744628573e-08, "logits/chosen": 0.00802379846572876, "logits/rejected": 0.05567529797554016, "logps/chosen": -217.0624542236328, "logps/rejected": -390.934326171875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.0242128372192383, "rewards/margins": 16.494089126586914, "rewards/rejected": -19.51830291748047, "step": 5257 }, { "epoch": 1.79, "learning_rate": 5.484590196893246e-08, "logits/chosen": 0.013802026398479939, "logits/rejected": 0.04840940609574318, "logps/chosen": -219.16941833496094, "logps/rejected": -379.020263671875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.8201535940170288, "rewards/margins": 18.37514877319336, "rewards/rejected": -19.19530487060547, "step": 5258 }, { "epoch": 1.79, "learning_rate": 5.466551820654131e-08, "logits/chosen": 0.09542659670114517, "logits/rejected": 0.09665482491254807, "logps/chosen": -199.44300842285156, "logps/rejected": -373.61346435546875, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -2.419827461242676, "rewards/margins": 16.83928680419922, "rewards/rejected": -19.259113311767578, "step": 5259 }, { "epoch": 1.8, "learning_rate": 5.448542323078842e-08, "logits/chosen": 0.09304586797952652, "logits/rejected": 0.15134012699127197, "logps/chosen": -213.3332061767578, "logps/rejected": -355.01025390625, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.6400229930877686, "rewards/margins": 18.783615112304688, "rewards/rejected": -19.42363739013672, "step": 5260 }, { "epoch": 1.8, "learning_rate": 5.4305617096690125e-08, "logits/chosen": 0.06576268374919891, "logits/rejected": 0.07921021431684494, "logps/chosen": -183.22943115234375, "logps/rejected": -322.6195068359375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.36238735914230347, "rewards/margins": 12.701154708862305, "rewards/rejected": -13.063543319702148, "step": 5261 }, { "epoch": 1.8, "learning_rate": 5.412609985917527e-08, "logits/chosen": 0.055053047835826874, "logits/rejected": 0.09494657069444656, "logps/chosen": -201.78900146484375, "logps/rejected": -302.8118896484375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.4563976526260376, "rewards/margins": 14.594097137451172, "rewards/rejected": -15.050495147705078, "step": 5262 }, { "epoch": 1.8, "learning_rate": 5.3946871573083416e-08, "logits/chosen": 0.0020115717779845, "logits/rejected": 0.052664488554000854, "logps/chosen": -193.42269897460938, "logps/rejected": -355.30328369140625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.243722438812256, "rewards/margins": 18.75611114501953, "rewards/rejected": -20.999835968017578, "step": 5263 }, { "epoch": 1.8, "learning_rate": 5.3767932293166455e-08, "logits/chosen": -0.01823727786540985, "logits/rejected": 0.01984136924147606, "logps/chosen": -217.0867462158203, "logps/rejected": -344.55426025390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.2933441400527954, "rewards/margins": 17.05588722229004, "rewards/rejected": -18.349231719970703, "step": 5264 }, { "epoch": 1.8, "learning_rate": 5.358928207408797e-08, "logits/chosen": -0.03591516986489296, "logits/rejected": 0.006536186207085848, "logps/chosen": -264.729736328125, "logps/rejected": -338.6743469238281, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.2426601648330688, "rewards/margins": 14.595714569091797, "rewards/rejected": -15.838375091552734, "step": 5265 }, { "epoch": 1.8, "learning_rate": 5.341092097042299e-08, "logits/chosen": 0.05601704493165016, "logits/rejected": 0.07815106213092804, "logps/chosen": -194.6244659423828, "logps/rejected": -347.0442199707031, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.7619593143463135, "rewards/margins": 15.371744155883789, "rewards/rejected": -17.133703231811523, "step": 5266 }, { "epoch": 1.8, "learning_rate": 5.323284903665837e-08, "logits/chosen": 0.1091427206993103, "logits/rejected": 0.12347140908241272, "logps/chosen": -204.20584106445312, "logps/rejected": -347.4830322265625, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -1.6476470232009888, "rewards/margins": 14.653322219848633, "rewards/rejected": -16.300968170166016, "step": 5267 }, { "epoch": 1.8, "learning_rate": 5.305506632719292e-08, "logits/chosen": 0.04934553802013397, "logits/rejected": 0.07113079726696014, "logps/chosen": -215.56207275390625, "logps/rejected": -403.73150634765625, "loss": 0.0494, "rewards/accuracies": 1.0, "rewards/chosen": -0.9109436273574829, "rewards/margins": 18.055206298828125, "rewards/rejected": -18.966148376464844, "step": 5268 }, { "epoch": 1.8, "learning_rate": 5.2877572896336434e-08, "logits/chosen": 0.08602257072925568, "logits/rejected": 0.09353533387184143, "logps/chosen": -254.45916748046875, "logps/rejected": -445.865234375, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -2.330247402191162, "rewards/margins": 18.092748641967773, "rewards/rejected": -20.422996520996094, "step": 5269 }, { "epoch": 1.8, "learning_rate": 5.2700368798310745e-08, "logits/chosen": 0.08112816512584686, "logits/rejected": 0.10863244533538818, "logps/chosen": -239.3667755126953, "logps/rejected": -431.34356689453125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.5280088186264038, "rewards/margins": 19.182048797607422, "rewards/rejected": -20.710058212280273, "step": 5270 }, { "epoch": 1.8, "learning_rate": 5.252345408724956e-08, "logits/chosen": 0.08548388630151749, "logits/rejected": 0.12230993807315826, "logps/chosen": -118.34387969970703, "logps/rejected": -235.17025756835938, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.938507318496704, "rewards/margins": 13.725491523742676, "rewards/rejected": -15.663999557495117, "step": 5271 }, { "epoch": 1.8, "learning_rate": 5.234682881719765e-08, "logits/chosen": -0.0021188510581851006, "logits/rejected": 0.025287028402090073, "logps/chosen": -240.4770050048828, "logps/rejected": -317.7357177734375, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -3.048572540283203, "rewards/margins": 12.760631561279297, "rewards/rejected": -15.8092041015625, "step": 5272 }, { "epoch": 1.8, "learning_rate": 5.217049304211152e-08, "logits/chosen": 0.04296201094985008, "logits/rejected": 0.059386104345321655, "logps/chosen": -220.7044677734375, "logps/rejected": -390.6326599121094, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.833199143409729, "rewards/margins": 17.931873321533203, "rewards/rejected": -19.765071868896484, "step": 5273 }, { "epoch": 1.8, "learning_rate": 5.199444681585951e-08, "logits/chosen": -0.05482383072376251, "logits/rejected": -0.042930178344249725, "logps/chosen": -205.26280212402344, "logps/rejected": -370.1759948730469, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -2.0450644493103027, "rewards/margins": 15.851266860961914, "rewards/rejected": -17.896331787109375, "step": 5274 }, { "epoch": 1.8, "learning_rate": 5.181869019222129e-08, "logits/chosen": 0.1395983248949051, "logits/rejected": 0.17074933648109436, "logps/chosen": -204.5299072265625, "logps/rejected": -327.7202453613281, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.1544570922851562, "rewards/margins": 14.826010704040527, "rewards/rejected": -15.98046875, "step": 5275 }, { "epoch": 1.8, "learning_rate": 5.164322322488801e-08, "logits/chosen": 0.04651019349694252, "logits/rejected": 0.0794454962015152, "logps/chosen": -171.1738739013672, "logps/rejected": -317.8570861816406, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.5364909172058105, "rewards/margins": 16.024911880493164, "rewards/rejected": -18.5614013671875, "step": 5276 }, { "epoch": 1.8, "learning_rate": 5.146804596746235e-08, "logits/chosen": 0.044549230486154556, "logits/rejected": 0.06985535472631454, "logps/chosen": -209.81752014160156, "logps/rejected": -389.6458435058594, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.7098731398582458, "rewards/margins": 18.768667221069336, "rewards/rejected": -19.47854232788086, "step": 5277 }, { "epoch": 1.8, "learning_rate": 5.129315847345861e-08, "logits/chosen": -0.0009211506112478673, "logits/rejected": 0.006767774932086468, "logps/chosen": -171.65061950683594, "logps/rejected": -331.8766784667969, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -1.967144250869751, "rewards/margins": 14.482952117919922, "rewards/rejected": -16.450096130371094, "step": 5278 }, { "epoch": 1.8, "learning_rate": 5.1118560796302504e-08, "logits/chosen": -0.0024517320562154055, "logits/rejected": 0.03860361874103546, "logps/chosen": -265.3810729980469, "logps/rejected": -428.0825500488281, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -2.536097288131714, "rewards/margins": 18.761747360229492, "rewards/rejected": -21.29784393310547, "step": 5279 }, { "epoch": 1.8, "learning_rate": 5.094425298933136e-08, "logits/chosen": 0.036396000534296036, "logits/rejected": 0.05645057559013367, "logps/chosen": -225.78794860839844, "logps/rejected": -403.2762756347656, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -3.6023950576782227, "rewards/margins": 16.49831199645996, "rewards/rejected": -20.1007080078125, "step": 5280 }, { "epoch": 1.8, "learning_rate": 5.077023510579359e-08, "logits/chosen": -0.02366173081099987, "logits/rejected": 0.01228267326951027, "logps/chosen": -199.77894592285156, "logps/rejected": -358.6493225097656, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5199344754219055, "rewards/margins": 17.855913162231445, "rewards/rejected": -18.37584686279297, "step": 5281 }, { "epoch": 1.8, "learning_rate": 5.0596507198849334e-08, "logits/chosen": 0.08766207098960876, "logits/rejected": 0.10679058730602264, "logps/chosen": -200.1387176513672, "logps/rejected": -333.18218994140625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3683581352233887, "rewards/margins": 15.133013725280762, "rewards/rejected": -16.501371383666992, "step": 5282 }, { "epoch": 1.8, "learning_rate": 5.042306932157014e-08, "logits/chosen": 0.11419849842786789, "logits/rejected": 0.145134836435318, "logps/chosen": -225.17771911621094, "logps/rejected": -364.5093078613281, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.5155465602874756, "rewards/margins": 16.054534912109375, "rewards/rejected": -17.570079803466797, "step": 5283 }, { "epoch": 1.8, "learning_rate": 5.024992152693874e-08, "logits/chosen": 0.029249396175146103, "logits/rejected": 0.04690520837903023, "logps/chosen": -229.01168823242188, "logps/rejected": -447.64422607421875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.0562546253204346, "rewards/margins": 18.773591995239258, "rewards/rejected": -21.82984733581543, "step": 5284 }, { "epoch": 1.8, "learning_rate": 5.007706386784949e-08, "logits/chosen": 0.09782435745000839, "logits/rejected": 0.12994572520256042, "logps/chosen": -215.78663635253906, "logps/rejected": -363.8897705078125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.7112562656402588, "rewards/margins": 17.034107208251953, "rewards/rejected": -17.745361328125, "step": 5285 }, { "epoch": 1.8, "learning_rate": 4.9904496397108144e-08, "logits/chosen": 0.06949492543935776, "logits/rejected": 0.0979708656668663, "logps/chosen": -192.02822875976562, "logps/rejected": -355.1387023925781, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.5073730945587158, "rewards/margins": 17.60961151123047, "rewards/rejected": -19.116985321044922, "step": 5286 }, { "epoch": 1.8, "learning_rate": 4.9732219167431314e-08, "logits/chosen": 0.018384233117103577, "logits/rejected": 0.043492209166288376, "logps/chosen": -231.4805145263672, "logps/rejected": -371.7270812988281, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -2.8084113597869873, "rewards/margins": 14.4599609375, "rewards/rejected": -17.268373489379883, "step": 5287 }, { "epoch": 1.8, "learning_rate": 4.956023223144767e-08, "logits/chosen": 0.072603240609169, "logits/rejected": 0.11754540354013443, "logps/chosen": -196.97630310058594, "logps/rejected": -374.44439697265625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.19329515099525452, "rewards/margins": 18.325414657592773, "rewards/rejected": -18.13212013244629, "step": 5288 }, { "epoch": 1.81, "learning_rate": 4.938853564169665e-08, "logits/chosen": 0.13014087080955505, "logits/rejected": 0.1776624470949173, "logps/chosen": -250.07479858398438, "logps/rejected": -360.3832702636719, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -2.4825680255889893, "rewards/margins": 16.230058670043945, "rewards/rejected": -18.712627410888672, "step": 5289 }, { "epoch": 1.81, "learning_rate": 4.921712945062939e-08, "logits/chosen": 0.06971287727355957, "logits/rejected": 0.08658511936664581, "logps/chosen": -237.91461181640625, "logps/rejected": -366.9279479980469, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.3010554313659668, "rewards/margins": 14.042705535888672, "rewards/rejected": -15.343761444091797, "step": 5290 }, { "epoch": 1.81, "learning_rate": 4.9046013710607904e-08, "logits/chosen": 0.18690603971481323, "logits/rejected": 0.19081814587116241, "logps/chosen": -170.25558471679688, "logps/rejected": -337.9425354003906, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -2.1410014629364014, "rewards/margins": 16.993579864501953, "rewards/rejected": -19.13458251953125, "step": 5291 }, { "epoch": 1.81, "learning_rate": 4.88751884739057e-08, "logits/chosen": 0.05235971137881279, "logits/rejected": 0.10969215631484985, "logps/chosen": -226.0452423095703, "logps/rejected": -380.6473693847656, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.1170506477355957, "rewards/margins": 16.34956169128418, "rewards/rejected": -17.46661376953125, "step": 5292 }, { "epoch": 1.81, "learning_rate": 4.870465379270772e-08, "logits/chosen": 0.08926639705896378, "logits/rejected": 0.08019217103719711, "logps/chosen": -166.8345489501953, "logps/rejected": -345.7791748046875, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -1.975308895111084, "rewards/margins": 13.503157615661621, "rewards/rejected": -15.478466033935547, "step": 5293 }, { "epoch": 1.81, "learning_rate": 4.8534409719109715e-08, "logits/chosen": 0.07761085033416748, "logits/rejected": 0.10046039521694183, "logps/chosen": -218.6248779296875, "logps/rejected": -352.26275634765625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.631410837173462, "rewards/margins": 13.685558319091797, "rewards/rejected": -15.31696891784668, "step": 5294 }, { "epoch": 1.81, "learning_rate": 4.8364456305119315e-08, "logits/chosen": 0.03633209317922592, "logits/rejected": 0.05768058821558952, "logps/chosen": -187.22637939453125, "logps/rejected": -345.3057861328125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.923014760017395, "rewards/margins": 17.667940139770508, "rewards/rejected": -19.59095573425293, "step": 5295 }, { "epoch": 1.81, "learning_rate": 4.819479360265444e-08, "logits/chosen": 0.06569024175405502, "logits/rejected": 0.07817228883504868, "logps/chosen": -136.38958740234375, "logps/rejected": -293.8713073730469, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -2.464078426361084, "rewards/margins": 14.079474449157715, "rewards/rejected": -16.54355239868164, "step": 5296 }, { "epoch": 1.81, "learning_rate": 4.802542166354495e-08, "logits/chosen": 0.0040032933466136456, "logits/rejected": 0.04343828931450844, "logps/chosen": -245.984619140625, "logps/rejected": -364.63818359375, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.7428295612335205, "rewards/margins": 14.931868553161621, "rewards/rejected": -15.674698829650879, "step": 5297 }, { "epoch": 1.81, "learning_rate": 4.785634053953169e-08, "logits/chosen": -0.044035058468580246, "logits/rejected": -0.022945599630475044, "logps/chosen": -199.251708984375, "logps/rejected": -342.289794921875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.967952251434326, "rewards/margins": 13.121543884277344, "rewards/rejected": -16.089496612548828, "step": 5298 }, { "epoch": 1.81, "learning_rate": 4.7687550282266343e-08, "logits/chosen": 0.19632090628147125, "logits/rejected": 0.20619262754917145, "logps/chosen": -141.52395629882812, "logps/rejected": -355.15533447265625, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -3.2590441703796387, "rewards/margins": 18.165781021118164, "rewards/rejected": -21.424827575683594, "step": 5299 }, { "epoch": 1.81, "learning_rate": 4.751905094331232e-08, "logits/chosen": 0.00762465363368392, "logits/rejected": 0.02724453993141651, "logps/chosen": -197.5363311767578, "logps/rejected": -328.233642578125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.1246862411499023, "rewards/margins": 13.046472549438477, "rewards/rejected": -14.171158790588379, "step": 5300 }, { "epoch": 1.81, "learning_rate": 4.735084257414379e-08, "logits/chosen": -0.025437641888856888, "logits/rejected": 0.03631195053458214, "logps/chosen": -190.8457794189453, "logps/rejected": -352.5854797363281, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.083434581756592, "rewards/margins": 18.097003936767578, "rewards/rejected": -20.180438995361328, "step": 5301 }, { "epoch": 1.81, "learning_rate": 4.7182925226145865e-08, "logits/chosen": 0.009562606923282146, "logits/rejected": 0.027661927044391632, "logps/chosen": -183.4684295654297, "logps/rejected": -415.7038269042969, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.707623839378357, "rewards/margins": 18.237247467041016, "rewards/rejected": -19.94487190246582, "step": 5302 }, { "epoch": 1.81, "learning_rate": 4.701529895061507e-08, "logits/chosen": 0.07320041209459305, "logits/rejected": 0.07655919343233109, "logps/chosen": -184.13780212402344, "logps/rejected": -332.5062255859375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.172654867172241, "rewards/margins": 12.421096801757812, "rewards/rejected": -14.593751907348633, "step": 5303 }, { "epoch": 1.81, "learning_rate": 4.684796379875922e-08, "logits/chosen": 0.17423716187477112, "logits/rejected": 0.1830691695213318, "logps/chosen": -136.25674438476562, "logps/rejected": -280.34039306640625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.1821537017822266, "rewards/margins": 13.612346649169922, "rewards/rejected": -15.794499397277832, "step": 5304 }, { "epoch": 1.81, "learning_rate": 4.6680919821696415e-08, "logits/chosen": 0.11308290809392929, "logits/rejected": 0.1575983166694641, "logps/chosen": -143.85841369628906, "logps/rejected": -318.8763122558594, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.124478578567505, "rewards/margins": 16.8492431640625, "rewards/rejected": -18.97372055053711, "step": 5305 }, { "epoch": 1.81, "learning_rate": 4.6514167070456524e-08, "logits/chosen": 0.02304183878004551, "logits/rejected": 0.025128137320280075, "logps/chosen": -178.08773803710938, "logps/rejected": -336.2538757324219, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -1.902153491973877, "rewards/margins": 12.773934364318848, "rewards/rejected": -14.676087379455566, "step": 5306 }, { "epoch": 1.81, "learning_rate": 4.6347705595980224e-08, "logits/chosen": -0.08495374023914337, "logits/rejected": -0.0722828283905983, "logps/chosen": -233.6384735107422, "logps/rejected": -424.45989990234375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4041345119476318, "rewards/margins": 18.071977615356445, "rewards/rejected": -19.476112365722656, "step": 5307 }, { "epoch": 1.81, "learning_rate": 4.618153544911929e-08, "logits/chosen": 0.015816964209079742, "logits/rejected": 0.02181219309568405, "logps/chosen": -211.0326690673828, "logps/rejected": -390.8618469238281, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -1.6841377019882202, "rewards/margins": 16.49640464782715, "rewards/rejected": -18.180543899536133, "step": 5308 }, { "epoch": 1.81, "learning_rate": 4.6015656680636226e-08, "logits/chosen": -0.03402446210384369, "logits/rejected": -0.018113482743501663, "logps/chosen": -205.6002655029297, "logps/rejected": -343.7834777832031, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.4931311011314392, "rewards/margins": 16.229976654052734, "rewards/rejected": -16.723108291625977, "step": 5309 }, { "epoch": 1.81, "learning_rate": 4.585006934120472e-08, "logits/chosen": 0.1794862300157547, "logits/rejected": 0.1938248723745346, "logps/chosen": -189.86575317382812, "logps/rejected": -407.2131652832031, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9532562494277954, "rewards/margins": 18.29277992248535, "rewards/rejected": -19.246036529541016, "step": 5310 }, { "epoch": 1.81, "learning_rate": 4.568477348140942e-08, "logits/chosen": 0.11118017137050629, "logits/rejected": 0.14143694937229156, "logps/chosen": -232.90846252441406, "logps/rejected": -342.67657470703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4304136037826538, "rewards/margins": 15.506749153137207, "rewards/rejected": -15.937161445617676, "step": 5311 }, { "epoch": 1.81, "learning_rate": 4.551976915174605e-08, "logits/chosen": 0.10436209291219711, "logits/rejected": 0.1692214012145996, "logps/chosen": -222.45704650878906, "logps/rejected": -353.4271240234375, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -2.137399435043335, "rewards/margins": 18.710124969482422, "rewards/rejected": -20.847524642944336, "step": 5312 }, { "epoch": 1.81, "learning_rate": 4.535505640262116e-08, "logits/chosen": 0.0027429270558059216, "logits/rejected": 0.030228571966290474, "logps/chosen": -214.0808868408203, "logps/rejected": -368.9654235839844, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.8698134422302246, "rewards/margins": 16.643049240112305, "rewards/rejected": -18.512861251831055, "step": 5313 }, { "epoch": 1.81, "learning_rate": 4.5190635284352074e-08, "logits/chosen": 0.21408583223819733, "logits/rejected": 0.2248976081609726, "logps/chosen": -193.09408569335938, "logps/rejected": -326.80291748046875, "loss": 0.0218, "rewards/accuracies": 0.9375, "rewards/chosen": -2.067141532897949, "rewards/margins": 13.316076278686523, "rewards/rejected": -15.383218765258789, "step": 5314 }, { "epoch": 1.81, "learning_rate": 4.5026505847167276e-08, "logits/chosen": 0.03667835146188736, "logits/rejected": 0.0775759220123291, "logps/chosen": -197.32400512695312, "logps/rejected": -306.835693359375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.4309537410736084, "rewards/margins": 15.077122688293457, "rewards/rejected": -17.50807762145996, "step": 5315 }, { "epoch": 1.81, "learning_rate": 4.486266814120609e-08, "logits/chosen": 0.006433965638279915, "logits/rejected": 0.013078464195132256, "logps/chosen": -245.47976684570312, "logps/rejected": -419.0324401855469, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -2.817035675048828, "rewards/margins": 16.963544845581055, "rewards/rejected": -19.780580520629883, "step": 5316 }, { "epoch": 1.81, "learning_rate": 4.4699122216518504e-08, "logits/chosen": 0.13936090469360352, "logits/rejected": 0.17074427008628845, "logps/chosen": -197.71173095703125, "logps/rejected": -331.8619079589844, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.997836709022522, "rewards/margins": 16.71640396118164, "rewards/rejected": -17.71424102783203, "step": 5317 }, { "epoch": 1.82, "learning_rate": 4.453586812306564e-08, "logits/chosen": 0.1186399906873703, "logits/rejected": 0.1425434947013855, "logps/chosen": -219.04222106933594, "logps/rejected": -452.5718994140625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 0.3605826795101166, "rewards/margins": 20.003822326660156, "rewards/rejected": -19.64324188232422, "step": 5318 }, { "epoch": 1.82, "learning_rate": 4.437290591071952e-08, "logits/chosen": 0.07005514204502106, "logits/rejected": 0.12080216407775879, "logps/chosen": -221.7462921142578, "logps/rejected": -348.4072570800781, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -1.9471663236618042, "rewards/margins": 15.701016426086426, "rewards/rejected": -17.648181915283203, "step": 5319 }, { "epoch": 1.82, "learning_rate": 4.421023562926251e-08, "logits/chosen": 0.13717801868915558, "logits/rejected": 0.15833589434623718, "logps/chosen": -188.74000549316406, "logps/rejected": -366.949951171875, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.1895253658294678, "rewards/margins": 18.581880569458008, "rewards/rejected": -19.771406173706055, "step": 5320 }, { "epoch": 1.82, "learning_rate": 4.404785732838845e-08, "logits/chosen": 0.0054255458526313305, "logits/rejected": 0.033099137246608734, "logps/chosen": -196.7889862060547, "logps/rejected": -360.14447021484375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -3.401498556137085, "rewards/margins": 16.42203140258789, "rewards/rejected": -19.823532104492188, "step": 5321 }, { "epoch": 1.82, "learning_rate": 4.3885771057701416e-08, "logits/chosen": 0.1092635914683342, "logits/rejected": 0.11752490699291229, "logps/chosen": -198.03146362304688, "logps/rejected": -422.255615234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.1446094512939453, "rewards/margins": 18.509376525878906, "rewards/rejected": -20.65398597717285, "step": 5322 }, { "epoch": 1.82, "learning_rate": 4.3723976866716915e-08, "logits/chosen": 0.16775214672088623, "logits/rejected": 0.20522557199001312, "logps/chosen": -246.8109130859375, "logps/rejected": -370.2969970703125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.157285690307617, "rewards/margins": 16.477331161499023, "rewards/rejected": -18.63461685180664, "step": 5323 }, { "epoch": 1.82, "learning_rate": 4.3562474804860304e-08, "logits/chosen": -0.14491590857505798, "logits/rejected": -0.13934260606765747, "logps/chosen": -263.7772216796875, "logps/rejected": -483.0724792480469, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -1.6759023666381836, "rewards/margins": 20.088138580322266, "rewards/rejected": -21.764039993286133, "step": 5324 }, { "epoch": 1.82, "learning_rate": 4.340126492146856e-08, "logits/chosen": -0.0561990924179554, "logits/rejected": -0.03714130446314812, "logps/chosen": -210.72532653808594, "logps/rejected": -340.2583923339844, "loss": 0.0176, "rewards/accuracies": 0.9375, "rewards/chosen": -1.227998971939087, "rewards/margins": 15.383563041687012, "rewards/rejected": -16.611560821533203, "step": 5325 }, { "epoch": 1.82, "learning_rate": 4.324034726578907e-08, "logits/chosen": -0.021102814003825188, "logits/rejected": 0.016104314476251602, "logps/chosen": -264.78662109375, "logps/rejected": -437.4349670410156, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7763545513153076, "rewards/margins": 19.33907127380371, "rewards/rejected": -20.11542510986328, "step": 5326 }, { "epoch": 1.82, "learning_rate": 4.307972188697995e-08, "logits/chosen": 0.14055314660072327, "logits/rejected": 0.16105253994464874, "logps/chosen": -174.41845703125, "logps/rejected": -344.31243896484375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -2.6763627529144287, "rewards/margins": 15.253722190856934, "rewards/rejected": -17.930084228515625, "step": 5327 }, { "epoch": 1.82, "learning_rate": 4.291938883411006e-08, "logits/chosen": 0.11305862665176392, "logits/rejected": 0.1426510512828827, "logps/chosen": -187.26881408691406, "logps/rejected": -309.5482482910156, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.740262031555176, "rewards/margins": 13.095287322998047, "rewards/rejected": -15.835549354553223, "step": 5328 }, { "epoch": 1.82, "learning_rate": 4.275934815615878e-08, "logits/chosen": 0.0754564106464386, "logits/rejected": 0.11766713112592697, "logps/chosen": -204.7325897216797, "logps/rejected": -303.56976318359375, "loss": 0.0234, "rewards/accuracies": 0.9375, "rewards/chosen": -1.788435697555542, "rewards/margins": 12.731325149536133, "rewards/rejected": -14.51976203918457, "step": 5329 }, { "epoch": 1.82, "learning_rate": 4.259959990201645e-08, "logits/chosen": 0.13010983169078827, "logits/rejected": 0.1315576434135437, "logps/chosen": -175.89431762695312, "logps/rejected": -351.8483581542969, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -3.2182602882385254, "rewards/margins": 14.56866455078125, "rewards/rejected": -17.78692626953125, "step": 5330 }, { "epoch": 1.82, "learning_rate": 4.2440144120484026e-08, "logits/chosen": 0.0991579070687294, "logits/rejected": 0.14305925369262695, "logps/chosen": -229.88876342773438, "logps/rejected": -319.3437805175781, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.458047866821289, "rewards/margins": 13.493075370788574, "rewards/rejected": -15.95112419128418, "step": 5331 }, { "epoch": 1.82, "learning_rate": 4.228098086027287e-08, "logits/chosen": 0.09049529582262039, "logits/rejected": 0.11603130400180817, "logps/chosen": -188.53689575195312, "logps/rejected": -335.63641357421875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.4329209327697754, "rewards/margins": 16.280046463012695, "rewards/rejected": -17.71296501159668, "step": 5332 }, { "epoch": 1.82, "learning_rate": 4.2122110170005224e-08, "logits/chosen": 0.018350327387452126, "logits/rejected": 0.040010180324316025, "logps/chosen": -166.31930541992188, "logps/rejected": -332.6642761230469, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.3790905475616455, "rewards/margins": 16.375106811523438, "rewards/rejected": -18.754196166992188, "step": 5333 }, { "epoch": 1.82, "learning_rate": 4.1963532098214126e-08, "logits/chosen": 0.0278201662003994, "logits/rejected": 0.05768091231584549, "logps/chosen": -276.74530029296875, "logps/rejected": -410.4419250488281, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.2211010456085205, "rewards/margins": 16.115488052368164, "rewards/rejected": -18.33658790588379, "step": 5334 }, { "epoch": 1.82, "learning_rate": 4.180524669334251e-08, "logits/chosen": -0.004572242498397827, "logits/rejected": 0.043890584260225296, "logps/chosen": -199.02284240722656, "logps/rejected": -296.302001953125, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -0.5670484304428101, "rewards/margins": 13.090083122253418, "rewards/rejected": -13.657132148742676, "step": 5335 }, { "epoch": 1.82, "learning_rate": 4.164725400374469e-08, "logits/chosen": 0.10096552222967148, "logits/rejected": 0.14525553584098816, "logps/chosen": -220.2744140625, "logps/rejected": -340.5769958496094, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -2.577881336212158, "rewards/margins": 14.579607009887695, "rewards/rejected": -17.157487869262695, "step": 5336 }, { "epoch": 1.82, "learning_rate": 4.148955407768528e-08, "logits/chosen": 0.12022563070058823, "logits/rejected": 0.11419002711772919, "logps/chosen": -160.10250854492188, "logps/rejected": -346.5257263183594, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -1.2766563892364502, "rewards/margins": 16.682804107666016, "rewards/rejected": -17.95945930480957, "step": 5337 }, { "epoch": 1.82, "learning_rate": 4.133214696333942e-08, "logits/chosen": 0.1050884872674942, "logits/rejected": 0.12464792281389236, "logps/chosen": -181.2301025390625, "logps/rejected": -342.8912048339844, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -3.066535234451294, "rewards/margins": 16.35204315185547, "rewards/rejected": -19.418577194213867, "step": 5338 }, { "epoch": 1.82, "learning_rate": 4.117503270879274e-08, "logits/chosen": -0.00695144385099411, "logits/rejected": 0.014830603264272213, "logps/chosen": -220.22267150878906, "logps/rejected": -416.1907653808594, "loss": 0.0271, "rewards/accuracies": 0.9375, "rewards/chosen": -2.44382381439209, "rewards/margins": 18.319419860839844, "rewards/rejected": -20.763242721557617, "step": 5339 }, { "epoch": 1.82, "learning_rate": 4.1018211362041424e-08, "logits/chosen": -0.021817490458488464, "logits/rejected": 0.014942780137062073, "logps/chosen": -206.3405303955078, "logps/rejected": -400.7236328125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.250338077545166, "rewards/margins": 19.50140953063965, "rewards/rejected": -21.751747131347656, "step": 5340 }, { "epoch": 1.82, "learning_rate": 4.086168297099224e-08, "logits/chosen": 0.05944760516285896, "logits/rejected": 0.09094223380088806, "logps/chosen": -170.34922790527344, "logps/rejected": -348.8916015625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.009324789047241, "rewards/margins": 15.79426383972168, "rewards/rejected": -17.803590774536133, "step": 5341 }, { "epoch": 1.82, "learning_rate": 4.0705447583462724e-08, "logits/chosen": 0.1001962423324585, "logits/rejected": 0.1256980001926422, "logps/chosen": -187.50497436523438, "logps/rejected": -307.54193115234375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.7270101308822632, "rewards/margins": 14.065574645996094, "rewards/rejected": -15.792585372924805, "step": 5342 }, { "epoch": 1.82, "learning_rate": 4.054950524718026e-08, "logits/chosen": 0.031097983941435814, "logits/rejected": 0.07999254018068314, "logps/chosen": -247.75909423828125, "logps/rejected": -389.97186279296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.0011705160140991, "rewards/margins": 18.53399085998535, "rewards/rejected": -19.535160064697266, "step": 5343 }, { "epoch": 1.82, "learning_rate": 4.0393856009783176e-08, "logits/chosen": 0.0004062707885168493, "logits/rejected": 0.006128332111984491, "logps/chosen": -208.18746948242188, "logps/rejected": -340.1334228515625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.5778201222419739, "rewards/margins": 12.778228759765625, "rewards/rejected": -13.356048583984375, "step": 5344 }, { "epoch": 1.82, "learning_rate": 4.0238499918820336e-08, "logits/chosen": -0.006217645015567541, "logits/rejected": 0.008555278182029724, "logps/chosen": -212.53298950195312, "logps/rejected": -372.84881591796875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.6780648231506348, "rewards/margins": 17.121427536010742, "rewards/rejected": -17.799489974975586, "step": 5345 }, { "epoch": 1.82, "learning_rate": 4.008343702175088e-08, "logits/chosen": 0.09825361520051956, "logits/rejected": 0.124551922082901, "logps/chosen": -187.46897888183594, "logps/rejected": -312.2935485839844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.891080379486084, "rewards/margins": 11.709718704223633, "rewards/rejected": -13.600799560546875, "step": 5346 }, { "epoch": 1.82, "learning_rate": 3.9928667365944024e-08, "logits/chosen": -0.010474552400410175, "logits/rejected": -0.010911143384873867, "logps/chosen": -211.945068359375, "logps/rejected": -430.42791748046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.8011326789855957, "rewards/margins": 21.30120086669922, "rewards/rejected": -23.102333068847656, "step": 5347 }, { "epoch": 1.83, "learning_rate": 3.977419099868018e-08, "logits/chosen": 0.09007066488265991, "logits/rejected": 0.10752860456705093, "logps/chosen": -167.41322326660156, "logps/rejected": -339.4940185546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.548864483833313, "rewards/margins": 17.303958892822266, "rewards/rejected": -18.852821350097656, "step": 5348 }, { "epoch": 1.83, "learning_rate": 3.9620007967149597e-08, "logits/chosen": -0.023280048742890358, "logits/rejected": -0.015749964863061905, "logps/chosen": -168.4314422607422, "logps/rejected": -312.3398742675781, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -1.7321264743804932, "rewards/margins": 13.846784591674805, "rewards/rejected": -15.578911781311035, "step": 5349 }, { "epoch": 1.83, "learning_rate": 3.9466118318452924e-08, "logits/chosen": 0.21538446843624115, "logits/rejected": 0.244547039270401, "logps/chosen": -163.49826049804688, "logps/rejected": -338.61175537109375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7340505719184875, "rewards/margins": 17.37677764892578, "rewards/rejected": -18.110828399658203, "step": 5350 }, { "epoch": 1.83, "learning_rate": 3.931252209960156e-08, "logits/chosen": 0.054509200155735016, "logits/rejected": 0.07686582207679749, "logps/chosen": -174.56504821777344, "logps/rejected": -336.3119201660156, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.4986236095428467, "rewards/margins": 16.003774642944336, "rewards/rejected": -17.502399444580078, "step": 5351 }, { "epoch": 1.83, "learning_rate": 3.915921935751687e-08, "logits/chosen": 0.07957460731267929, "logits/rejected": 0.12278800457715988, "logps/chosen": -248.83766174316406, "logps/rejected": -361.3822937011719, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.14673596620559692, "rewards/margins": 14.756714820861816, "rewards/rejected": -14.903451919555664, "step": 5352 }, { "epoch": 1.83, "learning_rate": 3.900621013903083e-08, "logits/chosen": -0.05734527111053467, "logits/rejected": -0.04818904027342796, "logps/chosen": -214.78375244140625, "logps/rejected": -413.757568359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9330677390098572, "rewards/margins": 17.928325653076172, "rewards/rejected": -18.86139488220215, "step": 5353 }, { "epoch": 1.83, "learning_rate": 3.8853494490885486e-08, "logits/chosen": 0.03438965603709221, "logits/rejected": 0.07700825482606888, "logps/chosen": -246.33209228515625, "logps/rejected": -378.028076171875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.899030864238739, "rewards/margins": 16.180580139160156, "rewards/rejected": -17.079608917236328, "step": 5354 }, { "epoch": 1.83, "learning_rate": 3.870107245973342e-08, "logits/chosen": 0.030293187126517296, "logits/rejected": 0.044469911605119705, "logps/chosen": -240.49441528320312, "logps/rejected": -464.4971008300781, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": -1.1044636964797974, "rewards/margins": 18.93231964111328, "rewards/rejected": -20.036781311035156, "step": 5355 }, { "epoch": 1.83, "learning_rate": 3.85489440921376e-08, "logits/chosen": 0.1553945690393448, "logits/rejected": 0.1522565484046936, "logps/chosen": -155.18128967285156, "logps/rejected": -365.1151123046875, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.3337595760822296, "rewards/margins": 18.12248992919922, "rewards/rejected": -18.456249237060547, "step": 5356 }, { "epoch": 1.83, "learning_rate": 3.839710943457108e-08, "logits/chosen": 0.017494946718215942, "logits/rejected": 0.06149079650640488, "logps/chosen": -260.0850830078125, "logps/rejected": -420.3037109375, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -2.5575900077819824, "rewards/margins": 17.264373779296875, "rewards/rejected": -19.821962356567383, "step": 5357 }, { "epoch": 1.83, "learning_rate": 3.8245568533417204e-08, "logits/chosen": 0.0011991284554824233, "logits/rejected": 0.02050757221877575, "logps/chosen": -251.8214874267578, "logps/rejected": -433.49798583984375, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -1.3644999265670776, "rewards/margins": 18.396167755126953, "rewards/rejected": -19.76066780090332, "step": 5358 }, { "epoch": 1.83, "learning_rate": 3.80943214349696e-08, "logits/chosen": 0.0026500525418668985, "logits/rejected": 0.017608506605029106, "logps/chosen": -207.52120971679688, "logps/rejected": -339.3544921875, "loss": 0.0303, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5379443168640137, "rewards/margins": 12.674816131591797, "rewards/rejected": -14.212760925292969, "step": 5359 }, { "epoch": 1.83, "learning_rate": 3.794336818543209e-08, "logits/chosen": -0.051052454859018326, "logits/rejected": -0.014385947957634926, "logps/chosen": -257.4150695800781, "logps/rejected": -424.2967529296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.1889801025390625, "rewards/margins": 19.556434631347656, "rewards/rejected": -21.745412826538086, "step": 5360 }, { "epoch": 1.83, "learning_rate": 3.779270883091923e-08, "logits/chosen": 0.04142602160573006, "logits/rejected": 0.08190169185400009, "logps/chosen": -266.3714904785156, "logps/rejected": -421.09735107421875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.4067754745483398, "rewards/margins": 18.742225646972656, "rewards/rejected": -20.149002075195312, "step": 5361 }, { "epoch": 1.83, "learning_rate": 3.764234341745487e-08, "logits/chosen": -0.026218390092253685, "logits/rejected": 0.000802692084107548, "logps/chosen": -251.91551208496094, "logps/rejected": -395.9836120605469, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.532198905944824, "rewards/margins": 15.275762557983398, "rewards/rejected": -17.80796241760254, "step": 5362 }, { "epoch": 1.83, "learning_rate": 3.749227199097371e-08, "logits/chosen": 0.05626174807548523, "logits/rejected": 0.04375007748603821, "logps/chosen": -219.56893920898438, "logps/rejected": -443.2066345214844, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.200636625289917, "rewards/margins": 19.05548095703125, "rewards/rejected": -21.256114959716797, "step": 5363 }, { "epoch": 1.83, "learning_rate": 3.734249459732075e-08, "logits/chosen": -0.007132785860449076, "logits/rejected": 0.01552835013717413, "logps/chosen": -221.1422119140625, "logps/rejected": -420.2417907714844, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -1.9848335981369019, "rewards/margins": 20.424177169799805, "rewards/rejected": -22.409013748168945, "step": 5364 }, { "epoch": 1.83, "learning_rate": 3.719301128225061e-08, "logits/chosen": 0.01649096980690956, "logits/rejected": 0.05706190690398216, "logps/chosen": -266.7289123535156, "logps/rejected": -410.3607177734375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 0.3096314072608948, "rewards/margins": 20.240264892578125, "rewards/rejected": -19.930633544921875, "step": 5365 }, { "epoch": 1.83, "learning_rate": 3.7043822091428426e-08, "logits/chosen": 0.12229902297258377, "logits/rejected": 0.14737537503242493, "logps/chosen": -222.93116760253906, "logps/rejected": -362.0103454589844, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -0.655199408531189, "rewards/margins": 15.698360443115234, "rewards/rejected": -16.353561401367188, "step": 5366 }, { "epoch": 1.83, "learning_rate": 3.689492707042974e-08, "logits/chosen": 0.1502278745174408, "logits/rejected": 0.16737695038318634, "logps/chosen": -210.7252655029297, "logps/rejected": -352.63604736328125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.961390256881714, "rewards/margins": 13.806297302246094, "rewards/rejected": -16.76768684387207, "step": 5367 }, { "epoch": 1.83, "learning_rate": 3.67463262647395e-08, "logits/chosen": 0.06368742138147354, "logits/rejected": 0.08323297649621964, "logps/chosen": -225.89952087402344, "logps/rejected": -333.4109191894531, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.09533737599849701, "rewards/margins": 14.387377738952637, "rewards/rejected": -14.292040824890137, "step": 5368 }, { "epoch": 1.83, "learning_rate": 3.6598019719753406e-08, "logits/chosen": 0.013091372326016426, "logits/rejected": 0.07442201673984528, "logps/chosen": -256.9961242675781, "logps/rejected": -384.0907287597656, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.8679725527763367, "rewards/margins": 17.396743774414062, "rewards/rejected": -18.264713287353516, "step": 5369 }, { "epoch": 1.83, "learning_rate": 3.645000748077709e-08, "logits/chosen": 0.03966134414076805, "logits/rejected": 0.09378216415643692, "logps/chosen": -273.83013916015625, "logps/rejected": -408.1557312011719, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7607725858688354, "rewards/margins": 16.61372947692871, "rewards/rejected": -18.374502182006836, "step": 5370 }, { "epoch": 1.83, "learning_rate": 3.630228959302617e-08, "logits/chosen": 0.2138066291809082, "logits/rejected": 0.2653157711029053, "logps/chosen": -117.10089874267578, "logps/rejected": -184.65713500976562, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.7708563208580017, "rewards/margins": 11.573173522949219, "rewards/rejected": -12.344029426574707, "step": 5371 }, { "epoch": 1.83, "learning_rate": 3.615486610162655e-08, "logits/chosen": 0.005000806413590908, "logits/rejected": 0.037835560739040375, "logps/chosen": -218.94798278808594, "logps/rejected": -397.16619873046875, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -2.1510281562805176, "rewards/margins": 19.769577026367188, "rewards/rejected": -21.920604705810547, "step": 5372 }, { "epoch": 1.83, "learning_rate": 3.600773705161386e-08, "logits/chosen": 0.1070740818977356, "logits/rejected": 0.15039071440696716, "logps/chosen": -193.0981903076172, "logps/rejected": -347.5086975097656, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.8014969825744629, "rewards/margins": 18.019258499145508, "rewards/rejected": -18.820756912231445, "step": 5373 }, { "epoch": 1.83, "learning_rate": 3.5860902487934164e-08, "logits/chosen": 0.06747539341449738, "logits/rejected": 0.09341614693403244, "logps/chosen": -189.58984375, "logps/rejected": -343.020263671875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.9230350255966187, "rewards/margins": 15.61035442352295, "rewards/rejected": -17.533390045166016, "step": 5374 }, { "epoch": 1.83, "learning_rate": 3.571436245544335e-08, "logits/chosen": 0.047790683805942535, "logits/rejected": 0.06340590119361877, "logps/chosen": -216.2481689453125, "logps/rejected": -350.14141845703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.010514035820960999, "rewards/margins": 14.876727104187012, "rewards/rejected": -14.866211891174316, "step": 5375 }, { "epoch": 1.83, "learning_rate": 3.556811699890749e-08, "logits/chosen": 0.017968690022826195, "logits/rejected": 0.04444539174437523, "logps/chosen": -255.9254150390625, "logps/rejected": -366.9942321777344, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.4229644536972046, "rewards/margins": 12.438844680786133, "rewards/rejected": -13.861809730529785, "step": 5376 }, { "epoch": 1.84, "learning_rate": 3.542216616300231e-08, "logits/chosen": 0.04198920726776123, "logits/rejected": 0.040160637348890305, "logps/chosen": -186.10707092285156, "logps/rejected": -415.08587646484375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.4648520946502686, "rewards/margins": 17.40450096130371, "rewards/rejected": -19.869356155395508, "step": 5377 }, { "epoch": 1.84, "learning_rate": 3.52765099923138e-08, "logits/chosen": 0.10545133054256439, "logits/rejected": 0.1268393099308014, "logps/chosen": -212.7836151123047, "logps/rejected": -393.2713928222656, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.6854275465011597, "rewards/margins": 17.574691772460938, "rewards/rejected": -18.260120391845703, "step": 5378 }, { "epoch": 1.84, "learning_rate": 3.513114853133825e-08, "logits/chosen": 0.13542146980762482, "logits/rejected": 0.1432904601097107, "logps/chosen": -210.75570678710938, "logps/rejected": -422.3984375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.797390103340149, "rewards/margins": 19.026086807250977, "rewards/rejected": -20.82347869873047, "step": 5379 }, { "epoch": 1.84, "learning_rate": 3.498608182448115e-08, "logits/chosen": -0.027823220938444138, "logits/rejected": 0.011316823773086071, "logps/chosen": -225.37191772460938, "logps/rejected": -373.7603759765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.111161708831787, "rewards/margins": 16.27397918701172, "rewards/rejected": -17.38513946533203, "step": 5380 }, { "epoch": 1.84, "learning_rate": 3.484130991605871e-08, "logits/chosen": 0.060215454548597336, "logits/rejected": 0.06366556137800217, "logps/chosen": -162.13546752929688, "logps/rejected": -301.5058898925781, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.5160623788833618, "rewards/margins": 12.55473804473877, "rewards/rejected": -14.070801734924316, "step": 5381 }, { "epoch": 1.84, "learning_rate": 3.469683285029657e-08, "logits/chosen": 0.06361858546733856, "logits/rejected": 0.07595190405845642, "logps/chosen": -217.48562622070312, "logps/rejected": -423.3812561035156, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.2518494129180908, "rewards/margins": 17.418556213378906, "rewards/rejected": -18.670406341552734, "step": 5382 }, { "epoch": 1.84, "learning_rate": 3.455265067133051e-08, "logits/chosen": -0.005620343144983053, "logits/rejected": 0.02482759952545166, "logps/chosen": -225.5878143310547, "logps/rejected": -402.96661376953125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.46630895137786865, "rewards/margins": 20.000247955322266, "rewards/rejected": -20.4665584564209, "step": 5383 }, { "epoch": 1.84, "learning_rate": 3.440876342320609e-08, "logits/chosen": 0.05863453075289726, "logits/rejected": 0.08720631897449493, "logps/chosen": -192.06138610839844, "logps/rejected": -339.4063415527344, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.4061800241470337, "rewards/margins": 15.76928424835205, "rewards/rejected": -17.175464630126953, "step": 5384 }, { "epoch": 1.84, "learning_rate": 3.426517114987926e-08, "logits/chosen": 0.1231195256114006, "logits/rejected": 0.16460812091827393, "logps/chosen": -186.9476318359375, "logps/rejected": -321.3775329589844, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -1.375799536705017, "rewards/margins": 17.148283004760742, "rewards/rejected": -18.52408218383789, "step": 5385 }, { "epoch": 1.84, "learning_rate": 3.412187389521482e-08, "logits/chosen": -0.02550036460161209, "logits/rejected": 0.06250753253698349, "logps/chosen": -276.5636291503906, "logps/rejected": -263.4898681640625, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -0.1017371416091919, "rewards/margins": 12.380863189697266, "rewards/rejected": -12.482601165771484, "step": 5386 }, { "epoch": 1.84, "learning_rate": 3.397887170298885e-08, "logits/chosen": 0.04713613539934158, "logits/rejected": 0.1112525537610054, "logps/chosen": -261.0050048828125, "logps/rejected": -397.8265686035156, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 0.08295552432537079, "rewards/margins": 19.256452560424805, "rewards/rejected": -19.173498153686523, "step": 5387 }, { "epoch": 1.84, "learning_rate": 3.383616461688599e-08, "logits/chosen": 0.03624267876148224, "logits/rejected": 0.07479635626077652, "logps/chosen": -202.098388671875, "logps/rejected": -357.1336669921875, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -2.547117233276367, "rewards/margins": 16.372915267944336, "rewards/rejected": -18.920032501220703, "step": 5388 }, { "epoch": 1.84, "learning_rate": 3.369375268050156e-08, "logits/chosen": 0.12575176358222961, "logits/rejected": 0.16042201220989227, "logps/chosen": -153.5846405029297, "logps/rejected": -325.5931701660156, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -1.7950067520141602, "rewards/margins": 18.30945587158203, "rewards/rejected": -20.104461669921875, "step": 5389 }, { "epoch": 1.84, "learning_rate": 3.355163593734045e-08, "logits/chosen": 0.08054908365011215, "logits/rejected": 0.09299004822969437, "logps/chosen": -192.52975463867188, "logps/rejected": -343.20648193359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7506169080734253, "rewards/margins": 15.791775703430176, "rewards/rejected": -16.54239273071289, "step": 5390 }, { "epoch": 1.84, "learning_rate": 3.340981443081714e-08, "logits/chosen": -0.008272949606180191, "logits/rejected": 0.04348333925008774, "logps/chosen": -194.08425903320312, "logps/rejected": -331.32373046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7626590728759766, "rewards/margins": 16.57845687866211, "rewards/rejected": -18.341115951538086, "step": 5391 }, { "epoch": 1.84, "learning_rate": 3.3268288204256314e-08, "logits/chosen": 0.024316959083080292, "logits/rejected": 0.03752882033586502, "logps/chosen": -189.91102600097656, "logps/rejected": -338.7118835449219, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": -1.7434284687042236, "rewards/margins": 14.294170379638672, "rewards/rejected": -16.03759765625, "step": 5392 }, { "epoch": 1.84, "learning_rate": 3.312705730089227e-08, "logits/chosen": 0.09371145069599152, "logits/rejected": 0.10617196559906006, "logps/chosen": -139.44253540039062, "logps/rejected": -290.0228271484375, "loss": 0.0281, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3115344047546387, "rewards/margins": 14.184577941894531, "rewards/rejected": -16.496112823486328, "step": 5393 }, { "epoch": 1.84, "learning_rate": 3.2986121763869165e-08, "logits/chosen": -0.05715540051460266, "logits/rejected": -0.022420071065425873, "logps/chosen": -226.20831298828125, "logps/rejected": -376.1212158203125, "loss": 0.0566, "rewards/accuracies": 0.9375, "rewards/chosen": -0.802251935005188, "rewards/margins": 17.3459529876709, "rewards/rejected": -18.148204803466797, "step": 5394 }, { "epoch": 1.84, "learning_rate": 3.284548163624068e-08, "logits/chosen": 0.061877649277448654, "logits/rejected": 0.09518478065729141, "logps/chosen": -237.24545288085938, "logps/rejected": -402.7661437988281, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -2.313922166824341, "rewards/margins": 18.600563049316406, "rewards/rejected": -20.91448402404785, "step": 5395 }, { "epoch": 1.84, "learning_rate": 3.270513696097055e-08, "logits/chosen": 0.13378207385540009, "logits/rejected": 0.16734381020069122, "logps/chosen": -171.0839385986328, "logps/rejected": -372.3160400390625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.752814769744873, "rewards/margins": 19.171846389770508, "rewards/rejected": -20.92466163635254, "step": 5396 }, { "epoch": 1.84, "learning_rate": 3.2565087780932255e-08, "logits/chosen": 0.10673739016056061, "logits/rejected": 0.13132722675800323, "logps/chosen": -182.6449737548828, "logps/rejected": -403.80096435546875, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.2501267194747925, "rewards/margins": 19.4609432220459, "rewards/rejected": -20.711071014404297, "step": 5397 }, { "epoch": 1.84, "learning_rate": 3.242533413890858e-08, "logits/chosen": 0.1203223466873169, "logits/rejected": 0.15513183176517487, "logps/chosen": -206.35203552246094, "logps/rejected": -331.3785705566406, "loss": 0.0311, "rewards/accuracies": 0.9375, "rewards/chosen": 0.2839552164077759, "rewards/margins": 16.97159767150879, "rewards/rejected": -16.68764305114746, "step": 5398 }, { "epoch": 1.84, "learning_rate": 3.2285876077592475e-08, "logits/chosen": 0.029568733647465706, "logits/rejected": 0.04628743603825569, "logps/chosen": -147.643310546875, "logps/rejected": -347.6544189453125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.4347314834594727, "rewards/margins": 18.685386657714844, "rewards/rejected": -20.1201171875, "step": 5399 }, { "epoch": 1.84, "learning_rate": 3.214671363958665e-08, "logits/chosen": 0.02931736782193184, "logits/rejected": 0.07258576154708862, "logps/chosen": -226.6798858642578, "logps/rejected": -385.1549072265625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -3.1465413570404053, "rewards/margins": 17.753398895263672, "rewards/rejected": -20.899940490722656, "step": 5400 }, { "epoch": 1.84, "learning_rate": 3.2007846867402874e-08, "logits/chosen": 0.10989506542682648, "logits/rejected": 0.13617803156375885, "logps/chosen": -257.0050048828125, "logps/rejected": -425.4461364746094, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7573176026344299, "rewards/margins": 18.23264503479004, "rewards/rejected": -18.989959716796875, "step": 5401 }, { "epoch": 1.84, "learning_rate": 3.186927580346355e-08, "logits/chosen": 0.02782680280506611, "logits/rejected": 0.06254633516073227, "logps/chosen": -185.86083984375, "logps/rejected": -306.48406982421875, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.4743971824645996, "rewards/margins": 13.59383487701416, "rewards/rejected": -16.0682315826416, "step": 5402 }, { "epoch": 1.84, "learning_rate": 3.1731000490099915e-08, "logits/chosen": 0.12852370738983154, "logits/rejected": 0.13989849388599396, "logps/chosen": -189.09359741210938, "logps/rejected": -329.8347473144531, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.8834488391876221, "rewards/margins": 14.665287971496582, "rewards/rejected": -15.548737525939941, "step": 5403 }, { "epoch": 1.84, "learning_rate": 3.159302096955318e-08, "logits/chosen": 0.0048104762099683285, "logits/rejected": 0.06335245817899704, "logps/chosen": -259.03448486328125, "logps/rejected": -384.7709655761719, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -1.5988346338272095, "rewards/margins": 15.918415069580078, "rewards/rejected": -17.517250061035156, "step": 5404 }, { "epoch": 1.84, "learning_rate": 3.145533728397432e-08, "logits/chosen": 0.17127296328544617, "logits/rejected": 0.19269387423992157, "logps/chosen": -131.15765380859375, "logps/rejected": -228.14474487304688, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.6747870445251465, "rewards/margins": 12.485546112060547, "rewards/rejected": -14.160331726074219, "step": 5405 }, { "epoch": 1.85, "learning_rate": 3.131794947542354e-08, "logits/chosen": 0.06721693277359009, "logits/rejected": 0.062776118516922, "logps/chosen": -197.67518615722656, "logps/rejected": -450.8343505859375, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -2.3199758529663086, "rewards/margins": 20.199501037597656, "rewards/rejected": -22.51947593688965, "step": 5406 }, { "epoch": 1.85, "learning_rate": 3.118085758587119e-08, "logits/chosen": 0.04933018982410431, "logits/rejected": 0.0987325981259346, "logps/chosen": -232.26260375976562, "logps/rejected": -330.35479736328125, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -1.6711567640304565, "rewards/margins": 15.161638259887695, "rewards/rejected": -16.83279800415039, "step": 5407 }, { "epoch": 1.85, "learning_rate": 3.104406165719686e-08, "logits/chosen": 0.049921490252017975, "logits/rejected": 0.06582341343164444, "logps/chosen": -186.98480224609375, "logps/rejected": -318.18389892578125, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.176466703414917, "rewards/margins": 14.477806091308594, "rewards/rejected": -15.654272079467773, "step": 5408 }, { "epoch": 1.85, "learning_rate": 3.09075617311898e-08, "logits/chosen": 0.08283792436122894, "logits/rejected": 0.09669123589992523, "logps/chosen": -177.52789306640625, "logps/rejected": -344.798583984375, "loss": 0.0182, "rewards/accuracies": 0.9375, "rewards/chosen": -1.867553949356079, "rewards/margins": 15.184672355651855, "rewards/rejected": -17.052228927612305, "step": 5409 }, { "epoch": 1.85, "learning_rate": 3.0771357849548876e-08, "logits/chosen": 0.060952283442020416, "logits/rejected": 0.11347953975200653, "logps/chosen": -210.25244140625, "logps/rejected": -293.59429931640625, "loss": 0.0236, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6690791845321655, "rewards/margins": 14.84932804107666, "rewards/rejected": -16.51840591430664, "step": 5410 }, { "epoch": 1.85, "learning_rate": 3.063545005388246e-08, "logits/chosen": -0.046307310461997986, "logits/rejected": -0.02715761587023735, "logps/chosen": -237.51889038085938, "logps/rejected": -399.68023681640625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.9910709261894226, "rewards/margins": 17.011552810668945, "rewards/rejected": -18.002622604370117, "step": 5411 }, { "epoch": 1.85, "learning_rate": 3.049983838570858e-08, "logits/chosen": 0.15906307101249695, "logits/rejected": 0.16973203420639038, "logps/chosen": -200.78640747070312, "logps/rejected": -364.8368835449219, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.0230752229690552, "rewards/margins": 14.112639427185059, "rewards/rejected": -15.135713577270508, "step": 5412 }, { "epoch": 1.85, "learning_rate": 3.0364522886454636e-08, "logits/chosen": -0.012426488101482391, "logits/rejected": 0.013476280495524406, "logps/chosen": -185.4866180419922, "logps/rejected": -364.6934814453125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.7913548946380615, "rewards/margins": 17.3737850189209, "rewards/rejected": -20.16514015197754, "step": 5413 }, { "epoch": 1.85, "learning_rate": 3.0229503597457684e-08, "logits/chosen": 0.19815687835216522, "logits/rejected": 0.21802383661270142, "logps/chosen": -167.6114501953125, "logps/rejected": -288.1700439453125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.5890170335769653, "rewards/margins": 13.310047149658203, "rewards/rejected": -14.899065017700195, "step": 5414 }, { "epoch": 1.85, "learning_rate": 3.0094780559964285e-08, "logits/chosen": 0.029302168637514114, "logits/rejected": 0.05728558078408241, "logps/chosen": -158.2386932373047, "logps/rejected": -321.02923583984375, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.6757787466049194, "rewards/margins": 16.29728889465332, "rewards/rejected": -17.973068237304688, "step": 5415 }, { "epoch": 1.85, "learning_rate": 2.9960353815130293e-08, "logits/chosen": -0.005584442988038063, "logits/rejected": 0.0366828553378582, "logps/chosen": -243.93112182617188, "logps/rejected": -386.0838623046875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.052127718925476, "rewards/margins": 17.8981990814209, "rewards/rejected": -18.950326919555664, "step": 5416 }, { "epoch": 1.85, "learning_rate": 2.982622340402163e-08, "logits/chosen": 0.04998601973056793, "logits/rejected": 0.06121715530753136, "logps/chosen": -181.9102020263672, "logps/rejected": -372.8289794921875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -3.147280216217041, "rewards/margins": 17.255043029785156, "rewards/rejected": -20.402320861816406, "step": 5417 }, { "epoch": 1.85, "learning_rate": 2.969238936761276e-08, "logits/chosen": 0.005575074348598719, "logits/rejected": 0.012586873024702072, "logps/chosen": -190.48922729492188, "logps/rejected": -444.31207275390625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -3.197523593902588, "rewards/margins": 21.667112350463867, "rewards/rejected": -24.864639282226562, "step": 5418 }, { "epoch": 1.85, "learning_rate": 2.9558851746788516e-08, "logits/chosen": 0.06684968620538712, "logits/rejected": 0.09832561016082764, "logps/chosen": -211.0978240966797, "logps/rejected": -344.5654296875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -2.0897297859191895, "rewards/margins": 14.705289840698242, "rewards/rejected": -16.79501724243164, "step": 5419 }, { "epoch": 1.85, "learning_rate": 2.942561058234283e-08, "logits/chosen": -0.023789145052433014, "logits/rejected": 0.0039007540326565504, "logps/chosen": -299.1754150390625, "logps/rejected": -470.68060302734375, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -2.0563061237335205, "rewards/margins": 18.879507064819336, "rewards/rejected": -20.935815811157227, "step": 5420 }, { "epoch": 1.85, "learning_rate": 2.92926659149787e-08, "logits/chosen": -0.0208736564964056, "logits/rejected": 0.010634982027113438, "logps/chosen": -248.55380249023438, "logps/rejected": -353.22418212890625, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6505237817764282, "rewards/margins": 13.672009468078613, "rewards/rejected": -14.322532653808594, "step": 5421 }, { "epoch": 1.85, "learning_rate": 2.9160017785309077e-08, "logits/chosen": -0.04236384481191635, "logits/rejected": -0.015306871384382248, "logps/chosen": -255.79342651367188, "logps/rejected": -443.8122253417969, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -2.5071828365325928, "rewards/margins": 17.37657356262207, "rewards/rejected": -19.883756637573242, "step": 5422 }, { "epoch": 1.85, "learning_rate": 2.902766623385633e-08, "logits/chosen": -0.018567273393273354, "logits/rejected": 0.028614191338419914, "logps/chosen": -210.55519104003906, "logps/rejected": -321.6222839355469, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.5346753597259521, "rewards/margins": 17.449520111083984, "rewards/rejected": -18.98419761657715, "step": 5423 }, { "epoch": 1.85, "learning_rate": 2.889561130105167e-08, "logits/chosen": 0.002717823488637805, "logits/rejected": 0.0250457301735878, "logps/chosen": -207.44674682617188, "logps/rejected": -352.95391845703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.3509615659713745, "rewards/margins": 17.131175994873047, "rewards/rejected": -18.48213768005371, "step": 5424 }, { "epoch": 1.85, "learning_rate": 2.8763853027236273e-08, "logits/chosen": 0.11698534339666367, "logits/rejected": 0.13451150059700012, "logps/chosen": -138.24078369140625, "logps/rejected": -258.11346435546875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.029609441757202, "rewards/margins": 12.863863945007324, "rewards/rejected": -14.893473625183105, "step": 5425 }, { "epoch": 1.85, "learning_rate": 2.8632391452660277e-08, "logits/chosen": 0.023757752031087875, "logits/rejected": 0.030557136982679367, "logps/chosen": -195.8697967529297, "logps/rejected": -378.2895202636719, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.6457442045211792, "rewards/margins": 16.74722671508789, "rewards/rejected": -18.392972946166992, "step": 5426 }, { "epoch": 1.85, "learning_rate": 2.8501226617483777e-08, "logits/chosen": 0.14189881086349487, "logits/rejected": 0.15846574306488037, "logps/chosen": -195.5729522705078, "logps/rejected": -341.0184631347656, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.7266736030578613, "rewards/margins": 15.088306427001953, "rewards/rejected": -17.814979553222656, "step": 5427 }, { "epoch": 1.85, "learning_rate": 2.837035856177539e-08, "logits/chosen": 0.05108363926410675, "logits/rejected": 0.08347377926111221, "logps/chosen": -149.05117797851562, "logps/rejected": -260.30706787109375, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2362014055252075, "rewards/margins": 13.956829071044922, "rewards/rejected": -15.19303035736084, "step": 5428 }, { "epoch": 1.85, "learning_rate": 2.8239787325513797e-08, "logits/chosen": 0.22273491322994232, "logits/rejected": 0.23821179568767548, "logps/chosen": -118.86640167236328, "logps/rejected": -288.4989013671875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.3637800216674805, "rewards/margins": 14.76170539855957, "rewards/rejected": -16.125484466552734, "step": 5429 }, { "epoch": 1.85, "learning_rate": 2.8109512948586545e-08, "logits/chosen": 0.030798407271504402, "logits/rejected": 0.061878882348537445, "logps/chosen": -211.85043334960938, "logps/rejected": -350.3665771484375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.382347583770752, "rewards/margins": 15.142461776733398, "rewards/rejected": -17.524810791015625, "step": 5430 }, { "epoch": 1.85, "learning_rate": 2.797953547079057e-08, "logits/chosen": 0.11747409403324127, "logits/rejected": 0.13558068871498108, "logps/chosen": -134.58555603027344, "logps/rejected": -301.7273864746094, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.592560887336731, "rewards/margins": 17.53121566772461, "rewards/rejected": -18.123777389526367, "step": 5431 }, { "epoch": 1.85, "learning_rate": 2.784985493183256e-08, "logits/chosen": 0.10421086102724075, "logits/rejected": 0.11180643737316132, "logps/chosen": -195.33456420898438, "logps/rejected": -402.8699645996094, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -1.9665099382400513, "rewards/margins": 18.71192169189453, "rewards/rejected": -20.678430557250977, "step": 5432 }, { "epoch": 1.85, "learning_rate": 2.7720471371327826e-08, "logits/chosen": 0.04019828885793686, "logits/rejected": 0.060725897550582886, "logps/chosen": -143.00421142578125, "logps/rejected": -233.19532775878906, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.6177422404289246, "rewards/margins": 12.274444580078125, "rewards/rejected": -12.89218521118164, "step": 5433 }, { "epoch": 1.85, "learning_rate": 2.759138482880119e-08, "logits/chosen": 0.11979777365922928, "logits/rejected": 0.15352007746696472, "logps/chosen": -192.58023071289062, "logps/rejected": -357.06341552734375, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -2.1930949687957764, "rewards/margins": 17.176448822021484, "rewards/rejected": -19.369543075561523, "step": 5434 }, { "epoch": 1.85, "learning_rate": 2.7462595343687224e-08, "logits/chosen": 0.05191940814256668, "logits/rejected": 0.1073780357837677, "logps/chosen": -238.94497680664062, "logps/rejected": -351.1835021972656, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.462472915649414, "rewards/margins": 17.122032165527344, "rewards/rejected": -18.58450698852539, "step": 5435 }, { "epoch": 1.86, "learning_rate": 2.7334102955328897e-08, "logits/chosen": 0.008180183358490467, "logits/rejected": 0.043223313987255096, "logps/chosen": -227.93089294433594, "logps/rejected": -333.86474609375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.842207431793213, "rewards/margins": 13.648212432861328, "rewards/rejected": -15.490419387817383, "step": 5436 }, { "epoch": 1.86, "learning_rate": 2.7205907702979036e-08, "logits/chosen": 0.15343326330184937, "logits/rejected": 0.17435322701931, "logps/chosen": -226.79776000976562, "logps/rejected": -338.5353698730469, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.1718913316726685, "rewards/margins": 14.835519790649414, "rewards/rejected": -16.00741195678711, "step": 5437 }, { "epoch": 1.86, "learning_rate": 2.7078009625799537e-08, "logits/chosen": -0.010964266024529934, "logits/rejected": 0.03589313104748726, "logps/chosen": -257.232177734375, "logps/rejected": -403.0899963378906, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -2.5884921550750732, "rewards/margins": 17.49774932861328, "rewards/rejected": -20.08624267578125, "step": 5438 }, { "epoch": 1.86, "learning_rate": 2.6950408762861478e-08, "logits/chosen": 0.18708530068397522, "logits/rejected": 0.2200380265712738, "logps/chosen": -198.55349731445312, "logps/rejected": -373.6661682128906, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -2.012338638305664, "rewards/margins": 17.98905372619629, "rewards/rejected": -20.001392364501953, "step": 5439 }, { "epoch": 1.86, "learning_rate": 2.682310515314512e-08, "logits/chosen": 0.20673643052577972, "logits/rejected": 0.24268066883087158, "logps/chosen": -211.10598754882812, "logps/rejected": -336.8863830566406, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.0210464000701904, "rewards/margins": 16.749080657958984, "rewards/rejected": -18.770126342773438, "step": 5440 }, { "epoch": 1.86, "learning_rate": 2.6696098835540025e-08, "logits/chosen": 0.04707947373390198, "logits/rejected": 0.08807700127363205, "logps/chosen": -235.82534790039062, "logps/rejected": -339.5009765625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.8997677564620972, "rewards/margins": 14.147542953491211, "rewards/rejected": -16.04730987548828, "step": 5441 }, { "epoch": 1.86, "learning_rate": 2.6569389848845048e-08, "logits/chosen": 0.05085630714893341, "logits/rejected": 0.10230883210897446, "logps/chosen": -240.78106689453125, "logps/rejected": -444.90570068359375, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -0.6877327561378479, "rewards/margins": 21.761566162109375, "rewards/rejected": -22.449298858642578, "step": 5442 }, { "epoch": 1.86, "learning_rate": 2.6442978231767554e-08, "logits/chosen": 0.06884264945983887, "logits/rejected": 0.1357099413871765, "logps/chosen": -219.6832733154297, "logps/rejected": -391.2635192871094, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.4738200902938843, "rewards/margins": 21.19656753540039, "rewards/rejected": -22.67038917541504, "step": 5443 }, { "epoch": 1.86, "learning_rate": 2.631686402292499e-08, "logits/chosen": 0.0030265876557677984, "logits/rejected": 0.038635678589344025, "logps/chosen": -172.0049591064453, "logps/rejected": -279.3609313964844, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.40239644050598145, "rewards/margins": 14.38644027709961, "rewards/rejected": -13.984045028686523, "step": 5444 }, { "epoch": 1.86, "learning_rate": 2.6191047260843314e-08, "logits/chosen": 0.10157115757465363, "logits/rejected": 0.13803072273731232, "logps/chosen": -160.1929473876953, "logps/rejected": -303.6064453125, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.6544220447540283, "rewards/margins": 15.587475776672363, "rewards/rejected": -18.241897583007812, "step": 5445 }, { "epoch": 1.86, "learning_rate": 2.606552798395778e-08, "logits/chosen": 0.01879720948636532, "logits/rejected": 0.055180102586746216, "logps/chosen": -178.82371520996094, "logps/rejected": -304.76751708984375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8477635383605957, "rewards/margins": 16.028013229370117, "rewards/rejected": -16.875776290893555, "step": 5446 }, { "epoch": 1.86, "learning_rate": 2.594030623061294e-08, "logits/chosen": 0.09784120321273804, "logits/rejected": 0.11945290863513947, "logps/chosen": -169.08477783203125, "logps/rejected": -324.17449951171875, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.9183737635612488, "rewards/margins": 14.956780433654785, "rewards/rejected": -15.875154495239258, "step": 5447 }, { "epoch": 1.86, "learning_rate": 2.5815382039062307e-08, "logits/chosen": 0.1490478515625, "logits/rejected": 0.17409905791282654, "logps/chosen": -201.9737548828125, "logps/rejected": -325.1297302246094, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.7982771396636963, "rewards/margins": 15.61981201171875, "rewards/rejected": -16.418088912963867, "step": 5448 }, { "epoch": 1.86, "learning_rate": 2.5690755447468126e-08, "logits/chosen": -0.02130216546356678, "logits/rejected": -0.010281506925821304, "logps/chosen": -209.59246826171875, "logps/rejected": -410.0220031738281, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.824805736541748, "rewards/margins": 16.52634048461914, "rewards/rejected": -20.351146697998047, "step": 5449 }, { "epoch": 1.86, "learning_rate": 2.5566426493902614e-08, "logits/chosen": 0.04987188056111336, "logits/rejected": 0.07911711931228638, "logps/chosen": -243.54177856445312, "logps/rejected": -418.4158935546875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.0955679416656494, "rewards/margins": 19.34097671508789, "rewards/rejected": -20.436542510986328, "step": 5450 }, { "epoch": 1.86, "learning_rate": 2.544239521634628e-08, "logits/chosen": 0.15126946568489075, "logits/rejected": 0.14959962666034698, "logps/chosen": -167.85926818847656, "logps/rejected": -318.79888916015625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.626354455947876, "rewards/margins": 13.457650184631348, "rewards/rejected": -16.084003448486328, "step": 5451 }, { "epoch": 1.86, "learning_rate": 2.5318661652689034e-08, "logits/chosen": 0.05034349486231804, "logits/rejected": 0.08486221730709076, "logps/chosen": -226.75732421875, "logps/rejected": -384.09014892578125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.9360300302505493, "rewards/margins": 18.966585159301758, "rewards/rejected": -20.90261459350586, "step": 5452 }, { "epoch": 1.86, "learning_rate": 2.519522584072986e-08, "logits/chosen": 0.1021859273314476, "logits/rejected": 0.12128090113401413, "logps/chosen": -171.439208984375, "logps/rejected": -320.6839599609375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.4784324169158936, "rewards/margins": 14.590404510498047, "rewards/rejected": -17.068838119506836, "step": 5453 }, { "epoch": 1.86, "learning_rate": 2.507208781817638e-08, "logits/chosen": 0.08850053697824478, "logits/rejected": 0.11832257360219955, "logps/chosen": -236.77151489257812, "logps/rejected": -353.85101318359375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.139166831970215, "rewards/margins": 14.086898803710938, "rewards/rejected": -16.226064682006836, "step": 5454 }, { "epoch": 1.86, "learning_rate": 2.4949247622645942e-08, "logits/chosen": 0.10075876116752625, "logits/rejected": 0.10097876936197281, "logps/chosen": -171.93905639648438, "logps/rejected": -394.2250671386719, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.9567545652389526, "rewards/margins": 19.26603889465332, "rewards/rejected": -21.22279167175293, "step": 5455 }, { "epoch": 1.86, "learning_rate": 2.4826705291664195e-08, "logits/chosen": -0.07698767632246017, "logits/rejected": -0.08434977382421494, "logps/chosen": -242.95382690429688, "logps/rejected": -480.5153503417969, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -2.844815492630005, "rewards/margins": 19.509029388427734, "rewards/rejected": -22.353843688964844, "step": 5456 }, { "epoch": 1.86, "learning_rate": 2.470446086266653e-08, "logits/chosen": 0.15484564006328583, "logits/rejected": 0.1991448700428009, "logps/chosen": -172.73089599609375, "logps/rejected": -232.46011352539062, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.6860917806625366, "rewards/margins": 12.974409103393555, "rewards/rejected": -13.660500526428223, "step": 5457 }, { "epoch": 1.86, "learning_rate": 2.458251437299652e-08, "logits/chosen": 0.07216578722000122, "logits/rejected": 0.11183606833219528, "logps/chosen": -165.88009643554688, "logps/rejected": -338.5975646972656, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.1210172176361084, "rewards/margins": 17.336261749267578, "rewards/rejected": -18.457279205322266, "step": 5458 }, { "epoch": 1.86, "learning_rate": 2.4460865859907252e-08, "logits/chosen": 0.0013430675026029348, "logits/rejected": 0.035325631499290466, "logps/chosen": -211.35739135742188, "logps/rejected": -335.5294189453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.446965217590332, "rewards/margins": 15.702777862548828, "rewards/rejected": -17.149742126464844, "step": 5459 }, { "epoch": 1.86, "learning_rate": 2.4339515360561004e-08, "logits/chosen": -0.012532605789601803, "logits/rejected": 0.009546381421387196, "logps/chosen": -225.5990447998047, "logps/rejected": -328.34405517578125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.506585955619812, "rewards/margins": 12.425337791442871, "rewards/rejected": -13.931924819946289, "step": 5460 }, { "epoch": 1.86, "learning_rate": 2.4218462912028115e-08, "logits/chosen": 0.036699533462524414, "logits/rejected": 0.051084864884614944, "logps/chosen": -173.5912628173828, "logps/rejected": -336.1026611328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.542238712310791, "rewards/margins": 15.12390422821045, "rewards/rejected": -16.6661434173584, "step": 5461 }, { "epoch": 1.86, "learning_rate": 2.4097708551288897e-08, "logits/chosen": 0.012022431008517742, "logits/rejected": 0.05805911496281624, "logps/chosen": -227.295166015625, "logps/rejected": -364.92138671875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.5186164975166321, "rewards/margins": 18.862964630126953, "rewards/rejected": -19.381580352783203, "step": 5462 }, { "epoch": 1.86, "learning_rate": 2.3977252315231955e-08, "logits/chosen": 0.0760219469666481, "logits/rejected": 0.11441996693611145, "logps/chosen": -144.49008178710938, "logps/rejected": -253.21463012695312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.9722755551338196, "rewards/margins": 12.34732723236084, "rewards/rejected": -13.319601058959961, "step": 5463 }, { "epoch": 1.86, "learning_rate": 2.3857094240654852e-08, "logits/chosen": -0.10375133156776428, "logits/rejected": -0.07366987317800522, "logps/chosen": -292.07183837890625, "logps/rejected": -467.808837890625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.0272390842437744, "rewards/margins": 20.085447311401367, "rewards/rejected": -22.112688064575195, "step": 5464 }, { "epoch": 1.87, "learning_rate": 2.3737234364264667e-08, "logits/chosen": 0.02591065876185894, "logits/rejected": 0.05852487310767174, "logps/chosen": -249.773681640625, "logps/rejected": -455.71466064453125, "loss": 0.0397, "rewards/accuracies": 1.0, "rewards/chosen": -2.5253002643585205, "rewards/margins": 20.523969650268555, "rewards/rejected": -23.04926872253418, "step": 5465 }, { "epoch": 1.87, "learning_rate": 2.3617672722676564e-08, "logits/chosen": 0.13585801422595978, "logits/rejected": 0.16335122287273407, "logps/chosen": -198.80645751953125, "logps/rejected": -350.7200622558594, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1799449920654297, "rewards/margins": 15.807323455810547, "rewards/rejected": -16.987266540527344, "step": 5466 }, { "epoch": 1.87, "learning_rate": 2.349840935241487e-08, "logits/chosen": 0.031076697632670403, "logits/rejected": 0.04729032143950462, "logps/chosen": -172.85604858398438, "logps/rejected": -317.4804382324219, "loss": 0.0306, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4351072311401367, "rewards/margins": 15.004674911499023, "rewards/rejected": -16.439783096313477, "step": 5467 }, { "epoch": 1.87, "learning_rate": 2.337944428991334e-08, "logits/chosen": 0.12601973116397858, "logits/rejected": 0.15348286926746368, "logps/chosen": -204.41734313964844, "logps/rejected": -311.8188171386719, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": -0.24281272292137146, "rewards/margins": 15.121943473815918, "rewards/rejected": -15.364755630493164, "step": 5468 }, { "epoch": 1.87, "learning_rate": 2.326077757151379e-08, "logits/chosen": -0.020733313634991646, "logits/rejected": 0.016132688149809837, "logps/chosen": -219.81443786621094, "logps/rejected": -414.5728759765625, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.4719194173812866, "rewards/margins": 21.808921813964844, "rewards/rejected": -23.280841827392578, "step": 5469 }, { "epoch": 1.87, "learning_rate": 2.314240923346744e-08, "logits/chosen": -0.006732813082635403, "logits/rejected": 0.05075805261731148, "logps/chosen": -240.43374633789062, "logps/rejected": -309.7881164550781, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.0665127038955688, "rewards/margins": 14.804814338684082, "rewards/rejected": -15.871326446533203, "step": 5470 }, { "epoch": 1.87, "learning_rate": 2.302433931193426e-08, "logits/chosen": -0.012315169908106327, "logits/rejected": 0.026050178334116936, "logps/chosen": -287.2554931640625, "logps/rejected": -445.6095275878906, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.5117905139923096, "rewards/margins": 18.5711612701416, "rewards/rejected": -20.082950592041016, "step": 5471 }, { "epoch": 1.87, "learning_rate": 2.2906567842982726e-08, "logits/chosen": 0.022114954888820648, "logits/rejected": 0.038770321756601334, "logps/chosen": -216.0423126220703, "logps/rejected": -379.3620910644531, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.3595895767211914, "rewards/margins": 17.53780174255371, "rewards/rejected": -19.89739227294922, "step": 5472 }, { "epoch": 1.87, "learning_rate": 2.2789094862590507e-08, "logits/chosen": 0.007577212527394295, "logits/rejected": 0.06581758707761765, "logps/chosen": -263.36407470703125, "logps/rejected": -329.54754638671875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.6739473938941956, "rewards/margins": 15.88053035736084, "rewards/rejected": -15.206583023071289, "step": 5473 }, { "epoch": 1.87, "learning_rate": 2.2671920406644008e-08, "logits/chosen": -0.029208147898316383, "logits/rejected": -0.018434524536132812, "logps/chosen": -288.4677734375, "logps/rejected": -524.3895874023438, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -2.8393242359161377, "rewards/margins": 19.24367332458496, "rewards/rejected": -22.082996368408203, "step": 5474 }, { "epoch": 1.87, "learning_rate": 2.2555044510938592e-08, "logits/chosen": 0.11442046612501144, "logits/rejected": 0.1376427263021469, "logps/chosen": -202.61001586914062, "logps/rejected": -366.35516357421875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.890686511993408, "rewards/margins": 17.668516159057617, "rewards/rejected": -21.559200286865234, "step": 5475 }, { "epoch": 1.87, "learning_rate": 2.2438467211177813e-08, "logits/chosen": 0.14445467293262482, "logits/rejected": 0.1498524397611618, "logps/chosen": -163.5175323486328, "logps/rejected": -362.847412109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.830571174621582, "rewards/margins": 17.101425170898438, "rewards/rejected": -18.931995391845703, "step": 5476 }, { "epoch": 1.87, "learning_rate": 2.232218854297474e-08, "logits/chosen": 0.11658427119255066, "logits/rejected": 0.11877261102199554, "logps/chosen": -169.70675659179688, "logps/rejected": -367.8257141113281, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.5652735233306885, "rewards/margins": 17.512741088867188, "rewards/rejected": -19.07801628112793, "step": 5477 }, { "epoch": 1.87, "learning_rate": 2.2206208541850845e-08, "logits/chosen": -0.015439024195075035, "logits/rejected": 0.046888720244169235, "logps/chosen": -192.57077026367188, "logps/rejected": -296.4755554199219, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.0199954509735107, "rewards/margins": 13.096766471862793, "rewards/rejected": -15.116762161254883, "step": 5478 }, { "epoch": 1.87, "learning_rate": 2.2090527243236344e-08, "logits/chosen": 0.05190182849764824, "logits/rejected": 0.07250723242759705, "logps/chosen": -196.61004638671875, "logps/rejected": -345.938720703125, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -2.2212252616882324, "rewards/margins": 13.655895233154297, "rewards/rejected": -15.87712287902832, "step": 5479 }, { "epoch": 1.87, "learning_rate": 2.1975144682470413e-08, "logits/chosen": 0.05668172985315323, "logits/rejected": 0.10035108029842377, "logps/chosen": -214.38356018066406, "logps/rejected": -345.2176818847656, "loss": 0.0449, "rewards/accuracies": 0.9375, "rewards/chosen": -3.109442710876465, "rewards/margins": 16.256851196289062, "rewards/rejected": -19.366294860839844, "step": 5480 }, { "epoch": 1.87, "learning_rate": 2.1860060894800746e-08, "logits/chosen": -0.04346867650747299, "logits/rejected": -0.009603717364370823, "logps/chosen": -217.36854553222656, "logps/rejected": -313.7244873046875, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -0.720291018486023, "rewards/margins": 14.321952819824219, "rewards/rejected": -15.042243957519531, "step": 5481 }, { "epoch": 1.87, "learning_rate": 2.1745275915383664e-08, "logits/chosen": 0.02058769203722477, "logits/rejected": 0.0584896020591259, "logps/chosen": -296.6466369628906, "logps/rejected": -426.7076416015625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.0457254648208618, "rewards/margins": 18.210969924926758, "rewards/rejected": -19.256694793701172, "step": 5482 }, { "epoch": 1.87, "learning_rate": 2.1630789779284674e-08, "logits/chosen": 0.14326520264148712, "logits/rejected": 0.16009005904197693, "logps/chosen": -196.97511291503906, "logps/rejected": -310.0816955566406, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -2.276350736618042, "rewards/margins": 12.467812538146973, "rewards/rejected": -14.744162559509277, "step": 5483 }, { "epoch": 1.87, "learning_rate": 2.151660252147769e-08, "logits/chosen": 0.08377858251333237, "logits/rejected": 0.1212974265217781, "logps/chosen": -240.10498046875, "logps/rejected": -392.79290771484375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.8895069360733032, "rewards/margins": 18.510093688964844, "rewards/rejected": -19.399600982666016, "step": 5484 }, { "epoch": 1.87, "learning_rate": 2.1402714176845138e-08, "logits/chosen": 0.13595512509346008, "logits/rejected": 0.16679534316062927, "logps/chosen": -150.88406372070312, "logps/rejected": -275.71759033203125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.8508337140083313, "rewards/margins": 14.512386322021484, "rewards/rejected": -15.36322021484375, "step": 5485 }, { "epoch": 1.87, "learning_rate": 2.1289124780178635e-08, "logits/chosen": 0.052190717309713364, "logits/rejected": 0.08773864060640335, "logps/chosen": -240.93048095703125, "logps/rejected": -387.2506103515625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.331979990005493, "rewards/margins": 17.550594329833984, "rewards/rejected": -19.8825740814209, "step": 5486 }, { "epoch": 1.87, "learning_rate": 2.117583436617787e-08, "logits/chosen": 0.10668346285820007, "logits/rejected": 0.13376659154891968, "logps/chosen": -209.4989471435547, "logps/rejected": -374.1337585449219, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -3.1916844844818115, "rewards/margins": 16.254261016845703, "rewards/rejected": -19.445947647094727, "step": 5487 }, { "epoch": 1.87, "learning_rate": 2.106284296945171e-08, "logits/chosen": 0.0693296268582344, "logits/rejected": 0.10688118636608124, "logps/chosen": -264.7359313964844, "logps/rejected": -427.7235107421875, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.703834891319275, "rewards/margins": 17.5943603515625, "rewards/rejected": -19.29819679260254, "step": 5488 }, { "epoch": 1.87, "learning_rate": 2.0950150624517326e-08, "logits/chosen": 0.03420982509851456, "logits/rejected": 0.07391882687807083, "logps/chosen": -213.2462921142578, "logps/rejected": -406.66680908203125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.1864384412765503, "rewards/margins": 20.364627838134766, "rewards/rejected": -21.55106544494629, "step": 5489 }, { "epoch": 1.87, "learning_rate": 2.0837757365800846e-08, "logits/chosen": 0.08084093779325485, "logits/rejected": 0.11568433046340942, "logps/chosen": -258.8133544921875, "logps/rejected": -479.4048767089844, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -2.833278179168701, "rewards/margins": 22.097265243530273, "rewards/rejected": -24.9305419921875, "step": 5490 }, { "epoch": 1.87, "learning_rate": 2.072566322763669e-08, "logits/chosen": -0.035952337086200714, "logits/rejected": -0.024109892547130585, "logps/chosen": -204.7605438232422, "logps/rejected": -357.14404296875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.105498790740967, "rewards/margins": 15.2554349899292, "rewards/rejected": -17.360933303833008, "step": 5491 }, { "epoch": 1.87, "learning_rate": 2.0613868244268138e-08, "logits/chosen": 0.040572356432676315, "logits/rejected": 0.05506506562232971, "logps/chosen": -200.46844482421875, "logps/rejected": -383.601806640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.5674734115600586, "rewards/margins": 17.351755142211914, "rewards/rejected": -18.919227600097656, "step": 5492 }, { "epoch": 1.87, "learning_rate": 2.0502372449847095e-08, "logits/chosen": 0.08211369812488556, "logits/rejected": 0.11399578303098679, "logps/chosen": -191.58204650878906, "logps/rejected": -295.9721984863281, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.1172962188720703, "rewards/margins": 13.073948860168457, "rewards/rejected": -15.191244125366211, "step": 5493 }, { "epoch": 1.88, "learning_rate": 2.0391175878433643e-08, "logits/chosen": -0.015115615911781788, "logits/rejected": 0.02546069771051407, "logps/chosen": -263.6015625, "logps/rejected": -387.091552734375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.5002596378326416, "rewards/margins": 15.159085273742676, "rewards/rejected": -16.659345626831055, "step": 5494 }, { "epoch": 1.88, "learning_rate": 2.0280278563997387e-08, "logits/chosen": 0.020876958966255188, "logits/rejected": 0.06327085942029953, "logps/chosen": -167.0747528076172, "logps/rejected": -294.55841064453125, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -2.1899313926696777, "rewards/margins": 13.596304893493652, "rewards/rejected": -15.786235809326172, "step": 5495 }, { "epoch": 1.88, "learning_rate": 2.0169680540415456e-08, "logits/chosen": 0.05505512282252312, "logits/rejected": 0.055541254580020905, "logps/chosen": -165.47047424316406, "logps/rejected": -356.34466552734375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.9431085586547852, "rewards/margins": 15.129522323608398, "rewards/rejected": -17.072629928588867, "step": 5496 }, { "epoch": 1.88, "learning_rate": 2.0059381841474042e-08, "logits/chosen": 0.05950206145644188, "logits/rejected": 0.10905124992132187, "logps/chosen": -205.1918487548828, "logps/rejected": -362.3254699707031, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.6738470196723938, "rewards/margins": 19.332548141479492, "rewards/rejected": -20.00639533996582, "step": 5497 }, { "epoch": 1.88, "learning_rate": 1.9949382500868196e-08, "logits/chosen": -0.03008974716067314, "logits/rejected": 0.0294599961489439, "logps/chosen": -256.1587829589844, "logps/rejected": -440.10540771484375, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -4.161825180053711, "rewards/margins": 20.30970001220703, "rewards/rejected": -24.471527099609375, "step": 5498 }, { "epoch": 1.88, "learning_rate": 1.983968255220103e-08, "logits/chosen": 0.1672167330980301, "logits/rejected": 0.19348108768463135, "logps/chosen": -199.99813842773438, "logps/rejected": -293.6244201660156, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.4711122512817383, "rewards/margins": 12.487257957458496, "rewards/rejected": -14.958369255065918, "step": 5499 }, { "epoch": 1.88, "learning_rate": 1.973028202898419e-08, "logits/chosen": 0.0736389234662056, "logits/rejected": 0.10617984086275101, "logps/chosen": -166.58245849609375, "logps/rejected": -233.7393035888672, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -1.983560562133789, "rewards/margins": 11.101160049438477, "rewards/rejected": -13.08471965789795, "step": 5500 }, { "epoch": 1.88, "learning_rate": 1.9621180964638273e-08, "logits/chosen": -0.022655785083770752, "logits/rejected": 0.02465587481856346, "logps/chosen": -197.01727294921875, "logps/rejected": -355.9091491699219, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.0609054565429688, "rewards/margins": 18.513721466064453, "rewards/rejected": -19.574626922607422, "step": 5501 }, { "epoch": 1.88, "learning_rate": 1.951237939249195e-08, "logits/chosen": 0.10310763120651245, "logits/rejected": 0.13751450181007385, "logps/chosen": -180.77850341796875, "logps/rejected": -308.478271484375, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -2.704291582107544, "rewards/margins": 14.322002410888672, "rewards/rejected": -17.02629280090332, "step": 5502 }, { "epoch": 1.88, "learning_rate": 1.9403877345782748e-08, "logits/chosen": 0.001545849721878767, "logits/rejected": 0.044239677488803864, "logps/chosen": -178.88035583496094, "logps/rejected": -278.8846740722656, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": -1.4997345209121704, "rewards/margins": 13.956103324890137, "rewards/rejected": -15.455836296081543, "step": 5503 }, { "epoch": 1.88, "learning_rate": 1.9295674857656486e-08, "logits/chosen": -0.09748534113168716, "logits/rejected": -0.04371511936187744, "logps/chosen": -305.746337890625, "logps/rejected": -400.50555419921875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.7271164655685425, "rewards/margins": 17.167098999023438, "rewards/rejected": -17.894216537475586, "step": 5504 }, { "epoch": 1.88, "learning_rate": 1.918777196116761e-08, "logits/chosen": 0.1782589554786682, "logits/rejected": 0.19194777309894562, "logps/chosen": -162.54159545898438, "logps/rejected": -317.78009033203125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.11550235748291, "rewards/margins": 14.672492980957031, "rewards/rejected": -17.787994384765625, "step": 5505 }, { "epoch": 1.88, "learning_rate": 1.9080168689278752e-08, "logits/chosen": 0.048723895102739334, "logits/rejected": 0.09178576618432999, "logps/chosen": -205.83596801757812, "logps/rejected": -380.7623291015625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -2.705927848815918, "rewards/margins": 19.415178298950195, "rewards/rejected": -22.121105194091797, "step": 5506 }, { "epoch": 1.88, "learning_rate": 1.89728650748614e-08, "logits/chosen": 0.015258095227181911, "logits/rejected": 0.029182586818933487, "logps/chosen": -276.3906555175781, "logps/rejected": -461.2032470703125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.152584195137024, "rewards/margins": 18.88566017150879, "rewards/rejected": -20.038240432739258, "step": 5507 }, { "epoch": 1.88, "learning_rate": 1.886586115069544e-08, "logits/chosen": 0.017412785440683365, "logits/rejected": 0.06941582262516022, "logps/chosen": -254.7512969970703, "logps/rejected": -360.5712890625, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -2.468209981918335, "rewards/margins": 15.156155586242676, "rewards/rejected": -17.624366760253906, "step": 5508 }, { "epoch": 1.88, "learning_rate": 1.8759156949468837e-08, "logits/chosen": 0.11313232034444809, "logits/rejected": 0.1540118306875229, "logps/chosen": -192.52001953125, "logps/rejected": -223.39947509765625, "loss": 0.0234, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8680471181869507, "rewards/margins": 9.890912055969238, "rewards/rejected": -11.758959770202637, "step": 5509 }, { "epoch": 1.88, "learning_rate": 1.8652752503778403e-08, "logits/chosen": 0.010778547264635563, "logits/rejected": 0.06561288237571716, "logps/chosen": -227.13934326171875, "logps/rejected": -403.4063415527344, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.322681188583374, "rewards/margins": 19.221681594848633, "rewards/rejected": -20.54436492919922, "step": 5510 }, { "epoch": 1.88, "learning_rate": 1.854664784612925e-08, "logits/chosen": 0.004983430728316307, "logits/rejected": 0.01646244525909424, "logps/chosen": -168.56642150878906, "logps/rejected": -339.3968505859375, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -2.021902084350586, "rewards/margins": 14.857765197753906, "rewards/rejected": -16.879667282104492, "step": 5511 }, { "epoch": 1.88, "learning_rate": 1.844084300893456e-08, "logits/chosen": 0.09619852900505066, "logits/rejected": 0.12184162437915802, "logps/chosen": -160.4386444091797, "logps/rejected": -264.7497863769531, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.9997507333755493, "rewards/margins": 11.856369018554688, "rewards/rejected": -13.856121063232422, "step": 5512 }, { "epoch": 1.88, "learning_rate": 1.833533802451681e-08, "logits/chosen": 0.0430678091943264, "logits/rejected": 0.051687091588974, "logps/chosen": -272.8184814453125, "logps/rejected": -485.0042724609375, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -3.9813764095306396, "rewards/margins": 19.290987014770508, "rewards/rejected": -23.272363662719727, "step": 5513 }, { "epoch": 1.88, "learning_rate": 1.8230132925106e-08, "logits/chosen": 0.09384553134441376, "logits/rejected": 0.10433245450258255, "logps/chosen": -171.72096252441406, "logps/rejected": -312.1764831542969, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -2.5475709438323975, "rewards/margins": 13.125998497009277, "rewards/rejected": -15.673568725585938, "step": 5514 }, { "epoch": 1.88, "learning_rate": 1.8125227742840643e-08, "logits/chosen": 0.09636561572551727, "logits/rejected": 0.11557426303625107, "logps/chosen": -158.96746826171875, "logps/rejected": -280.9998474121094, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.507845401763916, "rewards/margins": 13.574026107788086, "rewards/rejected": -14.081871032714844, "step": 5515 }, { "epoch": 1.88, "learning_rate": 1.8020622509768325e-08, "logits/chosen": 0.13056430220603943, "logits/rejected": 0.14889192581176758, "logps/chosen": -173.0408935546875, "logps/rejected": -281.90667724609375, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.486349493265152, "rewards/margins": 12.01826286315918, "rewards/rejected": -12.504611015319824, "step": 5516 }, { "epoch": 1.88, "learning_rate": 1.7916317257844037e-08, "logits/chosen": 0.09155851602554321, "logits/rejected": 0.12342695146799088, "logps/chosen": -197.3665008544922, "logps/rejected": -383.30767822265625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9974943399429321, "rewards/margins": 19.672616958618164, "rewards/rejected": -20.67011070251465, "step": 5517 }, { "epoch": 1.88, "learning_rate": 1.781231201893174e-08, "logits/chosen": 0.03871713951230049, "logits/rejected": 0.08652878552675247, "logps/chosen": -212.87147521972656, "logps/rejected": -399.4713439941406, "loss": 0.071, "rewards/accuracies": 1.0, "rewards/chosen": -1.710015892982483, "rewards/margins": 18.230552673339844, "rewards/rejected": -19.940567016601562, "step": 5518 }, { "epoch": 1.88, "learning_rate": 1.7708606824803795e-08, "logits/chosen": 0.08696902543306351, "logits/rejected": 0.10904117673635483, "logps/chosen": -209.14907836914062, "logps/rejected": -338.89996337890625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.9955823421478271, "rewards/margins": 16.011314392089844, "rewards/rejected": -18.00689697265625, "step": 5519 }, { "epoch": 1.88, "learning_rate": 1.7605201707140416e-08, "logits/chosen": 0.11970977485179901, "logits/rejected": 0.14774973690509796, "logps/chosen": -219.48997497558594, "logps/rejected": -439.0596923828125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.2891805171966553, "rewards/margins": 21.979721069335938, "rewards/rejected": -25.268901824951172, "step": 5520 }, { "epoch": 1.88, "learning_rate": 1.7502096697530667e-08, "logits/chosen": 0.05338636413216591, "logits/rejected": 0.06094497814774513, "logps/chosen": -155.01553344726562, "logps/rejected": -366.8153076171875, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -2.25852370262146, "rewards/margins": 19.915285110473633, "rewards/rejected": -22.173810958862305, "step": 5521 }, { "epoch": 1.88, "learning_rate": 1.7399291827471462e-08, "logits/chosen": 0.05832182615995407, "logits/rejected": 0.07742198556661606, "logps/chosen": -250.3634033203125, "logps/rejected": -428.7831115722656, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.835798740386963, "rewards/margins": 18.089624404907227, "rewards/rejected": -20.925424575805664, "step": 5522 }, { "epoch": 1.88, "learning_rate": 1.7296787128368572e-08, "logits/chosen": 0.03269992768764496, "logits/rejected": 0.0573531836271286, "logps/chosen": -241.44833374023438, "logps/rejected": -444.38128662109375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.8196231126785278, "rewards/margins": 17.708879470825195, "rewards/rejected": -19.528501510620117, "step": 5523 }, { "epoch": 1.89, "learning_rate": 1.7194582631535614e-08, "logits/chosen": 0.06363438069820404, "logits/rejected": 0.07044655829668045, "logps/chosen": -218.6259765625, "logps/rejected": -401.5240478515625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.9532735347747803, "rewards/margins": 16.200225830078125, "rewards/rejected": -19.153499603271484, "step": 5524 }, { "epoch": 1.89, "learning_rate": 1.70926783681945e-08, "logits/chosen": 0.09066880494356155, "logits/rejected": 0.12364007532596588, "logps/chosen": -207.2033233642578, "logps/rejected": -347.21722412109375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.19837570190429688, "rewards/margins": 16.062744140625, "rewards/rejected": -16.26112174987793, "step": 5525 }, { "epoch": 1.89, "learning_rate": 1.6991074369475888e-08, "logits/chosen": 0.16876229643821716, "logits/rejected": 0.20934557914733887, "logps/chosen": -203.89341735839844, "logps/rejected": -327.9830322265625, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -1.1386923789978027, "rewards/margins": 17.122329711914062, "rewards/rejected": -18.261024475097656, "step": 5526 }, { "epoch": 1.89, "learning_rate": 1.6889770666418167e-08, "logits/chosen": -0.10507862269878387, "logits/rejected": -0.05591372773051262, "logps/chosen": -306.9886474609375, "logps/rejected": -458.386962890625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.0380529165267944, "rewards/margins": 16.647005081176758, "rewards/rejected": -17.68505859375, "step": 5527 }, { "epoch": 1.89, "learning_rate": 1.6788767289968254e-08, "logits/chosen": 0.018773892894387245, "logits/rejected": 0.05031348764896393, "logps/chosen": -195.3591766357422, "logps/rejected": -367.7624816894531, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.5475268363952637, "rewards/margins": 18.446250915527344, "rewards/rejected": -19.993778228759766, "step": 5528 }, { "epoch": 1.89, "learning_rate": 1.668806427098146e-08, "logits/chosen": 0.14951913058757782, "logits/rejected": 0.17689108848571777, "logps/chosen": -263.9037780761719, "logps/rejected": -421.41217041015625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.9986884593963623, "rewards/margins": 18.9354305267334, "rewards/rejected": -19.934118270874023, "step": 5529 }, { "epoch": 1.89, "learning_rate": 1.658766164022085e-08, "logits/chosen": -0.023227741941809654, "logits/rejected": 0.03600704297423363, "logps/chosen": -274.4954833984375, "logps/rejected": -372.3660583496094, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -0.6542322635650635, "rewards/margins": 15.87552547454834, "rewards/rejected": -16.52975845336914, "step": 5530 }, { "epoch": 1.89, "learning_rate": 1.6487559428358333e-08, "logits/chosen": 0.18148256838321686, "logits/rejected": 0.21627141535282135, "logps/chosen": -257.12353515625, "logps/rejected": -414.89422607421875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.9347018003463745, "rewards/margins": 21.632240295410156, "rewards/rejected": -22.566940307617188, "step": 5531 }, { "epoch": 1.89, "learning_rate": 1.6387757665973556e-08, "logits/chosen": -0.0491274856030941, "logits/rejected": 0.03338930010795593, "logps/chosen": -293.20880126953125, "logps/rejected": -310.1984558105469, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.03336086869239807, "rewards/margins": 16.63435935974121, "rewards/rejected": -16.60099983215332, "step": 5532 }, { "epoch": 1.89, "learning_rate": 1.6288256383554798e-08, "logits/chosen": -0.02475389465689659, "logits/rejected": 0.008201822638511658, "logps/chosen": -296.8371887207031, "logps/rejected": -453.1162109375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.4924972653388977, "rewards/margins": 20.361953735351562, "rewards/rejected": -19.869457244873047, "step": 5533 }, { "epoch": 1.89, "learning_rate": 1.618905561149808e-08, "logits/chosen": 0.01338952500373125, "logits/rejected": 0.022243689745664597, "logps/chosen": -156.7880859375, "logps/rejected": -351.2236022949219, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.497786521911621, "rewards/margins": 16.488752365112305, "rewards/rejected": -18.98653793334961, "step": 5534 }, { "epoch": 1.89, "learning_rate": 1.609015538010805e-08, "logits/chosen": 0.04370632395148277, "logits/rejected": 0.050513360649347305, "logps/chosen": -207.80453491210938, "logps/rejected": -417.2552185058594, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.176963448524475, "rewards/margins": 19.222740173339844, "rewards/rejected": -20.399703979492188, "step": 5535 }, { "epoch": 1.89, "learning_rate": 1.5991555719597207e-08, "logits/chosen": 0.09407620877027512, "logits/rejected": 0.13288788497447968, "logps/chosen": -189.3061981201172, "logps/rejected": -292.94244384765625, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -1.244307279586792, "rewards/margins": 13.30997085571289, "rewards/rejected": -14.554278373718262, "step": 5536 }, { "epoch": 1.89, "learning_rate": 1.5893256660086672e-08, "logits/chosen": 0.005871784873306751, "logits/rejected": 0.03290960192680359, "logps/chosen": -171.9717254638672, "logps/rejected": -302.7084655761719, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.3458028733730316, "rewards/margins": 14.504175186157227, "rewards/rejected": -14.84997844696045, "step": 5537 }, { "epoch": 1.89, "learning_rate": 1.5795258231605102e-08, "logits/chosen": 0.06771498918533325, "logits/rejected": 0.06531739234924316, "logps/chosen": -180.73348999023438, "logps/rejected": -360.805419921875, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.7001189589500427, "rewards/margins": 15.948063850402832, "rewards/rejected": -16.648181915283203, "step": 5538 }, { "epoch": 1.89, "learning_rate": 1.5697560464089767e-08, "logits/chosen": 0.14782153069972992, "logits/rejected": 0.1319105178117752, "logps/chosen": -178.0033721923828, "logps/rejected": -414.562255859375, "loss": 0.102, "rewards/accuracies": 1.0, "rewards/chosen": -1.2793710231781006, "rewards/margins": 20.174837112426758, "rewards/rejected": -21.45421028137207, "step": 5539 }, { "epoch": 1.89, "learning_rate": 1.5600163387386124e-08, "logits/chosen": 0.12605875730514526, "logits/rejected": 0.18678288161754608, "logps/chosen": -217.36036682128906, "logps/rejected": -334.0364990234375, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -0.4746912717819214, "rewards/margins": 17.006221771240234, "rewards/rejected": -17.480913162231445, "step": 5540 }, { "epoch": 1.89, "learning_rate": 1.5503067031247596e-08, "logits/chosen": 0.05755539983510971, "logits/rejected": 0.11063122749328613, "logps/chosen": -233.63720703125, "logps/rejected": -393.6670227050781, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.4326677322387695, "rewards/margins": 17.368473052978516, "rewards/rejected": -19.80113983154297, "step": 5541 }, { "epoch": 1.89, "learning_rate": 1.540627142533568e-08, "logits/chosen": -0.01709231548011303, "logits/rejected": 0.014112805016338825, "logps/chosen": -233.67694091796875, "logps/rejected": -364.1651611328125, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2237443923950195, "rewards/margins": 13.048368453979492, "rewards/rejected": -14.272113800048828, "step": 5542 }, { "epoch": 1.89, "learning_rate": 1.530977659922017e-08, "logits/chosen": 0.06076253578066826, "logits/rejected": 0.100740447640419, "logps/chosen": -207.2310333251953, "logps/rejected": -292.26470947265625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.716461181640625, "rewards/margins": 11.665855407714844, "rewards/rejected": -14.382316589355469, "step": 5543 }, { "epoch": 1.89, "learning_rate": 1.5213582582378925e-08, "logits/chosen": -0.1369517743587494, "logits/rejected": -0.09302272647619247, "logps/chosen": -209.58468627929688, "logps/rejected": -371.7193603515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.4756044149398804, "rewards/margins": 15.731003761291504, "rewards/rejected": -17.206607818603516, "step": 5544 }, { "epoch": 1.89, "learning_rate": 1.5117689404197664e-08, "logits/chosen": 0.07087494432926178, "logits/rejected": 0.09614994376897812, "logps/chosen": -214.99078369140625, "logps/rejected": -390.9251708984375, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.0716211795806885, "rewards/margins": 17.8966121673584, "rewards/rejected": -19.968233108520508, "step": 5545 }, { "epoch": 1.89, "learning_rate": 1.5022097093970843e-08, "logits/chosen": 0.012211816385388374, "logits/rejected": 0.03190218284726143, "logps/chosen": -198.95831298828125, "logps/rejected": -445.595947265625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.8319891691207886, "rewards/margins": 21.430641174316406, "rewards/rejected": -23.262630462646484, "step": 5546 }, { "epoch": 1.89, "learning_rate": 1.492680568090032e-08, "logits/chosen": -0.010930532589554787, "logits/rejected": 0.02113141305744648, "logps/chosen": -251.55442810058594, "logps/rejected": -420.8482666015625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.0614733695983887, "rewards/margins": 18.375865936279297, "rewards/rejected": -19.437339782714844, "step": 5547 }, { "epoch": 1.89, "learning_rate": 1.4831815194096264e-08, "logits/chosen": 0.08604476600885391, "logits/rejected": 0.08665259182453156, "logps/chosen": -171.8291778564453, "logps/rejected": -342.8736572265625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.24289566278457642, "rewards/margins": 16.07143211364746, "rewards/rejected": -16.3143253326416, "step": 5548 }, { "epoch": 1.89, "learning_rate": 1.4737125662577122e-08, "logits/chosen": 0.1058526411652565, "logits/rejected": 0.14279109239578247, "logps/chosen": -195.98309326171875, "logps/rejected": -381.66961669921875, "loss": 0.0928, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7932214736938477, "rewards/margins": 17.66952896118164, "rewards/rejected": -20.462749481201172, "step": 5549 }, { "epoch": 1.89, "learning_rate": 1.4642737115269089e-08, "logits/chosen": 0.026693273335695267, "logits/rejected": 0.08269882947206497, "logps/chosen": -232.4875030517578, "logps/rejected": -442.3115539550781, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8313472270965576, "rewards/margins": 21.82552719116211, "rewards/rejected": -22.65687370300293, "step": 5550 }, { "epoch": 1.89, "learning_rate": 1.4548649581006766e-08, "logits/chosen": 0.007204219698905945, "logits/rejected": 0.02140781469643116, "logps/chosen": -252.0010986328125, "logps/rejected": -449.7344665527344, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -2.1049551963806152, "rewards/margins": 19.154096603393555, "rewards/rejected": -21.25905418395996, "step": 5551 }, { "epoch": 1.89, "learning_rate": 1.4454863088532387e-08, "logits/chosen": -0.018715720623731613, "logits/rejected": -0.006176679860800505, "logps/chosen": -218.29129028320312, "logps/rejected": -396.6064147949219, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.6977057456970215, "rewards/margins": 16.817668914794922, "rewards/rejected": -18.51537322998047, "step": 5552 }, { "epoch": 1.9, "learning_rate": 1.4361377666496477e-08, "logits/chosen": -0.031104685738682747, "logits/rejected": -0.010311121121048927, "logps/chosen": -244.5032501220703, "logps/rejected": -427.8437805175781, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": -1.8522073030471802, "rewards/margins": 18.070144653320312, "rewards/rejected": -19.922351837158203, "step": 5553 }, { "epoch": 1.9, "learning_rate": 1.4268193343457635e-08, "logits/chosen": 0.1465321183204651, "logits/rejected": 0.16792356967926025, "logps/chosen": -145.38775634765625, "logps/rejected": -308.33319091796875, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.5892738699913025, "rewards/margins": 16.02860450744629, "rewards/rejected": -16.617877960205078, "step": 5554 }, { "epoch": 1.9, "learning_rate": 1.4175310147882313e-08, "logits/chosen": 0.09714927524328232, "logits/rejected": 0.1131182312965393, "logps/chosen": -181.1877899169922, "logps/rejected": -332.6522216796875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -3.8191447257995605, "rewards/margins": 13.175829887390137, "rewards/rejected": -16.99497413635254, "step": 5555 }, { "epoch": 1.9, "learning_rate": 1.4082728108145147e-08, "logits/chosen": 0.10622144490480423, "logits/rejected": 0.11335910856723785, "logps/chosen": -203.1416473388672, "logps/rejected": -389.271484375, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -1.3585994243621826, "rewards/margins": 17.715667724609375, "rewards/rejected": -19.07426643371582, "step": 5556 }, { "epoch": 1.9, "learning_rate": 1.3990447252528515e-08, "logits/chosen": 0.033134546130895615, "logits/rejected": 0.04494396969676018, "logps/chosen": -256.30426025390625, "logps/rejected": -442.5166320800781, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.343940258026123, "rewards/margins": 20.196348190307617, "rewards/rejected": -21.5402889251709, "step": 5557 }, { "epoch": 1.9, "learning_rate": 1.3898467609222974e-08, "logits/chosen": 0.08328010886907578, "logits/rejected": 0.09555590897798538, "logps/chosen": -196.34698486328125, "logps/rejected": -356.87322998046875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.1615569591522217, "rewards/margins": 15.238567352294922, "rewards/rejected": -17.400123596191406, "step": 5558 }, { "epoch": 1.9, "learning_rate": 1.3806789206327162e-08, "logits/chosen": 0.07080370932817459, "logits/rejected": 0.11028798669576645, "logps/chosen": -200.28387451171875, "logps/rejected": -384.7962646484375, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -2.252253532409668, "rewards/margins": 20.391420364379883, "rewards/rejected": -22.643672943115234, "step": 5559 }, { "epoch": 1.9, "learning_rate": 1.3715412071847343e-08, "logits/chosen": 0.04101981222629547, "logits/rejected": 0.0825212374329567, "logps/chosen": -231.35975646972656, "logps/rejected": -380.41064453125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.2936582565307617, "rewards/margins": 18.615076065063477, "rewards/rejected": -20.908733367919922, "step": 5560 }, { "epoch": 1.9, "learning_rate": 1.3624336233698297e-08, "logits/chosen": 0.1234910786151886, "logits/rejected": 0.12989479303359985, "logps/chosen": -172.60397338867188, "logps/rejected": -307.7500915527344, "loss": 0.0236, "rewards/accuracies": 0.9375, "rewards/chosen": -2.9823946952819824, "rewards/margins": 13.354660034179688, "rewards/rejected": -16.337053298950195, "step": 5561 }, { "epoch": 1.9, "learning_rate": 1.3533561719702213e-08, "logits/chosen": 0.060246504843235016, "logits/rejected": 0.08741511404514313, "logps/chosen": -190.2859649658203, "logps/rejected": -352.18426513671875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.1390084028244019, "rewards/margins": 16.385089874267578, "rewards/rejected": -17.524099349975586, "step": 5562 }, { "epoch": 1.9, "learning_rate": 1.3443088557589355e-08, "logits/chosen": 0.019506722688674927, "logits/rejected": 0.051404181867837906, "logps/chosen": -244.35694885253906, "logps/rejected": -429.67852783203125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.596673846244812, "rewards/margins": 20.360445022583008, "rewards/rejected": -21.957115173339844, "step": 5563 }, { "epoch": 1.9, "learning_rate": 1.335291677499828e-08, "logits/chosen": 0.1127287819981575, "logits/rejected": 0.13615946471691132, "logps/chosen": -222.79165649414062, "logps/rejected": -348.9136047363281, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.057978272438049316, "rewards/margins": 14.788850784301758, "rewards/rejected": -14.84682846069336, "step": 5564 }, { "epoch": 1.9, "learning_rate": 1.3263046399475064e-08, "logits/chosen": 0.07032616436481476, "logits/rejected": 0.10041874647140503, "logps/chosen": -227.42108154296875, "logps/rejected": -406.2759094238281, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.3745923042297363, "rewards/margins": 18.57133674621582, "rewards/rejected": -20.9459285736084, "step": 5565 }, { "epoch": 1.9, "learning_rate": 1.3173477458473859e-08, "logits/chosen": 0.0980534628033638, "logits/rejected": 0.12658743560314178, "logps/chosen": -192.5291748046875, "logps/rejected": -409.6575927734375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.842761516571045, "rewards/margins": 20.436443328857422, "rewards/rejected": -22.279207229614258, "step": 5566 }, { "epoch": 1.9, "learning_rate": 1.3084209979356886e-08, "logits/chosen": 0.06454186886548996, "logits/rejected": 0.08994615823030472, "logps/chosen": -236.23365783691406, "logps/rejected": -364.7899475097656, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -2.278080463409424, "rewards/margins": 13.856103897094727, "rewards/rejected": -16.134183883666992, "step": 5567 }, { "epoch": 1.9, "learning_rate": 1.2995243989393888e-08, "logits/chosen": -0.04234213009476662, "logits/rejected": 0.00979805551469326, "logps/chosen": -234.31790161132812, "logps/rejected": -345.43121337890625, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -2.7250022888183594, "rewards/margins": 14.79920482635498, "rewards/rejected": -17.524206161499023, "step": 5568 }, { "epoch": 1.9, "learning_rate": 1.2906579515763016e-08, "logits/chosen": 0.052266862243413925, "logits/rejected": 0.07708314061164856, "logps/chosen": -251.2751922607422, "logps/rejected": -422.84466552734375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -2.4091875553131104, "rewards/margins": 17.45609474182129, "rewards/rejected": -19.86528205871582, "step": 5569 }, { "epoch": 1.9, "learning_rate": 1.2818216585549824e-08, "logits/chosen": 0.06320548802614212, "logits/rejected": 0.07939153909683228, "logps/chosen": -190.66650390625, "logps/rejected": -332.0726318359375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.2549543380737305, "rewards/margins": 15.497714042663574, "rewards/rejected": -17.752670288085938, "step": 5570 }, { "epoch": 1.9, "learning_rate": 1.2730155225748162e-08, "logits/chosen": 0.020217569544911385, "logits/rejected": 0.03178396448493004, "logps/chosen": -234.28277587890625, "logps/rejected": -415.1139221191406, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.061376690864563, "rewards/margins": 17.67881202697754, "rewards/rejected": -18.74018669128418, "step": 5571 }, { "epoch": 1.9, "learning_rate": 1.2642395463259404e-08, "logits/chosen": 0.039474762976169586, "logits/rejected": 0.06643295288085938, "logps/chosen": -196.9689178466797, "logps/rejected": -346.26055908203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1920723915100098, "rewards/margins": 16.683265686035156, "rewards/rejected": -17.875335693359375, "step": 5572 }, { "epoch": 1.9, "learning_rate": 1.2554937324892989e-08, "logits/chosen": -0.029772942885756493, "logits/rejected": -0.0008589504286646843, "logps/chosen": -254.2266845703125, "logps/rejected": -418.1521301269531, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.5839208364486694, "rewards/margins": 17.492048263549805, "rewards/rejected": -19.075965881347656, "step": 5573 }, { "epoch": 1.9, "learning_rate": 1.2467780837366216e-08, "logits/chosen": 0.04146827012300491, "logits/rejected": 0.077653668820858, "logps/chosen": -202.11465454101562, "logps/rejected": -316.8445129394531, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.5290007591247559, "rewards/margins": 15.527283668518066, "rewards/rejected": -17.056285858154297, "step": 5574 }, { "epoch": 1.9, "learning_rate": 1.2380926027304117e-08, "logits/chosen": 0.08335927873849869, "logits/rejected": 0.11519546806812286, "logps/chosen": -267.4608459472656, "logps/rejected": -411.2087707519531, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -1.6602323055267334, "rewards/margins": 17.6015625, "rewards/rejected": -19.26179313659668, "step": 5575 }, { "epoch": 1.9, "learning_rate": 1.2294372921239692e-08, "logits/chosen": 0.10436753183603287, "logits/rejected": 0.14343538880348206, "logps/chosen": -164.42886352539062, "logps/rejected": -274.43206787109375, "loss": 0.0632, "rewards/accuracies": 1.0, "rewards/chosen": -0.5416522026062012, "rewards/margins": 12.639080047607422, "rewards/rejected": -13.180730819702148, "step": 5576 }, { "epoch": 1.9, "learning_rate": 1.2208121545613793e-08, "logits/chosen": 0.08699284493923187, "logits/rejected": 0.11354389786720276, "logps/chosen": -226.6982421875, "logps/rejected": -371.16546630859375, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -1.1259746551513672, "rewards/margins": 17.11068344116211, "rewards/rejected": -18.236658096313477, "step": 5577 }, { "epoch": 1.9, "learning_rate": 1.2122171926774671e-08, "logits/chosen": 0.11505717039108276, "logits/rejected": 0.13664843142032623, "logps/chosen": -222.44020080566406, "logps/rejected": -361.30084228515625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.104616403579712, "rewards/margins": 15.812458038330078, "rewards/rejected": -18.91707420349121, "step": 5578 }, { "epoch": 1.9, "learning_rate": 1.2036524090979327e-08, "logits/chosen": 0.010880531743168831, "logits/rejected": 0.03450516611337662, "logps/chosen": -199.48834228515625, "logps/rejected": -364.79119873046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.1571385860443115, "rewards/margins": 17.565080642700195, "rewards/rejected": -19.722219467163086, "step": 5579 }, { "epoch": 1.9, "learning_rate": 1.1951178064391499e-08, "logits/chosen": 0.008889157325029373, "logits/rejected": 0.07049208134412766, "logps/chosen": -286.2416076660156, "logps/rejected": -412.3218688964844, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -0.7351011037826538, "rewards/margins": 18.508426666259766, "rewards/rejected": -19.243528366088867, "step": 5580 }, { "epoch": 1.9, "learning_rate": 1.1866133873083438e-08, "logits/chosen": 0.0679883137345314, "logits/rejected": 0.11489419639110565, "logps/chosen": -158.2926025390625, "logps/rejected": -319.035888671875, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -1.0475269556045532, "rewards/margins": 16.67332649230957, "rewards/rejected": -17.72085189819336, "step": 5581 }, { "epoch": 1.91, "learning_rate": 1.1781391543034924e-08, "logits/chosen": 0.08584985136985779, "logits/rejected": 0.09848044067621231, "logps/chosen": -194.3184356689453, "logps/rejected": -385.5801086425781, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.8908395767211914, "rewards/margins": 18.156763076782227, "rewards/rejected": -19.047603607177734, "step": 5582 }, { "epoch": 1.91, "learning_rate": 1.1696951100133578e-08, "logits/chosen": -0.0016126504633575678, "logits/rejected": 0.012987359426915646, "logps/chosen": -215.7414093017578, "logps/rejected": -418.5290832519531, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -2.5631566047668457, "rewards/margins": 17.967296600341797, "rewards/rejected": -20.530452728271484, "step": 5583 }, { "epoch": 1.91, "learning_rate": 1.1612812570174768e-08, "logits/chosen": -0.02986900880932808, "logits/rejected": -0.017219040542840958, "logps/chosen": -196.33779907226562, "logps/rejected": -376.132080078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.3229942321777344, "rewards/margins": 17.798370361328125, "rewards/rejected": -20.121362686157227, "step": 5584 }, { "epoch": 1.91, "learning_rate": 1.152897597886182e-08, "logits/chosen": 0.11657016724348068, "logits/rejected": 0.12176233530044556, "logps/chosen": -162.4092254638672, "logps/rejected": -389.3243408203125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -3.70774507522583, "rewards/margins": 17.076208114624023, "rewards/rejected": -20.783954620361328, "step": 5585 }, { "epoch": 1.91, "learning_rate": 1.1445441351805363e-08, "logits/chosen": -0.027436329051852226, "logits/rejected": -0.01058882661163807, "logps/chosen": -169.3893280029297, "logps/rejected": -299.06402587890625, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -0.9156301617622375, "rewards/margins": 12.453712463378906, "rewards/rejected": -13.369342803955078, "step": 5586 }, { "epoch": 1.91, "learning_rate": 1.1362208714524201e-08, "logits/chosen": -0.07920582592487335, "logits/rejected": -0.05260852351784706, "logps/chosen": -291.3050842285156, "logps/rejected": -427.2060546875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.7253363132476807, "rewards/margins": 16.283544540405273, "rewards/rejected": -19.008880615234375, "step": 5587 }, { "epoch": 1.91, "learning_rate": 1.1279278092444889e-08, "logits/chosen": 0.10189804434776306, "logits/rejected": 0.13412466645240784, "logps/chosen": -186.00137329101562, "logps/rejected": -283.2354736328125, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -1.3582708835601807, "rewards/margins": 13.873668670654297, "rewards/rejected": -15.231939315795898, "step": 5588 }, { "epoch": 1.91, "learning_rate": 1.119664951090149e-08, "logits/chosen": -0.026271790266036987, "logits/rejected": 0.0014098463580012321, "logps/chosen": -194.00918579101562, "logps/rejected": -361.1119079589844, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.3161498308181763, "rewards/margins": 19.167461395263672, "rewards/rejected": -20.483613967895508, "step": 5589 }, { "epoch": 1.91, "learning_rate": 1.1114322995135705e-08, "logits/chosen": 0.016970928758382797, "logits/rejected": 0.04444916173815727, "logps/chosen": -199.41207885742188, "logps/rejected": -396.35784912109375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.6227623224258423, "rewards/margins": 19.468460083007812, "rewards/rejected": -20.091222763061523, "step": 5590 }, { "epoch": 1.91, "learning_rate": 1.1032298570297417e-08, "logits/chosen": 0.12526869773864746, "logits/rejected": 0.18930530548095703, "logps/chosen": -247.4121551513672, "logps/rejected": -317.2902526855469, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.36861494183540344, "rewards/margins": 17.01710319519043, "rewards/rejected": -17.385719299316406, "step": 5591 }, { "epoch": 1.91, "learning_rate": 1.0950576261444022e-08, "logits/chosen": 0.0010763712925836444, "logits/rejected": 0.05618005618453026, "logps/chosen": -226.1750946044922, "logps/rejected": -389.2087707519531, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -2.9271469116210938, "rewards/margins": 17.549428939819336, "rewards/rejected": -20.47657585144043, "step": 5592 }, { "epoch": 1.91, "learning_rate": 1.0869156093540111e-08, "logits/chosen": 0.04205961897969246, "logits/rejected": 0.087708979845047, "logps/chosen": -231.47364807128906, "logps/rejected": -445.7640686035156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.4981224536895752, "rewards/margins": 20.991003036499023, "rewards/rejected": -22.48912811279297, "step": 5593 }, { "epoch": 1.91, "learning_rate": 1.0788038091458895e-08, "logits/chosen": 0.06364762783050537, "logits/rejected": 0.07862343639135361, "logps/chosen": -221.28453063964844, "logps/rejected": -365.3559875488281, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.1885493993759155, "rewards/margins": 16.15212631225586, "rewards/rejected": -17.340675354003906, "step": 5594 }, { "epoch": 1.91, "learning_rate": 1.0707222279980665e-08, "logits/chosen": 0.019910497590899467, "logits/rejected": 0.05642281845211983, "logps/chosen": -179.6641082763672, "logps/rejected": -398.06005859375, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -2.6846683025360107, "rewards/margins": 20.87214469909668, "rewards/rejected": -23.556812286376953, "step": 5595 }, { "epoch": 1.91, "learning_rate": 1.062670868379334e-08, "logits/chosen": 0.15518367290496826, "logits/rejected": 0.18071657419204712, "logps/chosen": -143.40992736816406, "logps/rejected": -326.5211486816406, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.027697801589966, "rewards/margins": 17.68467903137207, "rewards/rejected": -19.71237564086914, "step": 5596 }, { "epoch": 1.91, "learning_rate": 1.054649732749291e-08, "logits/chosen": 0.12288107722997665, "logits/rejected": 0.15809856355190277, "logps/chosen": -182.22793579101562, "logps/rejected": -282.8864440917969, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -1.7176477909088135, "rewards/margins": 13.048713684082031, "rewards/rejected": -14.766361236572266, "step": 5597 }, { "epoch": 1.91, "learning_rate": 1.0466588235582774e-08, "logits/chosen": -0.08312353491783142, "logits/rejected": -0.0382775254547596, "logps/chosen": -211.54881286621094, "logps/rejected": -305.9031066894531, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -1.0381027460098267, "rewards/margins": 15.253250122070312, "rewards/rejected": -16.29135513305664, "step": 5598 }, { "epoch": 1.91, "learning_rate": 1.0386981432474073e-08, "logits/chosen": 0.07154552638530731, "logits/rejected": 0.13929085433483124, "logps/chosen": -235.02810668945312, "logps/rejected": -363.2657165527344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.3029874265193939, "rewards/margins": 16.83513641357422, "rewards/rejected": -16.532150268554688, "step": 5599 }, { "epoch": 1.91, "learning_rate": 1.0307676942485689e-08, "logits/chosen": 0.04713713005185127, "logits/rejected": 0.06451416015625, "logps/chosen": -183.2171630859375, "logps/rejected": -406.57086181640625, "loss": 0.0277, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6185951232910156, "rewards/margins": 20.010162353515625, "rewards/rejected": -22.628755569458008, "step": 5600 }, { "epoch": 1.91, "learning_rate": 1.0228674789843905e-08, "logits/chosen": -0.001698829815723002, "logits/rejected": 0.029899803921580315, "logps/chosen": -206.019287109375, "logps/rejected": -443.2183532714844, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -3.089766025543213, "rewards/margins": 20.67148208618164, "rewards/rejected": -23.761249542236328, "step": 5601 }, { "epoch": 1.91, "learning_rate": 1.0149974998682753e-08, "logits/chosen": -0.012086005881428719, "logits/rejected": 0.04385581985116005, "logps/chosen": -258.6896667480469, "logps/rejected": -423.79498291015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.319244623184204, "rewards/margins": 20.169132232666016, "rewards/rejected": -21.48837661743164, "step": 5602 }, { "epoch": 1.91, "learning_rate": 1.0071577593044e-08, "logits/chosen": 0.019181275740265846, "logits/rejected": 0.052954286336898804, "logps/chosen": -234.15098571777344, "logps/rejected": -368.97296142578125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.8069117069244385, "rewards/margins": 15.212498664855957, "rewards/rejected": -17.019411087036133, "step": 5603 }, { "epoch": 1.91, "learning_rate": 9.993482596877157e-09, "logits/chosen": 0.04832541570067406, "logits/rejected": 0.06426304578781128, "logps/chosen": -173.50804138183594, "logps/rejected": -332.0201721191406, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.318418264389038, "rewards/margins": 17.215679168701172, "rewards/rejected": -18.53409767150879, "step": 5604 }, { "epoch": 1.91, "learning_rate": 9.915690034038805e-09, "logits/chosen": 0.0935368463397026, "logits/rejected": 0.11416583508253098, "logps/chosen": -209.2268524169922, "logps/rejected": -378.3348083496094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.32762807607650757, "rewards/margins": 17.875350952148438, "rewards/rejected": -18.202980041503906, "step": 5605 }, { "epoch": 1.91, "learning_rate": 9.83819992829371e-09, "logits/chosen": 0.024244561791419983, "logits/rejected": 0.07320649921894073, "logps/chosen": -210.7864532470703, "logps/rejected": -376.73309326171875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5381865501403809, "rewards/margins": 20.905723571777344, "rewards/rejected": -21.44390869140625, "step": 5606 }, { "epoch": 1.91, "learning_rate": 9.761012303314165e-09, "logits/chosen": 0.05292763188481331, "logits/rejected": 0.07805319875478745, "logps/chosen": -179.14483642578125, "logps/rejected": -354.7323913574219, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": -2.3305516242980957, "rewards/margins": 16.84226417541504, "rewards/rejected": -19.17281723022461, "step": 5607 }, { "epoch": 1.91, "learning_rate": 9.684127182679525e-09, "logits/chosen": 0.0969032347202301, "logits/rejected": 0.1023859903216362, "logps/chosen": -247.65234375, "logps/rejected": -488.2772216796875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -2.031456232070923, "rewards/margins": 21.751916885375977, "rewards/rejected": -23.78337287902832, "step": 5608 }, { "epoch": 1.91, "learning_rate": 9.607544589877559e-09, "logits/chosen": 0.09637583792209625, "logits/rejected": 0.13793432712554932, "logps/chosen": -226.0150146484375, "logps/rejected": -398.1522216796875, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -1.0086545944213867, "rewards/margins": 19.95269203186035, "rewards/rejected": -20.961347579956055, "step": 5609 }, { "epoch": 1.91, "learning_rate": 9.531264548302998e-09, "logits/chosen": -0.005185158457607031, "logits/rejected": 0.035173188894987106, "logps/chosen": -241.57005310058594, "logps/rejected": -320.19940185546875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.6269804239273071, "rewards/margins": 14.624934196472168, "rewards/rejected": -16.251914978027344, "step": 5610 }, { "epoch": 1.92, "learning_rate": 9.455287081258201e-09, "logits/chosen": 0.04591653496026993, "logits/rejected": 0.0656886175274849, "logps/chosen": -237.62155151367188, "logps/rejected": -460.78253173828125, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -2.134679079055786, "rewards/margins": 21.913116455078125, "rewards/rejected": -24.04779624938965, "step": 5611 }, { "epoch": 1.92, "learning_rate": 9.379612211953492e-09, "logits/chosen": 0.07213746756315231, "logits/rejected": 0.08885025233030319, "logps/chosen": -157.3359832763672, "logps/rejected": -338.31890869140625, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -1.1220834255218506, "rewards/margins": 16.640491485595703, "rewards/rejected": -17.762577056884766, "step": 5612 }, { "epoch": 1.92, "learning_rate": 9.304239963506377e-09, "logits/chosen": 0.10477109998464584, "logits/rejected": 0.13897748291492462, "logps/chosen": -205.89920043945312, "logps/rejected": -289.7466735839844, "loss": 0.048, "rewards/accuracies": 0.9375, "rewards/chosen": -3.635829210281372, "rewards/margins": 12.984623908996582, "rewards/rejected": -16.620452880859375, "step": 5613 }, { "epoch": 1.92, "learning_rate": 9.229170358941996e-09, "logits/chosen": 0.14523427188396454, "logits/rejected": 0.16605094075202942, "logps/chosen": -179.0164794921875, "logps/rejected": -314.4721374511719, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.660916805267334, "rewards/margins": 14.14590072631836, "rewards/rejected": -14.806818008422852, "step": 5614 }, { "epoch": 1.92, "learning_rate": 9.154403421193224e-09, "logits/chosen": 0.09713809937238693, "logits/rejected": 0.11012612283229828, "logps/chosen": -264.33355712890625, "logps/rejected": -444.4055480957031, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.6273218393325806, "rewards/margins": 19.326862335205078, "rewards/rejected": -19.95418357849121, "step": 5615 }, { "epoch": 1.92, "learning_rate": 9.079939173100237e-09, "logits/chosen": 0.042960599064826965, "logits/rejected": 0.07554442435503006, "logps/chosen": -228.38072204589844, "logps/rejected": -433.21356201171875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.783319890499115, "rewards/margins": 18.88434600830078, "rewards/rejected": -19.667665481567383, "step": 5616 }, { "epoch": 1.92, "learning_rate": 9.005777637410838e-09, "logits/chosen": 0.1141854077577591, "logits/rejected": 0.1622074842453003, "logps/chosen": -189.57301330566406, "logps/rejected": -285.8372802734375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.4598369002342224, "rewards/margins": 14.16102409362793, "rewards/rejected": -14.62086009979248, "step": 5617 }, { "epoch": 1.92, "learning_rate": 8.93191883678035e-09, "logits/chosen": -0.038813140243291855, "logits/rejected": 0.011626893654465675, "logps/chosen": -179.2550048828125, "logps/rejected": -356.4372253417969, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.28576844930648804, "rewards/margins": 20.406694412231445, "rewards/rejected": -20.120925903320312, "step": 5618 }, { "epoch": 1.92, "learning_rate": 8.858362793771501e-09, "logits/chosen": 0.12530194222927094, "logits/rejected": 0.1567019671201706, "logps/chosen": -134.55715942382812, "logps/rejected": -290.9811096191406, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.34027624130249023, "rewards/margins": 16.386127471923828, "rewards/rejected": -16.726402282714844, "step": 5619 }, { "epoch": 1.92, "learning_rate": 8.785109530854873e-09, "logits/chosen": 0.10826142877340317, "logits/rejected": 0.1734597533941269, "logps/chosen": -221.1148681640625, "logps/rejected": -282.09307861328125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0706288814544678, "rewards/margins": 15.181583404541016, "rewards/rejected": -16.252212524414062, "step": 5620 }, { "epoch": 1.92, "learning_rate": 8.71215907040812e-09, "logits/chosen": 0.14951634407043457, "logits/rejected": 0.15134815871715546, "logps/chosen": -177.4592742919922, "logps/rejected": -378.1666564941406, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.8219153881072998, "rewards/margins": 17.40730857849121, "rewards/rejected": -19.229225158691406, "step": 5621 }, { "epoch": 1.92, "learning_rate": 8.639511434716863e-09, "logits/chosen": 0.0667492225766182, "logits/rejected": 0.0940713956952095, "logps/chosen": -229.8253631591797, "logps/rejected": -414.7611389160156, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.6430950164794922, "rewards/margins": 18.037233352661133, "rewards/rejected": -19.680328369140625, "step": 5622 }, { "epoch": 1.92, "learning_rate": 8.567166645973678e-09, "logits/chosen": 0.09769493341445923, "logits/rejected": 0.12301532179117203, "logps/chosen": -206.17929077148438, "logps/rejected": -377.6883850097656, "loss": 0.059, "rewards/accuracies": 1.0, "rewards/chosen": -0.9264777898788452, "rewards/margins": 15.98169994354248, "rewards/rejected": -16.90817642211914, "step": 5623 }, { "epoch": 1.92, "learning_rate": 8.495124726279001e-09, "logits/chosen": 0.017374569550156593, "logits/rejected": 0.05454045906662941, "logps/chosen": -258.3189697265625, "logps/rejected": -509.8059997558594, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.154253363609314, "rewards/margins": 24.8199405670166, "rewards/rejected": -25.974193572998047, "step": 5624 }, { "epoch": 1.92, "learning_rate": 8.423385697640784e-09, "logits/chosen": 0.12197829782962799, "logits/rejected": 0.16476361453533173, "logps/chosen": -180.8690948486328, "logps/rejected": -353.4076232910156, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -2.3006539344787598, "rewards/margins": 17.483081817626953, "rewards/rejected": -19.783735275268555, "step": 5625 }, { "epoch": 1.92, "learning_rate": 8.351949581973938e-09, "logits/chosen": -0.034844473004341125, "logits/rejected": -0.014306902885437012, "logps/chosen": -240.22352600097656, "logps/rejected": -354.71307373046875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.0097975730895996, "rewards/margins": 13.483464241027832, "rewards/rejected": -15.493260383605957, "step": 5626 }, { "epoch": 1.92, "learning_rate": 8.280816401101676e-09, "logits/chosen": 0.07766983658075333, "logits/rejected": 0.11079222708940506, "logps/chosen": -199.24288940429688, "logps/rejected": -368.2162170410156, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.9234950542449951, "rewards/margins": 16.294050216674805, "rewards/rejected": -18.217546463012695, "step": 5627 }, { "epoch": 1.92, "learning_rate": 8.209986176753948e-09, "logits/chosen": -0.00401999382302165, "logits/rejected": -0.003978651016950607, "logps/chosen": -171.5595245361328, "logps/rejected": -317.8143310546875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.9541325569152832, "rewards/margins": 12.525712966918945, "rewards/rejected": -13.47984504699707, "step": 5628 }, { "epoch": 1.92, "learning_rate": 8.139458930568332e-09, "logits/chosen": 0.01031972374767065, "logits/rejected": 0.05757557973265648, "logps/chosen": -271.30908203125, "logps/rejected": -335.7745361328125, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -2.1557424068450928, "rewards/margins": 12.94714069366455, "rewards/rejected": -15.102883338928223, "step": 5629 }, { "epoch": 1.92, "learning_rate": 8.069234684090153e-09, "logits/chosen": -0.04930777847766876, "logits/rejected": -0.03595039248466492, "logps/chosen": -277.6411437988281, "logps/rejected": -466.0555114746094, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.591042160987854, "rewards/margins": 17.97176742553711, "rewards/rejected": -19.56281089782715, "step": 5630 }, { "epoch": 1.92, "learning_rate": 7.999313458771695e-09, "logits/chosen": -0.041127294301986694, "logits/rejected": -0.0012144246138632298, "logps/chosen": -241.25665283203125, "logps/rejected": -371.54132080078125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.8811221122741699, "rewards/margins": 17.0221004486084, "rewards/rejected": -17.903223037719727, "step": 5631 }, { "epoch": 1.92, "learning_rate": 7.929695275973203e-09, "logits/chosen": 0.1366858333349228, "logits/rejected": 0.20764853060245514, "logps/chosen": -223.86361694335938, "logps/rejected": -397.7121276855469, "loss": 0.0186, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5668609142303467, "rewards/margins": 17.44546127319336, "rewards/rejected": -20.0123233795166, "step": 5632 }, { "epoch": 1.92, "learning_rate": 7.860380156961888e-09, "logits/chosen": 0.06236664950847626, "logits/rejected": 0.0723971575498581, "logps/chosen": -213.5951385498047, "logps/rejected": -335.7076721191406, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.911041498184204, "rewards/margins": 11.813472747802734, "rewards/rejected": -13.724515914916992, "step": 5633 }, { "epoch": 1.92, "learning_rate": 7.791368122912589e-09, "logits/chosen": -0.14202111959457397, "logits/rejected": -0.0998508408665657, "logps/chosen": -231.9381103515625, "logps/rejected": -355.7624816894531, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -2.1161789894104004, "rewards/margins": 15.130926132202148, "rewards/rejected": -17.24710464477539, "step": 5634 }, { "epoch": 1.92, "learning_rate": 7.722659194907555e-09, "logits/chosen": 0.022752024233341217, "logits/rejected": 0.05158774554729462, "logps/chosen": -219.44146728515625, "logps/rejected": -378.98944091796875, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.9479213953018188, "rewards/margins": 16.49932861328125, "rewards/rejected": -17.447248458862305, "step": 5635 }, { "epoch": 1.92, "learning_rate": 7.654253393936439e-09, "logits/chosen": 0.16960953176021576, "logits/rejected": 0.1973731815814972, "logps/chosen": -158.7115478515625, "logps/rejected": -310.6712951660156, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.9286261796951294, "rewards/margins": 17.47653579711914, "rewards/rejected": -19.405162811279297, "step": 5636 }, { "epoch": 1.92, "learning_rate": 7.586150740896302e-09, "logits/chosen": 0.07108200341463089, "logits/rejected": 0.08774477243423462, "logps/chosen": -163.704345703125, "logps/rejected": -348.22625732421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.4110209941864014, "rewards/margins": 14.970041275024414, "rewards/rejected": -17.38106346130371, "step": 5637 }, { "epoch": 1.92, "learning_rate": 7.518351256591394e-09, "logits/chosen": 0.0439818799495697, "logits/rejected": 0.10050461441278458, "logps/chosen": -249.0146026611328, "logps/rejected": -446.33087158203125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.8161399364471436, "rewards/margins": 20.85472869873047, "rewards/rejected": -23.670867919921875, "step": 5638 }, { "epoch": 1.92, "learning_rate": 7.450854961733588e-09, "logits/chosen": 0.10230538994073868, "logits/rejected": 0.13165713846683502, "logps/chosen": -152.5471649169922, "logps/rejected": -273.89605712890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.9977400302886963, "rewards/margins": 12.947526931762695, "rewards/rejected": -14.945266723632812, "step": 5639 }, { "epoch": 1.92, "learning_rate": 7.3836618769422824e-09, "logits/chosen": -0.034753069281578064, "logits/rejected": 1.7821814253693447e-05, "logps/chosen": -235.5726776123047, "logps/rejected": -383.242431640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.8679111003875732, "rewards/margins": 14.495552062988281, "rewards/rejected": -16.363462448120117, "step": 5640 }, { "epoch": 1.93, "learning_rate": 7.316772022743612e-09, "logits/chosen": 0.10088776797056198, "logits/rejected": 0.12248043715953827, "logps/chosen": -217.44866943359375, "logps/rejected": -375.4865417480469, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4694359302520752, "rewards/margins": 16.43802261352539, "rewards/rejected": -17.90745735168457, "step": 5641 }, { "epoch": 1.93, "learning_rate": 7.250185419572008e-09, "logits/chosen": 0.15085740387439728, "logits/rejected": 0.18095526099205017, "logps/chosen": -118.0843734741211, "logps/rejected": -281.5302429199219, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.7932772636413574, "rewards/margins": 17.157325744628906, "rewards/rejected": -18.95060157775879, "step": 5642 }, { "epoch": 1.93, "learning_rate": 7.1839020877684234e-09, "logits/chosen": 0.07548970729112625, "logits/rejected": 0.08731767535209656, "logps/chosen": -209.87136840820312, "logps/rejected": -365.90771484375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.18375007808208466, "rewards/margins": 15.65388298034668, "rewards/rejected": -15.470132827758789, "step": 5643 }, { "epoch": 1.93, "learning_rate": 7.1179220475815485e-09, "logits/chosen": 0.012452028691768646, "logits/rejected": 0.03266283869743347, "logps/chosen": -182.917236328125, "logps/rejected": -342.0550842285156, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.6585818529129028, "rewards/margins": 15.833450317382812, "rewards/rejected": -17.49203109741211, "step": 5644 }, { "epoch": 1.93, "learning_rate": 7.0522453191674826e-09, "logits/chosen": 0.11773856729269028, "logits/rejected": 0.14272278547286987, "logps/chosen": -191.62774658203125, "logps/rejected": -361.2026062011719, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.4479811191558838, "rewards/margins": 19.21582794189453, "rewards/rejected": -20.663808822631836, "step": 5645 }, { "epoch": 1.93, "learning_rate": 6.98687192258951e-09, "logits/chosen": -0.02107454091310501, "logits/rejected": -0.007757882587611675, "logps/chosen": -214.8295440673828, "logps/rejected": -403.6802673339844, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.0080924034118652, "rewards/margins": 17.274255752563477, "rewards/rejected": -19.282346725463867, "step": 5646 }, { "epoch": 1.93, "learning_rate": 6.921801877818212e-09, "logits/chosen": -0.03902255371212959, "logits/rejected": 0.0014939021784812212, "logps/chosen": -204.20074462890625, "logps/rejected": -316.2472839355469, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.1380242556333542, "rewards/margins": 16.290611267089844, "rewards/rejected": -16.42863655090332, "step": 5647 }, { "epoch": 1.93, "learning_rate": 6.857035204731687e-09, "logits/chosen": 0.03894801437854767, "logits/rejected": 0.04831559211015701, "logps/chosen": -212.30178833007812, "logps/rejected": -367.5349426269531, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -2.357847213745117, "rewards/margins": 16.050718307495117, "rewards/rejected": -18.408565521240234, "step": 5648 }, { "epoch": 1.93, "learning_rate": 6.79257192311522e-09, "logits/chosen": 0.22966799139976501, "logits/rejected": 0.27861762046813965, "logps/chosen": -175.48681640625, "logps/rejected": -228.1369171142578, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.94249027967453, "rewards/margins": 11.289945602416992, "rewards/rejected": -12.23243522644043, "step": 5649 }, { "epoch": 1.93, "learning_rate": 6.728412052661503e-09, "logits/chosen": 0.10154816508293152, "logits/rejected": 0.1088503748178482, "logps/chosen": -162.4638214111328, "logps/rejected": -343.3865661621094, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.380204677581787, "rewards/margins": 14.983928680419922, "rewards/rejected": -16.364131927490234, "step": 5650 }, { "epoch": 1.93, "learning_rate": 6.664555612970524e-09, "logits/chosen": 0.03165619447827339, "logits/rejected": 0.07718457281589508, "logps/chosen": -225.067138671875, "logps/rejected": -343.4463806152344, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.8529019355773926, "rewards/margins": 14.515953063964844, "rewards/rejected": -16.36885643005371, "step": 5651 }, { "epoch": 1.93, "learning_rate": 6.6010026235493455e-09, "logits/chosen": 0.10276441276073456, "logits/rejected": 0.130305677652359, "logps/chosen": -244.99012756347656, "logps/rejected": -383.1788024902344, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -1.4486033916473389, "rewards/margins": 15.695242881774902, "rewards/rejected": -17.143844604492188, "step": 5652 }, { "epoch": 1.93, "learning_rate": 6.53775310381266e-09, "logits/chosen": 0.12638315558433533, "logits/rejected": 0.1761602759361267, "logps/chosen": -216.3753662109375, "logps/rejected": -315.3881530761719, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.196358561515808, "rewards/margins": 15.346089363098145, "rewards/rejected": -16.54244613647461, "step": 5653 }, { "epoch": 1.93, "learning_rate": 6.474807073082345e-09, "logits/chosen": 0.027467146515846252, "logits/rejected": 0.04394080862402916, "logps/chosen": -197.0547637939453, "logps/rejected": -376.22027587890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5851726531982422, "rewards/margins": 18.49505615234375, "rewards/rejected": -20.080228805541992, "step": 5654 }, { "epoch": 1.93, "learning_rate": 6.412164550587573e-09, "logits/chosen": 0.11509665101766586, "logits/rejected": 0.14045770466327667, "logps/chosen": -223.6917266845703, "logps/rejected": -361.2901306152344, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -2.653043508529663, "rewards/margins": 15.518579483032227, "rewards/rejected": -18.17162322998047, "step": 5655 }, { "epoch": 1.93, "learning_rate": 6.3498255554647054e-09, "logits/chosen": 0.03889438509941101, "logits/rejected": 0.07690847665071487, "logps/chosen": -251.81094360351562, "logps/rejected": -405.4430847167969, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.9444607496261597, "rewards/margins": 16.376760482788086, "rewards/rejected": -18.32122039794922, "step": 5656 }, { "epoch": 1.93, "learning_rate": 6.2877901067573955e-09, "logits/chosen": 0.038411837071180344, "logits/rejected": 0.05715736746788025, "logps/chosen": -236.6034698486328, "logps/rejected": -425.66461181640625, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -1.3414297103881836, "rewards/margins": 17.934667587280273, "rewards/rejected": -19.27609634399414, "step": 5657 }, { "epoch": 1.93, "learning_rate": 6.226058223416708e-09, "logits/chosen": -0.016787858679890633, "logits/rejected": 0.003988976124674082, "logps/chosen": -187.31048583984375, "logps/rejected": -356.7852783203125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.1655539274215698, "rewards/margins": 17.564687728881836, "rewards/rejected": -18.730239868164062, "step": 5658 }, { "epoch": 1.93, "learning_rate": 6.16462992430089e-09, "logits/chosen": 0.13745103776454926, "logits/rejected": 0.17799973487854004, "logps/chosen": -176.845947265625, "logps/rejected": -311.28253173828125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.9632530212402344, "rewards/margins": 16.716537475585938, "rewards/rejected": -18.679790496826172, "step": 5659 }, { "epoch": 1.93, "learning_rate": 6.103505228175376e-09, "logits/chosen": 0.05614983290433884, "logits/rejected": 0.09301489591598511, "logps/chosen": -187.49789428710938, "logps/rejected": -313.147705078125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.679518222808838, "rewards/margins": 14.378716468811035, "rewards/rejected": -17.0582332611084, "step": 5660 }, { "epoch": 1.93, "learning_rate": 6.042684153713007e-09, "logits/chosen": 0.007917137816548347, "logits/rejected": 0.02882060781121254, "logps/chosen": -187.51895141601562, "logps/rejected": -292.316162109375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -2.3438422679901123, "rewards/margins": 11.128829956054688, "rewards/rejected": -13.472671508789062, "step": 5661 }, { "epoch": 1.93, "learning_rate": 5.982166719493587e-09, "logits/chosen": 0.059268295764923096, "logits/rejected": 0.0558742918074131, "logps/chosen": -183.63449096679688, "logps/rejected": -375.455322265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.293700695037842, "rewards/margins": 16.508010864257812, "rewards/rejected": -20.801712036132812, "step": 5662 }, { "epoch": 1.93, "learning_rate": 5.92195294400466e-09, "logits/chosen": 0.025743432343006134, "logits/rejected": 0.05519842728972435, "logps/chosen": -250.26585388183594, "logps/rejected": -422.9781799316406, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -2.591444730758667, "rewards/margins": 19.034440994262695, "rewards/rejected": -21.625885009765625, "step": 5663 }, { "epoch": 1.93, "learning_rate": 5.862042845640403e-09, "logits/chosen": 0.07419147342443466, "logits/rejected": 0.07201416045427322, "logps/chosen": -153.92710876464844, "logps/rejected": -304.7781982421875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.4177156686782837, "rewards/margins": 12.528183937072754, "rewards/rejected": -13.945899963378906, "step": 5664 }, { "epoch": 1.93, "learning_rate": 5.80243644270273e-09, "logits/chosen": 0.05976365879178047, "logits/rejected": 0.07628362625837326, "logps/chosen": -200.2917938232422, "logps/rejected": -342.30084228515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.4821982383728027, "rewards/margins": 15.189577102661133, "rewards/rejected": -16.671775817871094, "step": 5665 }, { "epoch": 1.93, "learning_rate": 5.743133753400409e-09, "logits/chosen": 0.033900186419487, "logits/rejected": 0.06994087994098663, "logps/chosen": -234.83216857910156, "logps/rejected": -405.4918212890625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.1461501568555832, "rewards/margins": 19.613000869750977, "rewards/rejected": -19.759153366088867, "step": 5666 }, { "epoch": 1.93, "learning_rate": 5.684134795849727e-09, "logits/chosen": 0.11166010051965714, "logits/rejected": 0.15243487060070038, "logps/chosen": -270.22930908203125, "logps/rejected": -427.03857421875, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -0.5335111618041992, "rewards/margins": 17.800128936767578, "rewards/rejected": -18.333641052246094, "step": 5667 }, { "epoch": 1.93, "learning_rate": 5.6254395880740435e-09, "logits/chosen": 0.07726088166236877, "logits/rejected": 0.09644681960344315, "logps/chosen": -232.44281005859375, "logps/rejected": -371.45526123046875, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.3233962059020996, "rewards/margins": 16.740219116210938, "rewards/rejected": -17.063615798950195, "step": 5668 }, { "epoch": 1.93, "learning_rate": 5.567048148003795e-09, "logits/chosen": 0.03697101026773453, "logits/rejected": 0.06974731385707855, "logps/chosen": -229.48294067382812, "logps/rejected": -393.166259765625, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 0.16659733653068542, "rewards/margins": 19.601755142211914, "rewards/rejected": -19.435155868530273, "step": 5669 }, { "epoch": 1.94, "learning_rate": 5.508960493476933e-09, "logits/chosen": 0.0584818534553051, "logits/rejected": 0.06452374160289764, "logps/chosen": -152.91273498535156, "logps/rejected": -340.944091796875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.6271870732307434, "rewards/margins": 16.172555923461914, "rewards/rejected": -16.79974365234375, "step": 5670 }, { "epoch": 1.94, "learning_rate": 5.451176642238375e-09, "logits/chosen": 0.07488106191158295, "logits/rejected": 0.09666585922241211, "logps/chosen": -224.97634887695312, "logps/rejected": -389.80853271484375, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.4682986736297607, "rewards/margins": 18.469860076904297, "rewards/rejected": -19.938159942626953, "step": 5671 }, { "epoch": 1.94, "learning_rate": 5.393696611940224e-09, "logits/chosen": -0.0002542775182519108, "logits/rejected": 0.03459375724196434, "logps/chosen": -219.81529235839844, "logps/rejected": -390.168212890625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.6035442352294922, "rewards/margins": 17.305784225463867, "rewards/rejected": -17.90932846069336, "step": 5672 }, { "epoch": 1.94, "learning_rate": 5.336520420141988e-09, "logits/chosen": 0.0038849194534122944, "logits/rejected": 0.03592681139707565, "logps/chosen": -225.91525268554688, "logps/rejected": -397.12884521484375, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.47735753655433655, "rewards/margins": 19.699134826660156, "rewards/rejected": -20.176490783691406, "step": 5673 }, { "epoch": 1.94, "learning_rate": 5.2796480843100285e-09, "logits/chosen": 0.13561898469924927, "logits/rejected": 0.1409212350845337, "logps/chosen": -235.05076599121094, "logps/rejected": -400.6573791503906, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.722599983215332, "rewards/margins": 17.039243698120117, "rewards/rejected": -17.761844635009766, "step": 5674 }, { "epoch": 1.94, "learning_rate": 5.223079621818116e-09, "logits/chosen": 0.05763490870594978, "logits/rejected": 0.09038780629634857, "logps/chosen": -220.90155029296875, "logps/rejected": -394.76300048828125, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.507143020629883, "rewards/margins": 17.126300811767578, "rewards/rejected": -19.63344383239746, "step": 5675 }, { "epoch": 1.94, "learning_rate": 5.166815049947204e-09, "logits/chosen": 0.1023809015750885, "logits/rejected": 0.10507077723741531, "logps/chosen": -174.68887329101562, "logps/rejected": -311.8010559082031, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.0770277976989746, "rewards/margins": 12.588135719299316, "rewards/rejected": -13.66516399383545, "step": 5676 }, { "epoch": 1.94, "learning_rate": 5.110854385885322e-09, "logits/chosen": -0.014877507463097572, "logits/rejected": 1.3643704733112827e-05, "logps/chosen": -247.5346221923828, "logps/rejected": -477.74200439453125, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -3.948655128479004, "rewards/margins": 20.76690101623535, "rewards/rejected": -24.71555519104004, "step": 5677 }, { "epoch": 1.94, "learning_rate": 5.0551976467275715e-09, "logits/chosen": 0.017432864755392075, "logits/rejected": 0.06591658294200897, "logps/chosen": -232.56736755371094, "logps/rejected": -411.12652587890625, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -0.4585118293762207, "rewards/margins": 17.647172927856445, "rewards/rejected": -18.10568618774414, "step": 5678 }, { "epoch": 1.94, "learning_rate": 4.9998448494764644e-09, "logits/chosen": 0.20772074162960052, "logits/rejected": 0.22276915609836578, "logps/chosen": -192.07958984375, "logps/rejected": -413.7492980957031, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.1410276889801025, "rewards/margins": 19.731399536132812, "rewards/rejected": -20.872425079345703, "step": 5679 }, { "epoch": 1.94, "learning_rate": 4.944796011041474e-09, "logits/chosen": -0.0672796368598938, "logits/rejected": -0.040031228214502335, "logps/chosen": -268.61737060546875, "logps/rejected": -444.80316162109375, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.2259100675582886, "rewards/margins": 17.46574592590332, "rewards/rejected": -18.691654205322266, "step": 5680 }, { "epoch": 1.94, "learning_rate": 4.89005114823926e-09, "logits/chosen": 0.03747731074690819, "logits/rejected": 0.10079661011695862, "logps/chosen": -212.7790069580078, "logps/rejected": -329.4067687988281, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.9588397741317749, "rewards/margins": 16.850627899169922, "rewards/rejected": -17.80946922302246, "step": 5681 }, { "epoch": 1.94, "learning_rate": 4.835610277793445e-09, "logits/chosen": 0.10363668203353882, "logits/rejected": 0.12273101508617401, "logps/chosen": -190.95883178710938, "logps/rejected": -329.6629943847656, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -1.0047268867492676, "rewards/margins": 15.32940673828125, "rewards/rejected": -16.33413314819336, "step": 5682 }, { "epoch": 1.94, "learning_rate": 4.78147341633528e-09, "logits/chosen": 0.03203326836228371, "logits/rejected": 0.07882364094257355, "logps/chosen": -232.12313842773438, "logps/rejected": -397.868408203125, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -1.732179045677185, "rewards/margins": 19.578920364379883, "rewards/rejected": -21.311098098754883, "step": 5683 }, { "epoch": 1.94, "learning_rate": 4.727640580402537e-09, "logits/chosen": 0.07037757337093353, "logits/rejected": 0.1201278492808342, "logps/chosen": -264.6301574707031, "logps/rejected": -466.5718994140625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.8120150566101074, "rewards/margins": 22.51472282409668, "rewards/rejected": -23.326740264892578, "step": 5684 }, { "epoch": 1.94, "learning_rate": 4.674111786440727e-09, "logits/chosen": 0.06572601944208145, "logits/rejected": 0.0873127281665802, "logps/chosen": -189.232421875, "logps/rejected": -367.3251647949219, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.3793060779571533, "rewards/margins": 15.567544937133789, "rewards/rejected": -16.946849822998047, "step": 5685 }, { "epoch": 1.94, "learning_rate": 4.62088705080177e-09, "logits/chosen": 0.09826024621725082, "logits/rejected": 0.11338471621274948, "logps/chosen": -183.94468688964844, "logps/rejected": -324.69781494140625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.2880256175994873, "rewards/margins": 14.007566452026367, "rewards/rejected": -16.295589447021484, "step": 5686 }, { "epoch": 1.94, "learning_rate": 4.567966389745437e-09, "logits/chosen": 0.12116816639900208, "logits/rejected": 0.13566027581691742, "logps/chosen": -228.48443603515625, "logps/rejected": -484.5063781738281, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.267268657684326, "rewards/margins": 20.80677032470703, "rewards/rejected": -23.074039459228516, "step": 5687 }, { "epoch": 1.94, "learning_rate": 4.515349819438019e-09, "logits/chosen": 0.12741771340370178, "logits/rejected": 0.1565469205379486, "logps/chosen": -183.7506866455078, "logps/rejected": -296.0252685546875, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -1.5300272703170776, "rewards/margins": 14.767470359802246, "rewards/rejected": -16.297496795654297, "step": 5688 }, { "epoch": 1.94, "learning_rate": 4.463037355953325e-09, "logits/chosen": -0.0008687556255608797, "logits/rejected": 0.056220971047878265, "logps/chosen": -244.182861328125, "logps/rejected": -394.69219970703125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.7821518182754517, "rewards/margins": 20.85626220703125, "rewards/rejected": -22.638412475585938, "step": 5689 }, { "epoch": 1.94, "learning_rate": 4.4110290152719055e-09, "logits/chosen": 0.1867694854736328, "logits/rejected": 0.22005575895309448, "logps/chosen": -167.44093322753906, "logps/rejected": -291.3630676269531, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.4831713438034058, "rewards/margins": 12.941051483154297, "rewards/rejected": -14.424222946166992, "step": 5690 }, { "epoch": 1.94, "learning_rate": 4.359324813281717e-09, "logits/chosen": 0.07939304411411285, "logits/rejected": 0.12846846878528595, "logps/chosen": -196.18650817871094, "logps/rejected": -347.8204040527344, "loss": 0.0682, "rewards/accuracies": 1.0, "rewards/chosen": -0.7321776747703552, "rewards/margins": 17.513782501220703, "rewards/rejected": -18.245960235595703, "step": 5691 }, { "epoch": 1.94, "learning_rate": 4.3079247657776815e-09, "logits/chosen": -0.06329259276390076, "logits/rejected": -0.009328171610832214, "logps/chosen": -257.1244201660156, "logps/rejected": -344.7905578613281, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.32869377732276917, "rewards/margins": 15.067327499389648, "rewards/rejected": -15.39602279663086, "step": 5692 }, { "epoch": 1.94, "learning_rate": 4.256828888461794e-09, "logits/chosen": 0.08237593621015549, "logits/rejected": 0.1382906287908554, "logps/chosen": -185.9806365966797, "logps/rejected": -369.79595947265625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.2706716060638428, "rewards/margins": 17.092506408691406, "rewards/rejected": -19.363176345825195, "step": 5693 }, { "epoch": 1.94, "learning_rate": 4.206037196943124e-09, "logits/chosen": 0.050935450941324234, "logits/rejected": 0.06497931480407715, "logps/chosen": -248.41171264648438, "logps/rejected": -422.4398498535156, "loss": 0.0188, "rewards/accuracies": 0.9375, "rewards/chosen": -1.22538423538208, "rewards/margins": 15.482837677001953, "rewards/rejected": -16.708219528198242, "step": 5694 }, { "epoch": 1.94, "learning_rate": 4.155549706737815e-09, "logits/chosen": 0.14605554938316345, "logits/rejected": 0.13426938652992249, "logps/chosen": -186.95852661132812, "logps/rejected": -349.1944274902344, "loss": 0.0496, "rewards/accuracies": 1.0, "rewards/chosen": -1.4684165716171265, "rewards/margins": 13.352787017822266, "rewards/rejected": -14.821203231811523, "step": 5695 }, { "epoch": 1.94, "learning_rate": 4.105366433269086e-09, "logits/chosen": 0.1243281364440918, "logits/rejected": 0.15424057841300964, "logps/chosen": -245.43499755859375, "logps/rejected": -399.56939697265625, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -1.747840404510498, "rewards/margins": 16.559350967407227, "rewards/rejected": -18.307191848754883, "step": 5696 }, { "epoch": 1.94, "learning_rate": 4.05548739186734e-09, "logits/chosen": 0.07922506332397461, "logits/rejected": 0.11253030598163605, "logps/chosen": -193.49913024902344, "logps/rejected": -387.8526611328125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.626594305038452, "rewards/margins": 18.718172073364258, "rewards/rejected": -21.34476661682129, "step": 5697 }, { "epoch": 1.94, "learning_rate": 4.005912597769834e-09, "logits/chosen": 0.08032500743865967, "logits/rejected": 0.12690338492393494, "logps/chosen": -207.32192993164062, "logps/rejected": -299.47161865234375, "loss": 0.0254, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5504710674285889, "rewards/margins": 12.910161972045898, "rewards/rejected": -14.46063232421875, "step": 5698 }, { "epoch": 1.95, "learning_rate": 3.956642066121008e-09, "logits/chosen": 0.07480398565530777, "logits/rejected": 0.13026976585388184, "logps/chosen": -190.92007446289062, "logps/rejected": -297.1832580566406, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -2.2365739345550537, "rewards/margins": 16.82230567932129, "rewards/rejected": -19.05887794494629, "step": 5699 }, { "epoch": 1.95, "learning_rate": 3.907675811972267e-09, "logits/chosen": 0.10813438892364502, "logits/rejected": 0.10952573269605637, "logps/chosen": -227.2371826171875, "logps/rejected": -470.1002197265625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -2.8749070167541504, "rewards/margins": 21.913467407226562, "rewards/rejected": -24.788375854492188, "step": 5700 }, { "epoch": 1.95, "learning_rate": 3.859013850282311e-09, "logits/chosen": 0.02039151079952717, "logits/rejected": 0.0625113919377327, "logps/chosen": -186.72203063964844, "logps/rejected": -328.8477783203125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.7391104698181152, "rewards/margins": 15.512580871582031, "rewards/rejected": -19.251691818237305, "step": 5701 }, { "epoch": 1.95, "learning_rate": 3.810656195916473e-09, "logits/chosen": 0.0842248797416687, "logits/rejected": 0.10162080824375153, "logps/chosen": -249.29795837402344, "logps/rejected": -447.7738037109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.19944173097610474, "rewards/margins": 22.035755157470703, "rewards/rejected": -21.836313247680664, "step": 5702 }, { "epoch": 1.95, "learning_rate": 3.76260286364749e-09, "logits/chosen": 0.0685553178191185, "logits/rejected": 0.10291524231433868, "logps/chosen": -162.99673461914062, "logps/rejected": -273.0704345703125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.1912083625793457, "rewards/margins": 12.30543041229248, "rewards/rejected": -14.496638298034668, "step": 5703 }, { "epoch": 1.95, "learning_rate": 3.7148538681549545e-09, "logits/chosen": 0.08973605185747147, "logits/rejected": 0.12969735264778137, "logps/chosen": -265.5903015136719, "logps/rejected": -330.0547790527344, "loss": 0.0602, "rewards/accuracies": 1.0, "rewards/chosen": -1.6419240236282349, "rewards/margins": 14.570518493652344, "rewards/rejected": -16.21244239807129, "step": 5704 }, { "epoch": 1.95, "learning_rate": 3.6674092240255304e-09, "logits/chosen": 0.0493229404091835, "logits/rejected": 0.06805916875600815, "logps/chosen": -178.23568725585938, "logps/rejected": -306.78607177734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8040518164634705, "rewards/margins": 14.66627025604248, "rewards/rejected": -15.470321655273438, "step": 5705 }, { "epoch": 1.95, "learning_rate": 3.620268945752847e-09, "logits/chosen": 0.05245829001069069, "logits/rejected": 0.07668482512235641, "logps/chosen": -213.60983276367188, "logps/rejected": -343.257080078125, "loss": 0.0834, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6779282093048096, "rewards/margins": 12.242884635925293, "rewards/rejected": -13.920814514160156, "step": 5706 }, { "epoch": 1.95, "learning_rate": 3.573433047737717e-09, "logits/chosen": -0.031614355742931366, "logits/rejected": 0.011746615171432495, "logps/chosen": -245.3734130859375, "logps/rejected": -457.9759521484375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.1817986965179443, "rewards/margins": 18.88515281677246, "rewards/rejected": -20.066951751708984, "step": 5707 }, { "epoch": 1.95, "learning_rate": 3.526901544287808e-09, "logits/chosen": 0.08093749731779099, "logits/rejected": 0.11224281787872314, "logps/chosen": -202.72482299804688, "logps/rejected": -412.2832336425781, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.5063538551330566, "rewards/margins": 19.696765899658203, "rewards/rejected": -22.2031192779541, "step": 5708 }, { "epoch": 1.95, "learning_rate": 3.48067444961786e-09, "logits/chosen": 0.06377621740102768, "logits/rejected": 0.09841030091047287, "logps/chosen": -237.0762481689453, "logps/rejected": -354.48492431640625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6027473211288452, "rewards/margins": 18.20676040649414, "rewards/rejected": -18.809507369995117, "step": 5709 }, { "epoch": 1.95, "learning_rate": 3.434751777849576e-09, "logits/chosen": -0.08961884677410126, "logits/rejected": -0.04876592382788658, "logps/chosen": -263.81427001953125, "logps/rejected": -391.7444152832031, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 0.06414882093667984, "rewards/margins": 17.94561195373535, "rewards/rejected": -17.88146209716797, "step": 5710 }, { "epoch": 1.95, "learning_rate": 3.389133543011735e-09, "logits/chosen": 0.08376327157020569, "logits/rejected": 0.10797670483589172, "logps/chosen": -150.4098663330078, "logps/rejected": -297.8883361816406, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -2.7988851070404053, "rewards/margins": 13.933160781860352, "rewards/rejected": -16.732044219970703, "step": 5711 }, { "epoch": 1.95, "learning_rate": 3.3438197590400787e-09, "logits/chosen": 0.10327115654945374, "logits/rejected": 0.1146334707736969, "logps/chosen": -177.03370666503906, "logps/rejected": -360.3916015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.4721823930740356, "rewards/margins": 16.502731323242188, "rewards/rejected": -17.97491455078125, "step": 5712 }, { "epoch": 1.95, "learning_rate": 3.298810439777311e-09, "logits/chosen": -0.004948806017637253, "logits/rejected": 0.018215354532003403, "logps/chosen": -239.1412811279297, "logps/rejected": -414.5157470703125, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.465747594833374, "rewards/margins": 18.19580841064453, "rewards/rejected": -19.661556243896484, "step": 5713 }, { "epoch": 1.95, "learning_rate": 3.2541055989731002e-09, "logits/chosen": -0.11303384602069855, "logits/rejected": -0.06776084750890732, "logps/chosen": -219.10043334960938, "logps/rejected": -339.2807312011719, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.463590383529663, "rewards/margins": 16.400348663330078, "rewards/rejected": -18.863943099975586, "step": 5714 }, { "epoch": 1.95, "learning_rate": 3.2097052502843e-09, "logits/chosen": -0.0335485003888607, "logits/rejected": -0.00243952963501215, "logps/chosen": -225.32418823242188, "logps/rejected": -393.2782897949219, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.084826111793518, "rewards/margins": 18.53461265563965, "rewards/rejected": -19.619441986083984, "step": 5715 }, { "epoch": 1.95, "learning_rate": 3.1656094072746163e-09, "logits/chosen": 0.02018260583281517, "logits/rejected": 0.064321368932724, "logps/chosen": -187.19119262695312, "logps/rejected": -346.3973388671875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -3.463078737258911, "rewards/margins": 15.780471801757812, "rewards/rejected": -19.243549346923828, "step": 5716 }, { "epoch": 1.95, "learning_rate": 3.1218180834144957e-09, "logits/chosen": -0.001544965780340135, "logits/rejected": 0.05246094614267349, "logps/chosen": -280.7773742675781, "logps/rejected": -388.7720947265625, "loss": 0.0481, "rewards/accuracies": 1.0, "rewards/chosen": -1.1474272012710571, "rewards/margins": 20.44232177734375, "rewards/rejected": -21.589750289916992, "step": 5717 }, { "epoch": 1.95, "learning_rate": 3.078331292081793e-09, "logits/chosen": 0.04067390784621239, "logits/rejected": 0.06037954241037369, "logps/chosen": -293.1089782714844, "logps/rejected": -464.7379455566406, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.4887728691101074, "rewards/margins": 18.393417358398438, "rewards/rejected": -21.88218879699707, "step": 5718 }, { "epoch": 1.95, "learning_rate": 3.035149046561103e-09, "logits/chosen": -0.03664817661046982, "logits/rejected": -0.03561408817768097, "logps/chosen": -168.8181915283203, "logps/rejected": -339.6459045410156, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.4923474788665771, "rewards/margins": 14.686155319213867, "rewards/rejected": -16.178503036499023, "step": 5719 }, { "epoch": 1.95, "learning_rate": 2.9922713600439852e-09, "logits/chosen": 0.07384499907493591, "logits/rejected": 0.0893978551030159, "logps/chosen": -215.72132873535156, "logps/rejected": -351.6182556152344, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7391929626464844, "rewards/margins": 15.209745407104492, "rewards/rejected": -16.94894027709961, "step": 5720 }, { "epoch": 1.95, "learning_rate": 2.949698245628851e-09, "logits/chosen": 0.04806828871369362, "logits/rejected": 0.05315346643328667, "logps/chosen": -186.54148864746094, "logps/rejected": -348.7995910644531, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.0532491207122803, "rewards/margins": 13.432605743408203, "rewards/rejected": -16.485855102539062, "step": 5721 }, { "epoch": 1.95, "learning_rate": 2.907429716321408e-09, "logits/chosen": 0.07075570523738861, "logits/rejected": 0.06449031084775925, "logps/chosen": -156.8021240234375, "logps/rejected": -325.05718994140625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -2.0929055213928223, "rewards/margins": 14.659526824951172, "rewards/rejected": -16.752431869506836, "step": 5722 }, { "epoch": 1.95, "learning_rate": 2.8654657850339936e-09, "logits/chosen": 0.01978606916964054, "logits/rejected": 0.022798405960202217, "logps/chosen": -225.6674041748047, "logps/rejected": -404.8405456542969, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.5849319696426392, "rewards/margins": 15.765127182006836, "rewards/rejected": -17.350059509277344, "step": 5723 }, { "epoch": 1.95, "learning_rate": 2.8238064645859095e-09, "logits/chosen": 0.0680723637342453, "logits/rejected": 0.07812868058681488, "logps/chosen": -162.28465270996094, "logps/rejected": -296.00732421875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.7566945552825928, "rewards/margins": 13.267722129821777, "rewards/rejected": -16.024415969848633, "step": 5724 }, { "epoch": 1.95, "learning_rate": 2.782451767703753e-09, "logits/chosen": -0.03668376803398132, "logits/rejected": 0.009430630132555962, "logps/chosen": -233.29722595214844, "logps/rejected": -261.4169616699219, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.3194750547409058, "rewards/margins": 10.476106643676758, "rewards/rejected": -11.795580863952637, "step": 5725 }, { "epoch": 1.95, "learning_rate": 2.7414017070206406e-09, "logits/chosen": 0.09399040788412094, "logits/rejected": 0.09771132469177246, "logps/chosen": -197.31692504882812, "logps/rejected": -369.62115478515625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.4759759902954102, "rewards/margins": 15.66059684753418, "rewards/rejected": -17.136571884155273, "step": 5726 }, { "epoch": 1.95, "learning_rate": 2.7006562950767642e-09, "logits/chosen": 0.059289950877428055, "logits/rejected": 0.0980198010802269, "logps/chosen": -251.8964385986328, "logps/rejected": -355.1877746582031, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -2.0736682415008545, "rewards/margins": 13.50981616973877, "rewards/rejected": -15.583484649658203, "step": 5727 }, { "epoch": 1.95, "learning_rate": 2.6602155443194997e-09, "logits/chosen": 0.06801997125148773, "logits/rejected": 0.13203267753124237, "logps/chosen": -223.94204711914062, "logps/rejected": -314.48358154296875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -2.8310632705688477, "rewards/margins": 14.522954940795898, "rewards/rejected": -17.354019165039062, "step": 5728 }, { "epoch": 1.96, "learning_rate": 2.6200794671027427e-09, "logits/chosen": 0.1060263067483902, "logits/rejected": 0.1297263354063034, "logps/chosen": -165.88633728027344, "logps/rejected": -328.7735595703125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.563359022140503, "rewards/margins": 14.196035385131836, "rewards/rejected": -17.7593936920166, "step": 5729 }, { "epoch": 1.96, "learning_rate": 2.5802480756874635e-09, "logits/chosen": -5.450601383927278e-05, "logits/rejected": 0.031402572989463806, "logps/chosen": -164.105712890625, "logps/rejected": -296.0550842285156, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -0.16545870900154114, "rewards/margins": 15.163028717041016, "rewards/rejected": -15.328487396240234, "step": 5730 }, { "epoch": 1.96, "learning_rate": 2.5407213822418173e-09, "logits/chosen": 0.030884960666298866, "logits/rejected": 0.0692584365606308, "logps/chosen": -244.7603759765625, "logps/rejected": -391.90618896484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.603473424911499, "rewards/margins": 17.527530670166016, "rewards/rejected": -16.92405891418457, "step": 5731 }, { "epoch": 1.96, "learning_rate": 2.501499398840479e-09, "logits/chosen": 0.07645855844020844, "logits/rejected": 0.09481833130121231, "logps/chosen": -225.5521697998047, "logps/rejected": -363.1095275878906, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": -1.6264625787734985, "rewards/margins": 14.369077682495117, "rewards/rejected": -15.995539665222168, "step": 5732 }, { "epoch": 1.96, "learning_rate": 2.4625821374653077e-09, "logits/chosen": -0.05461762100458145, "logits/rejected": -0.03597787022590637, "logps/chosen": -158.3149871826172, "logps/rejected": -355.4045104980469, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.408361554145813, "rewards/margins": 16.739852905273438, "rewards/rejected": -18.14821434020996, "step": 5733 }, { "epoch": 1.96, "learning_rate": 2.423969610005017e-09, "logits/chosen": 0.10120625048875809, "logits/rejected": 0.12339700758457184, "logps/chosen": -213.6613311767578, "logps/rejected": -387.4797668457031, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.1304839849472046, "rewards/margins": 18.289011001586914, "rewards/rejected": -19.419496536254883, "step": 5734 }, { "epoch": 1.96, "learning_rate": 2.38566182825517e-09, "logits/chosen": 0.059017159044742584, "logits/rejected": 0.0823030099272728, "logps/chosen": -179.93453979492188, "logps/rejected": -364.1524658203125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.574342966079712, "rewards/margins": 16.5024471282959, "rewards/rejected": -19.07678985595703, "step": 5735 }, { "epoch": 1.96, "learning_rate": 2.3476588039181843e-09, "logits/chosen": 0.08003819733858109, "logits/rejected": 0.10595917701721191, "logps/chosen": -171.33531188964844, "logps/rejected": -353.0979919433594, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.1274304389953613, "rewards/margins": 16.58086585998535, "rewards/rejected": -18.708293914794922, "step": 5736 }, { "epoch": 1.96, "learning_rate": 2.309960548603551e-09, "logits/chosen": 0.04528099671006203, "logits/rejected": 0.03910820186138153, "logps/chosen": -125.11575317382812, "logps/rejected": -333.40631103515625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.5537219047546387, "rewards/margins": 15.813961029052734, "rewards/rejected": -18.367679595947266, "step": 5737 }, { "epoch": 1.96, "learning_rate": 2.272567073827614e-09, "logits/chosen": 0.059744611382484436, "logits/rejected": 0.100760817527771, "logps/chosen": -196.93463134765625, "logps/rejected": -359.93994140625, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.085330069065094, "rewards/margins": 19.095706939697266, "rewards/rejected": -19.1810359954834, "step": 5738 }, { "epoch": 1.96, "learning_rate": 2.2354783910134566e-09, "logits/chosen": 0.09354608505964279, "logits/rejected": 0.09880289435386658, "logps/chosen": -202.1763916015625, "logps/rejected": -378.9699401855469, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.063951253890991, "rewards/margins": 16.538673400878906, "rewards/rejected": -18.602622985839844, "step": 5739 }, { "epoch": 1.96, "learning_rate": 2.1986945114911283e-09, "logits/chosen": 0.07520155608654022, "logits/rejected": 0.11804114282131195, "logps/chosen": -257.2941589355469, "logps/rejected": -379.6567687988281, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.4461132287979126, "rewards/margins": 16.07635498046875, "rewards/rejected": -17.52246856689453, "step": 5740 }, { "epoch": 1.96, "learning_rate": 2.1622154464977506e-09, "logits/chosen": 0.08464759588241577, "logits/rejected": 0.11111016571521759, "logps/chosen": -168.35552978515625, "logps/rejected": -346.366455078125, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.6105341911315918, "rewards/margins": 17.418317794799805, "rewards/rejected": -19.028852462768555, "step": 5741 }, { "epoch": 1.96, "learning_rate": 2.1260412071770762e-09, "logits/chosen": 0.13624508678913116, "logits/rejected": 0.18110895156860352, "logps/chosen": -207.66238403320312, "logps/rejected": -395.77764892578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.0110466480255127, "rewards/margins": 18.306533813476562, "rewards/rejected": -20.31757926940918, "step": 5742 }, { "epoch": 1.96, "learning_rate": 2.0901718045798213e-09, "logits/chosen": 0.11836478114128113, "logits/rejected": 0.12576906383037567, "logps/chosen": -180.484619140625, "logps/rejected": -347.9132995605469, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.2047452926635742, "rewards/margins": 15.629377365112305, "rewards/rejected": -16.834121704101562, "step": 5743 }, { "epoch": 1.96, "learning_rate": 2.054607249663665e-09, "logits/chosen": 0.029255371540784836, "logits/rejected": 0.03808443248271942, "logps/chosen": -225.61471557617188, "logps/rejected": -457.2340393066406, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.486698865890503, "rewards/margins": 20.460039138793945, "rewards/rejected": -21.946739196777344, "step": 5744 }, { "epoch": 1.96, "learning_rate": 2.0193475532930272e-09, "logits/chosen": 0.05384672060608864, "logits/rejected": 0.05271609500050545, "logps/chosen": -208.89410400390625, "logps/rejected": -388.22711181640625, "loss": 0.027, "rewards/accuracies": 0.9375, "rewards/chosen": -2.374959945678711, "rewards/margins": 16.000234603881836, "rewards/rejected": -18.375194549560547, "step": 5745 }, { "epoch": 1.96, "learning_rate": 1.9843927262392924e-09, "logits/chosen": 0.12389394640922546, "logits/rejected": 0.15986022353172302, "logps/chosen": -210.1195526123047, "logps/rejected": -367.8733825683594, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.5605978965759277, "rewards/margins": 15.958328247070312, "rewards/rejected": -18.5189266204834, "step": 5746 }, { "epoch": 1.96, "learning_rate": 1.9497427791806965e-09, "logits/chosen": 0.10620158910751343, "logits/rejected": 0.13238073885440826, "logps/chosen": -207.79742431640625, "logps/rejected": -344.3209228515625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3343447744846344, "rewards/margins": 16.257556915283203, "rewards/rejected": -16.591899871826172, "step": 5747 }, { "epoch": 1.96, "learning_rate": 1.9153977227022167e-09, "logits/chosen": 0.07634439319372177, "logits/rejected": 0.11228104680776596, "logps/chosen": -215.06101989746094, "logps/rejected": -438.0091857910156, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.403207540512085, "rewards/margins": 21.67041778564453, "rewards/rejected": -23.073627471923828, "step": 5748 }, { "epoch": 1.96, "learning_rate": 1.8813575672959047e-09, "logits/chosen": 0.11124102771282196, "logits/rejected": 0.13451595604419708, "logps/chosen": -183.75650024414062, "logps/rejected": -323.15283203125, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.0735441446304321, "rewards/margins": 14.900917053222656, "rewards/rejected": -15.97446060180664, "step": 5749 }, { "epoch": 1.96, "learning_rate": 1.8476223233604426e-09, "logits/chosen": 0.15044550597667694, "logits/rejected": 0.16186420619487762, "logps/chosen": -206.79632568359375, "logps/rejected": -370.822021484375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -3.0015993118286133, "rewards/margins": 14.185955047607422, "rewards/rejected": -17.18755340576172, "step": 5750 }, { "epoch": 1.96, "learning_rate": 1.8141920012015865e-09, "logits/chosen": 0.07878164947032928, "logits/rejected": 0.10384983569383621, "logps/chosen": -172.24412536621094, "logps/rejected": -312.85418701171875, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4952141046524048, "rewards/margins": 15.851896286010742, "rewards/rejected": -17.347108840942383, "step": 5751 }, { "epoch": 1.96, "learning_rate": 1.781066611031834e-09, "logits/chosen": 0.04752703756093979, "logits/rejected": 0.06073176488280296, "logps/chosen": -233.16722106933594, "logps/rejected": -421.69683837890625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.091843843460083, "rewards/margins": 18.00995635986328, "rewards/rejected": -20.10179901123047, "step": 5752 }, { "epoch": 1.96, "learning_rate": 1.7482461629705347e-09, "logits/chosen": 0.007465985603630543, "logits/rejected": 0.01779763586819172, "logps/chosen": -235.14349365234375, "logps/rejected": -429.6511535644531, "loss": 0.0831, "rewards/accuracies": 1.0, "rewards/chosen": -2.795175075531006, "rewards/margins": 17.889646530151367, "rewards/rejected": -20.6848201751709, "step": 5753 }, { "epoch": 1.96, "learning_rate": 1.715730667043891e-09, "logits/chosen": 0.13955898582935333, "logits/rejected": 0.15048877894878387, "logps/chosen": -210.1788787841797, "logps/rejected": -353.11712646484375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.3103749752044678, "rewards/margins": 16.13507080078125, "rewards/rejected": -17.445446014404297, "step": 5754 }, { "epoch": 1.96, "learning_rate": 1.6835201331847349e-09, "logits/chosen": 0.1531255692243576, "logits/rejected": 0.15747004747390747, "logps/chosen": -203.18753051757812, "logps/rejected": -394.2486267089844, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -2.9765923023223877, "rewards/margins": 18.196325302124023, "rewards/rejected": -21.17291831970215, "step": 5755 }, { "epoch": 1.96, "learning_rate": 1.6516145712333064e-09, "logits/chosen": -0.030346497893333435, "logits/rejected": -0.025257272645831108, "logps/chosen": -232.08984375, "logps/rejected": -432.10418701171875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.0791943073272705, "rewards/margins": 18.909055709838867, "rewards/rejected": -19.98824691772461, "step": 5756 }, { "epoch": 1.96, "learning_rate": 1.620013990936031e-09, "logits/chosen": 0.06903088092803955, "logits/rejected": 0.10819537937641144, "logps/chosen": -241.4562530517578, "logps/rejected": -340.76519775390625, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.1919173002243042, "rewards/margins": 15.261425018310547, "rewards/rejected": -16.45334243774414, "step": 5757 }, { "epoch": 1.97, "learning_rate": 1.5887184019465206e-09, "logits/chosen": -0.04710117727518082, "logits/rejected": -0.02621392346918583, "logps/chosen": -140.6853485107422, "logps/rejected": -228.48074340820312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.8154208660125732, "rewards/margins": 10.663851737976074, "rewards/rejected": -12.47927188873291, "step": 5758 }, { "epoch": 1.97, "learning_rate": 1.5577278138252381e-09, "logits/chosen": 0.10047096759080887, "logits/rejected": 0.1386595517396927, "logps/chosen": -202.4959716796875, "logps/rejected": -373.7525939941406, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.3904483318328857, "rewards/margins": 17.788753509521484, "rewards/rejected": -19.179201126098633, "step": 5759 }, { "epoch": 1.97, "learning_rate": 1.5270422360391666e-09, "logits/chosen": 0.13228097558021545, "logits/rejected": 0.1474638432264328, "logps/chosen": -204.04515075683594, "logps/rejected": -380.17926025390625, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -1.99418306350708, "rewards/margins": 17.050228118896484, "rewards/rejected": -19.044410705566406, "step": 5760 }, { "epoch": 1.97, "learning_rate": 1.4966616779625852e-09, "logits/chosen": 0.03714054450392723, "logits/rejected": 0.07183965295553207, "logps/chosen": -198.10704040527344, "logps/rejected": -311.0665588378906, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -2.2081170082092285, "rewards/margins": 14.745243072509766, "rewards/rejected": -16.953359603881836, "step": 5761 }, { "epoch": 1.97, "learning_rate": 1.466586148876181e-09, "logits/chosen": 0.01698627881705761, "logits/rejected": 0.0677523985505104, "logps/chosen": -188.00369262695312, "logps/rejected": -246.04519653320312, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.600970983505249, "rewards/margins": 14.572479248046875, "rewards/rejected": -15.173450469970703, "step": 5762 }, { "epoch": 1.97, "learning_rate": 1.436815657967605e-09, "logits/chosen": 0.022804420441389084, "logits/rejected": 0.08089447766542435, "logps/chosen": -194.99110412597656, "logps/rejected": -284.2130432128906, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -2.5188329219818115, "rewards/margins": 13.12105655670166, "rewards/rejected": -15.639890670776367, "step": 5763 }, { "epoch": 1.97, "learning_rate": 1.4073502143313599e-09, "logits/chosen": 0.013064912520349026, "logits/rejected": 0.06827984750270844, "logps/chosen": -262.3677673339844, "logps/rejected": -386.3375244140625, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.6135319471359253, "rewards/margins": 17.96448516845703, "rewards/rejected": -19.578018188476562, "step": 5764 }, { "epoch": 1.97, "learning_rate": 1.3781898269688008e-09, "logits/chosen": 0.09828904271125793, "logits/rejected": 0.11749474704265594, "logps/chosen": -193.36520385742188, "logps/rejected": -440.75018310546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.099287509918213, "rewards/margins": 22.924184799194336, "rewards/rejected": -25.02347183227539, "step": 5765 }, { "epoch": 1.97, "learning_rate": 1.3493345047879134e-09, "logits/chosen": 0.032927628606557846, "logits/rejected": 0.06484101712703705, "logps/chosen": -216.8724822998047, "logps/rejected": -399.5457763671875, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -2.830129861831665, "rewards/margins": 19.004093170166016, "rewards/rejected": -21.83422088623047, "step": 5766 }, { "epoch": 1.97, "learning_rate": 1.3207842566037575e-09, "logits/chosen": -0.024753041565418243, "logits/rejected": -0.006941042374819517, "logps/chosen": -300.59625244140625, "logps/rejected": -492.5409240722656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.3056068420410156, "rewards/margins": 19.481956481933594, "rewards/rejected": -20.78756332397461, "step": 5767 }, { "epoch": 1.97, "learning_rate": 1.2925390911379119e-09, "logits/chosen": 0.06349465996026993, "logits/rejected": 0.08991765230894089, "logps/chosen": -184.84169006347656, "logps/rejected": -333.3659973144531, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.6581315994262695, "rewards/margins": 15.198144912719727, "rewards/rejected": -16.856277465820312, "step": 5768 }, { "epoch": 1.97, "learning_rate": 1.2645990170188081e-09, "logits/chosen": 0.08335825800895691, "logits/rejected": 0.11601415276527405, "logps/chosen": -167.98666381835938, "logps/rejected": -240.0986785888672, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.479628562927246, "rewards/margins": 11.041715621948242, "rewards/rejected": -12.521345138549805, "step": 5769 }, { "epoch": 1.97, "learning_rate": 1.2369640427819518e-09, "logits/chosen": 0.07365592569112778, "logits/rejected": 0.09445662796497345, "logps/chosen": -206.44091796875, "logps/rejected": -406.67657470703125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.1092116832733154, "rewards/margins": 18.367816925048828, "rewards/rejected": -19.477027893066406, "step": 5770 }, { "epoch": 1.97, "learning_rate": 1.2096341768692563e-09, "logits/chosen": 0.12119131535291672, "logits/rejected": 0.14106345176696777, "logps/chosen": -208.89947509765625, "logps/rejected": -362.2783508300781, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.8748959302902222, "rewards/margins": 14.90543270111084, "rewards/rejected": -16.78032875061035, "step": 5771 }, { "epoch": 1.97, "learning_rate": 1.1826094276298215e-09, "logits/chosen": -0.013852291740477085, "logits/rejected": 0.020553037524223328, "logps/chosen": -301.8432922363281, "logps/rejected": -566.1087646484375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 1.2147421836853027, "rewards/margins": 24.060949325561523, "rewards/rejected": -22.846206665039062, "step": 5772 }, { "epoch": 1.97, "learning_rate": 1.1558898033191544e-09, "logits/chosen": 0.07458633184432983, "logits/rejected": 0.11170071363449097, "logps/chosen": -225.07656860351562, "logps/rejected": -409.8123474121094, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.036179542541504, "rewards/margins": 19.068180084228516, "rewards/rejected": -20.104358673095703, "step": 5773 }, { "epoch": 1.97, "learning_rate": 1.1294753120998368e-09, "logits/chosen": 0.07303444296121597, "logits/rejected": 0.09004246443510056, "logps/chosen": -221.98480224609375, "logps/rejected": -379.3352966308594, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": -3.521341323852539, "rewards/margins": 16.945152282714844, "rewards/rejected": -20.466493606567383, "step": 5774 }, { "epoch": 1.97, "learning_rate": 1.1033659620410807e-09, "logits/chosen": 0.12804941833019257, "logits/rejected": 0.16419264674186707, "logps/chosen": -169.51611328125, "logps/rejected": -259.0265197753906, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.5269471406936646, "rewards/margins": 13.433762550354004, "rewards/rejected": -14.960708618164062, "step": 5775 }, { "epoch": 1.97, "learning_rate": 1.0775617611189503e-09, "logits/chosen": 0.09211035817861557, "logits/rejected": 0.10654586553573608, "logps/chosen": -241.03895568847656, "logps/rejected": -371.4818115234375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.1689977645874023, "rewards/margins": 15.660669326782227, "rewards/rejected": -17.829668045043945, "step": 5776 }, { "epoch": 1.97, "learning_rate": 1.0520627172162511e-09, "logits/chosen": 0.04961519315838814, "logits/rejected": 0.10052507370710373, "logps/chosen": -228.52537536621094, "logps/rejected": -382.2449951171875, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -2.0842959880828857, "rewards/margins": 15.610238075256348, "rewards/rejected": -17.694534301757812, "step": 5777 }, { "epoch": 1.97, "learning_rate": 1.026868838122641e-09, "logits/chosen": 0.07653789967298508, "logits/rejected": 0.12571106851100922, "logps/chosen": -172.4698944091797, "logps/rejected": -241.5569305419922, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.2551621198654175, "rewards/margins": 13.272250175476074, "rewards/rejected": -14.527411460876465, "step": 5778 }, { "epoch": 1.97, "learning_rate": 1.0019801315344079e-09, "logits/chosen": 0.0739484503865242, "logits/rejected": 0.10937712341547012, "logps/chosen": -231.67434692382812, "logps/rejected": -359.0827941894531, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.500636577606201, "rewards/margins": 14.57522964477539, "rewards/rejected": -18.07586669921875, "step": 5779 }, { "epoch": 1.97, "learning_rate": 9.773966050549143e-10, "logits/chosen": 0.10733167827129364, "logits/rejected": 0.12409897893667221, "logps/chosen": -211.85919189453125, "logps/rejected": -401.0562438964844, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.0679473876953125, "rewards/margins": 19.424209594726562, "rewards/rejected": -21.492156982421875, "step": 5780 }, { "epoch": 1.97, "learning_rate": 9.531182661938197e-10, "logits/chosen": 0.15544772148132324, "logits/rejected": 0.15516209602355957, "logps/chosen": -114.14639282226562, "logps/rejected": -288.7342224121094, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.8075896501541138, "rewards/margins": 13.200000762939453, "rewards/rejected": -15.007591247558594, "step": 5781 }, { "epoch": 1.97, "learning_rate": 9.291451223679691e-10, "logits/chosen": -0.007813663221895695, "logits/rejected": 0.012807403691112995, "logps/chosen": -206.96987915039062, "logps/rejected": -343.94775390625, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -1.7748571634292603, "rewards/margins": 13.761759757995605, "rewards/rejected": -15.536616325378418, "step": 5782 }, { "epoch": 1.97, "learning_rate": 9.054771809008376e-10, "logits/chosen": 0.05476643517613411, "logits/rejected": 0.09775490313768387, "logps/chosen": -211.16273498535156, "logps/rejected": -394.7757873535156, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.9311788082122803, "rewards/margins": 19.03377342224121, "rewards/rejected": -21.964950561523438, "step": 5783 }, { "epoch": 1.97, "learning_rate": 8.821144490225307e-10, "logits/chosen": 0.0305604487657547, "logits/rejected": 0.03793046995997429, "logps/chosen": -183.27392578125, "logps/rejected": -425.4044494628906, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.447451591491699, "rewards/margins": 19.286090850830078, "rewards/rejected": -21.733543395996094, "step": 5784 }, { "epoch": 1.97, "learning_rate": 8.590569338702281e-10, "logits/chosen": 0.05710949748754501, "logits/rejected": 0.07679890096187592, "logps/chosen": -226.95298767089844, "logps/rejected": -412.00103759765625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.9588663578033447, "rewards/margins": 18.002275466918945, "rewards/rejected": -19.96114158630371, "step": 5785 }, { "epoch": 1.97, "learning_rate": 8.363046424875175e-10, "logits/chosen": 0.05942656099796295, "logits/rejected": 0.12833288311958313, "logps/chosen": -234.4687042236328, "logps/rejected": -295.7165222167969, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -1.4664607048034668, "rewards/margins": 13.381265640258789, "rewards/rejected": -14.847725868225098, "step": 5786 }, { "epoch": 1.98, "learning_rate": 8.138575818250615e-10, "logits/chosen": 0.1500990241765976, "logits/rejected": 0.19468140602111816, "logps/chosen": -227.2803192138672, "logps/rejected": -302.5191650390625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 0.251182496547699, "rewards/margins": 14.534690856933594, "rewards/rejected": -14.28350830078125, "step": 5787 }, { "epoch": 1.98, "learning_rate": 7.917157587399303e-10, "logits/chosen": 0.060809873044490814, "logits/rejected": 0.1103740781545639, "logps/chosen": -225.4579315185547, "logps/rejected": -402.1427001953125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -3.2277536392211914, "rewards/margins": 17.989301681518555, "rewards/rejected": -21.217056274414062, "step": 5788 }, { "epoch": 1.98, "learning_rate": 7.698791799963799e-10, "logits/chosen": 0.011758052743971348, "logits/rejected": 0.06514634937047958, "logps/chosen": -233.3534393310547, "logps/rejected": -391.77581787109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.8386768102645874, "rewards/margins": 18.822280883789062, "rewards/rejected": -20.66095733642578, "step": 5789 }, { "epoch": 1.98, "learning_rate": 7.483478522649634e-10, "logits/chosen": 0.09996302425861359, "logits/rejected": 0.10686144977807999, "logps/chosen": -151.17919921875, "logps/rejected": -339.24188232421875, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -2.728626012802124, "rewards/margins": 15.01506233215332, "rewards/rejected": -17.743688583374023, "step": 5790 }, { "epoch": 1.98, "learning_rate": 7.271217821233077e-10, "logits/chosen": 0.062187276780605316, "logits/rejected": 0.07662045955657959, "logps/chosen": -276.21429443359375, "logps/rejected": -489.5269470214844, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.6644257307052612, "rewards/margins": 22.95801544189453, "rewards/rejected": -23.622438430786133, "step": 5791 }, { "epoch": 1.98, "learning_rate": 7.062009760557819e-10, "logits/chosen": 0.0012283864198252559, "logits/rejected": 0.06885989755392075, "logps/chosen": -194.88328552246094, "logps/rejected": -392.1431579589844, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.8330252170562744, "rewards/margins": 20.654542922973633, "rewards/rejected": -22.487565994262695, "step": 5792 }, { "epoch": 1.98, "learning_rate": 6.855854404532735e-10, "logits/chosen": 0.20157966017723083, "logits/rejected": 0.20658737421035767, "logps/chosen": -137.7157745361328, "logps/rejected": -270.2196960449219, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.0330700874328613, "rewards/margins": 12.481132507324219, "rewards/rejected": -15.514204025268555, "step": 5793 }, { "epoch": 1.98, "learning_rate": 6.652751816135227e-10, "logits/chosen": 0.14812695980072021, "logits/rejected": 0.16609074175357819, "logps/chosen": -206.54808044433594, "logps/rejected": -418.33935546875, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -2.0993309020996094, "rewards/margins": 20.77771759033203, "rewards/rejected": -22.87704849243164, "step": 5794 }, { "epoch": 1.98, "learning_rate": 6.452702057411218e-10, "logits/chosen": 0.1446271687746048, "logits/rejected": 0.17320555448532104, "logps/chosen": -123.08480834960938, "logps/rejected": -205.76193237304688, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.7671111822128296, "rewards/margins": 11.92723274230957, "rewards/rejected": -12.694345474243164, "step": 5795 }, { "epoch": 1.98, "learning_rate": 6.255705189471827e-10, "logits/chosen": 0.19127771258354187, "logits/rejected": 0.19814737141132355, "logps/chosen": -168.79168701171875, "logps/rejected": -376.2193603515625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.9836046695709229, "rewards/margins": 18.655662536621094, "rewards/rejected": -20.639266967773438, "step": 5796 }, { "epoch": 1.98, "learning_rate": 6.061761272498911e-10, "logits/chosen": 0.07725945860147476, "logits/rejected": 0.10871589183807373, "logps/chosen": -221.22616577148438, "logps/rejected": -361.4208068847656, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.6890764236450195, "rewards/margins": 16.454532623291016, "rewards/rejected": -18.14360809326172, "step": 5797 }, { "epoch": 1.98, "learning_rate": 5.870870365738412e-10, "logits/chosen": 0.1557687371969223, "logits/rejected": 0.18498452007770538, "logps/chosen": -186.45260620117188, "logps/rejected": -276.1171569824219, "loss": 0.0294, "rewards/accuracies": 0.875, "rewards/chosen": -3.159653902053833, "rewards/margins": 9.937427520751953, "rewards/rejected": -13.097082138061523, "step": 5798 }, { "epoch": 1.98, "learning_rate": 5.683032527504794e-10, "logits/chosen": -0.07249120622873306, "logits/rejected": -0.03544637933373451, "logps/chosen": -201.02159118652344, "logps/rejected": -365.5687255859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.559108018875122, "rewards/margins": 15.969658851623535, "rewards/rejected": -18.528766632080078, "step": 5799 }, { "epoch": 1.98, "learning_rate": 5.498247815179935e-10, "logits/chosen": 0.09832146018743515, "logits/rejected": 0.10874678194522858, "logps/chosen": -236.6554718017578, "logps/rejected": -439.77001953125, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.9534845352172852, "rewards/margins": 17.409330368041992, "rewards/rejected": -19.36281394958496, "step": 5800 }, { "epoch": 1.98, "learning_rate": 5.316516285213124e-10, "logits/chosen": 0.19681492447853088, "logits/rejected": 0.21667133271694183, "logps/chosen": -195.45095825195312, "logps/rejected": -336.64068603515625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.3174717426300049, "rewards/margins": 16.02977180480957, "rewards/rejected": -17.34724235534668, "step": 5801 }, { "epoch": 1.98, "learning_rate": 5.137837993121064e-10, "logits/chosen": -0.03906786069273949, "logits/rejected": -0.010667407885193825, "logps/chosen": -257.14154052734375, "logps/rejected": -455.5458068847656, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.4207336902618408, "rewards/margins": 19.39995765686035, "rewards/rejected": -20.820690155029297, "step": 5802 }, { "epoch": 1.98, "learning_rate": 4.962212993487868e-10, "logits/chosen": 0.049903158098459244, "logits/rejected": 0.0701867863535881, "logps/chosen": -215.34132385253906, "logps/rejected": -402.4737548828125, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -2.1322898864746094, "rewards/margins": 16.965049743652344, "rewards/rejected": -19.097341537475586, "step": 5803 }, { "epoch": 1.98, "learning_rate": 4.789641339963957e-10, "logits/chosen": 0.06285480409860611, "logits/rejected": 0.05887753516435623, "logps/chosen": -160.78433227539062, "logps/rejected": -335.1217041015625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.38014426827430725, "rewards/margins": 15.751029968261719, "rewards/rejected": -15.370885848999023, "step": 5804 }, { "epoch": 1.98, "learning_rate": 4.620123085267158e-10, "logits/chosen": 0.08090639859437943, "logits/rejected": 0.09737134724855423, "logps/chosen": -158.41404724121094, "logps/rejected": -382.43719482421875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.8450815677642822, "rewards/margins": 17.94500160217285, "rewards/rejected": -20.790082931518555, "step": 5805 }, { "epoch": 1.98, "learning_rate": 4.453658281183825e-10, "logits/chosen": 0.08982445299625397, "logits/rejected": 0.16082681715488434, "logps/chosen": -174.46051025390625, "logps/rejected": -324.84283447265625, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -1.614750862121582, "rewards/margins": 17.381826400756836, "rewards/rejected": -18.996578216552734, "step": 5806 }, { "epoch": 1.98, "learning_rate": 4.290246978566614e-10, "logits/chosen": 0.14091706275939941, "logits/rejected": 0.16291293501853943, "logps/chosen": -185.8735809326172, "logps/rejected": -289.2099914550781, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.917254090309143, "rewards/margins": 11.987863540649414, "rewards/rejected": -13.905117988586426, "step": 5807 }, { "epoch": 1.98, "learning_rate": 4.129889227334482e-10, "logits/chosen": 0.1349230855703354, "logits/rejected": 0.17207568883895874, "logps/chosen": -209.9008026123047, "logps/rejected": -324.46124267578125, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": -1.955803394317627, "rewards/margins": 15.272172927856445, "rewards/rejected": -17.227977752685547, "step": 5808 }, { "epoch": 1.98, "learning_rate": 3.9725850764760206e-10, "logits/chosen": -0.0944402813911438, "logits/rejected": -0.058810990303754807, "logps/chosen": -229.42230224609375, "logps/rejected": -448.7803955078125, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -2.0463669300079346, "rewards/margins": 21.895185470581055, "rewards/rejected": -23.941551208496094, "step": 5809 }, { "epoch": 1.98, "learning_rate": 3.8183345740439024e-10, "logits/chosen": -0.009051834233105183, "logits/rejected": 0.04349071532487869, "logps/chosen": -202.5230255126953, "logps/rejected": -338.47991943359375, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -1.7960171699523926, "rewards/margins": 16.956951141357422, "rewards/rejected": -18.752967834472656, "step": 5810 }, { "epoch": 1.98, "learning_rate": 3.667137767160433e-10, "logits/chosen": 0.08244391530752182, "logits/rejected": 0.11839602142572403, "logps/chosen": -222.9161834716797, "logps/rejected": -373.624267578125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -2.5581016540527344, "rewards/margins": 15.656686782836914, "rewards/rejected": -18.21478843688965, "step": 5811 }, { "epoch": 1.98, "learning_rate": 3.518994702014222e-10, "logits/chosen": 0.04693280905485153, "logits/rejected": 0.09581442177295685, "logps/chosen": -234.5560302734375, "logps/rejected": -340.4913330078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.4359397888183594, "rewards/margins": 15.663172721862793, "rewards/rejected": -17.09911346435547, "step": 5812 }, { "epoch": 1.98, "learning_rate": 3.373905423860179e-10, "logits/chosen": 0.06267280131578445, "logits/rejected": 0.07426446676254272, "logps/chosen": -184.81427001953125, "logps/rejected": -377.632080078125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.849994421005249, "rewards/margins": 16.647804260253906, "rewards/rejected": -18.497798919677734, "step": 5813 }, { "epoch": 1.98, "learning_rate": 3.2318699770217397e-10, "logits/chosen": -0.0886038988828659, "logits/rejected": -0.0329253226518631, "logps/chosen": -221.74195861816406, "logps/rejected": -317.80706787109375, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.7230181097984314, "rewards/margins": 15.23813247680664, "rewards/rejected": -15.961152076721191, "step": 5814 }, { "epoch": 1.98, "learning_rate": 3.092888404888638e-10, "logits/chosen": 0.09293588250875473, "logits/rejected": 0.131127268075943, "logps/chosen": -178.4605712890625, "logps/rejected": -273.5951232910156, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.227275013923645, "rewards/margins": 13.855655670166016, "rewards/rejected": -15.082929611206055, "step": 5815 }, { "epoch": 1.98, "learning_rate": 2.956960749918025e-10, "logits/chosen": 0.07877885550260544, "logits/rejected": 0.11327169090509415, "logps/chosen": -216.38571166992188, "logps/rejected": -355.2979736328125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.7766664624214172, "rewards/margins": 17.840194702148438, "rewards/rejected": -18.616859436035156, "step": 5816 }, { "epoch": 1.99, "learning_rate": 2.8240870536333503e-10, "logits/chosen": 0.09155861288309097, "logits/rejected": 0.09773573279380798, "logps/chosen": -148.28057861328125, "logps/rejected": -321.1471252441406, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.1911559104919434, "rewards/margins": 16.032012939453125, "rewards/rejected": -17.223167419433594, "step": 5817 }, { "epoch": 1.99, "learning_rate": 2.6942673566265894e-10, "logits/chosen": 0.040005750954151154, "logits/rejected": 0.08877343684434891, "logps/chosen": -188.78182983398438, "logps/rejected": -331.46112060546875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.1311275959014893, "rewards/margins": 16.357847213745117, "rewards/rejected": -18.48897361755371, "step": 5818 }, { "epoch": 1.99, "learning_rate": 2.567501698554908e-10, "logits/chosen": 0.07943574339151382, "logits/rejected": 0.10341345518827438, "logps/chosen": -157.47955322265625, "logps/rejected": -333.0390625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.5768043994903564, "rewards/margins": 15.905508041381836, "rewards/rejected": -17.48231315612793, "step": 5819 }, { "epoch": 1.99, "learning_rate": 2.4437901181439955e-10, "logits/chosen": 0.14015541970729828, "logits/rejected": 0.15799543261528015, "logps/chosen": -182.56983947753906, "logps/rejected": -343.7646179199219, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.1720333099365234, "rewards/margins": 16.575511932373047, "rewards/rejected": -18.74754524230957, "step": 5820 }, { "epoch": 1.99, "learning_rate": 2.3231326531858441e-10, "logits/chosen": -0.012337482534348965, "logits/rejected": 0.0006122099002823234, "logps/chosen": -294.9481201171875, "logps/rejected": -418.6904602050781, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.6194301843643188, "rewards/margins": 15.863690376281738, "rewards/rejected": -17.48311996459961, "step": 5821 }, { "epoch": 1.99, "learning_rate": 2.205529340539858e-10, "logits/chosen": 0.11212374269962311, "logits/rejected": 0.14561176300048828, "logps/chosen": -227.67825317382812, "logps/rejected": -394.6459045410156, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.3801159858703613, "rewards/margins": 17.707212448120117, "rewards/rejected": -19.087329864501953, "step": 5822 }, { "epoch": 1.99, "learning_rate": 2.0909802161317435e-10, "logits/chosen": -0.06909158080816269, "logits/rejected": -0.027532102540135384, "logps/chosen": -276.21234130859375, "logps/rejected": -416.45843505859375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2477798461914062, "rewards/margins": 16.634368896484375, "rewards/rejected": -17.88214874267578, "step": 5823 }, { "epoch": 1.99, "learning_rate": 1.9794853149557312e-10, "logits/chosen": 0.1471908837556839, "logits/rejected": 0.16526897251605988, "logps/chosen": -201.14361572265625, "logps/rejected": -349.4851989746094, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0203893184661865, "rewards/margins": 16.89768409729004, "rewards/rejected": -17.918071746826172, "step": 5824 }, { "epoch": 1.99, "learning_rate": 1.8710446710701321e-10, "logits/chosen": 0.005977436434477568, "logits/rejected": 0.020358901470899582, "logps/chosen": -223.01815795898438, "logps/rejected": -319.9001159667969, "loss": 0.0309, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3227270245552063, "rewards/margins": 11.96023178100586, "rewards/rejected": -12.282958030700684, "step": 5825 }, { "epoch": 1.99, "learning_rate": 1.765658317604002e-10, "logits/chosen": 0.01345091313123703, "logits/rejected": 0.028494350612163544, "logps/chosen": -281.0801086425781, "logps/rejected": -473.9888916015625, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -0.8055614233016968, "rewards/margins": 19.84111785888672, "rewards/rejected": -20.646678924560547, "step": 5826 }, { "epoch": 1.99, "learning_rate": 1.6633262867504773e-10, "logits/chosen": 0.03629351407289505, "logits/rejected": 0.08365320414304733, "logps/chosen": -227.7891082763672, "logps/rejected": -352.04296875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -3.3554043769836426, "rewards/margins": 15.735836029052734, "rewards/rejected": -19.09123992919922, "step": 5827 }, { "epoch": 1.99, "learning_rate": 1.564048609771218e-10, "logits/chosen": -0.005542968865483999, "logits/rejected": -0.0008358177146874368, "logps/chosen": -259.0640563964844, "logps/rejected": -441.8807678222656, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.8414920568466187, "rewards/margins": 17.67650032043457, "rewards/rejected": -19.51799201965332, "step": 5828 }, { "epoch": 1.99, "learning_rate": 1.4678253169930764e-10, "logits/chosen": 0.10416603833436966, "logits/rejected": 0.15475916862487793, "logps/chosen": -200.71746826171875, "logps/rejected": -320.9095153808594, "loss": 0.0198, "rewards/accuracies": 0.9375, "rewards/chosen": -2.910621404647827, "rewards/margins": 15.235349655151367, "rewards/rejected": -18.145971298217773, "step": 5829 }, { "epoch": 1.99, "learning_rate": 1.3746564378114278e-10, "logits/chosen": 0.13379007577896118, "logits/rejected": 0.18277519941329956, "logps/chosen": -210.79071044921875, "logps/rejected": -393.5135192871094, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.2115797996520996, "rewards/margins": 21.665729522705078, "rewards/rejected": -22.87730598449707, "step": 5830 }, { "epoch": 1.99, "learning_rate": 1.2845420006879493e-10, "logits/chosen": 0.047640420496463776, "logits/rejected": 0.09437006711959839, "logps/chosen": -203.9637451171875, "logps/rejected": -337.83795166015625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.5006383061408997, "rewards/margins": 17.982955932617188, "rewards/rejected": -17.482318878173828, "step": 5831 }, { "epoch": 1.99, "learning_rate": 1.197482033151731e-10, "logits/chosen": 0.07959876954555511, "logits/rejected": 0.08470147103071213, "logps/chosen": -170.66940307617188, "logps/rejected": -344.9428405761719, "loss": 0.0472, "rewards/accuracies": 1.0, "rewards/chosen": -1.5199030637741089, "rewards/margins": 15.682658195495605, "rewards/rejected": -17.202560424804688, "step": 5832 }, { "epoch": 1.99, "learning_rate": 1.1134765617981657e-10, "logits/chosen": -0.09382495284080505, "logits/rejected": -0.046193379908800125, "logps/chosen": -255.60646057128906, "logps/rejected": -358.1398620605469, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -1.697662591934204, "rewards/margins": 15.156582832336426, "rewards/rejected": -16.854246139526367, "step": 5833 }, { "epoch": 1.99, "learning_rate": 1.0325256122911686e-10, "logits/chosen": -0.0011368468403816223, "logits/rejected": 0.011166652664542198, "logps/chosen": -154.54525756835938, "logps/rejected": -391.2733154296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.1227633953094482, "rewards/margins": 19.471986770629883, "rewards/rejected": -21.594749450683594, "step": 5834 }, { "epoch": 1.99, "learning_rate": 9.546292093576269e-11, "logits/chosen": 0.12613020837306976, "logits/rejected": 0.1344747543334961, "logps/chosen": -190.1756591796875, "logps/rejected": -372.70819091796875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -2.3879268169403076, "rewards/margins": 16.951122283935547, "rewards/rejected": -19.339048385620117, "step": 5835 }, { "epoch": 1.99, "learning_rate": 8.797873767951713e-11, "logits/chosen": 0.12034618854522705, "logits/rejected": 0.1596415787935257, "logps/chosen": -185.55975341796875, "logps/rejected": -342.8599853515625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.8520634174346924, "rewards/margins": 17.188100814819336, "rewards/rejected": -19.040163040161133, "step": 5836 }, { "epoch": 1.99, "learning_rate": 8.080001374666246e-11, "logits/chosen": 0.03670400008559227, "logits/rejected": 0.04632692039012909, "logps/chosen": -219.14056396484375, "logps/rejected": -390.31585693359375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.5634043216705322, "rewards/margins": 18.267982482910156, "rewards/rejected": -18.831388473510742, "step": 5837 }, { "epoch": 1.99, "learning_rate": 7.392675133022219e-11, "logits/chosen": -0.02553573064506054, "logits/rejected": 0.008275080472230911, "logps/chosen": -241.9555206298828, "logps/rejected": -354.8349609375, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -1.5119738578796387, "rewards/margins": 14.832135200500488, "rewards/rejected": -16.34410858154297, "step": 5838 }, { "epoch": 1.99, "learning_rate": 6.735895252996115e-11, "logits/chosen": 0.06307070702314377, "logits/rejected": 0.09425770491361618, "logps/chosen": -179.4792938232422, "logps/rejected": -284.897216796875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.3041245937347412, "rewards/margins": 12.698797225952148, "rewards/rejected": -14.002923011779785, "step": 5839 }, { "epoch": 1.99, "learning_rate": 6.109661935205234e-11, "logits/chosen": -0.0446220301091671, "logits/rejected": 0.02654232643544674, "logps/chosen": -252.04061889648438, "logps/rejected": -365.8668212890625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.531957745552063, "rewards/margins": 19.208765029907227, "rewards/rejected": -20.74072265625, "step": 5840 }, { "epoch": 1.99, "learning_rate": 5.513975370974311e-11, "logits/chosen": -0.009219837374985218, "logits/rejected": 0.039572443813085556, "logps/chosen": -288.08343505859375, "logps/rejected": -366.48028564453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.9384779930114746, "rewards/margins": 13.973687171936035, "rewards/rejected": -15.912164688110352, "step": 5841 }, { "epoch": 1.99, "learning_rate": 4.948835742268897e-11, "logits/chosen": 0.08878552168607712, "logits/rejected": 0.10331949591636658, "logps/chosen": -173.16513061523438, "logps/rejected": -373.68731689453125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.8997347354888916, "rewards/margins": 19.713842391967773, "rewards/rejected": -20.61357879638672, "step": 5842 }, { "epoch": 1.99, "learning_rate": 4.4142432217286754e-11, "logits/chosen": 0.03547339513897896, "logits/rejected": 0.0806223601102829, "logps/chosen": -243.3116912841797, "logps/rejected": -344.7597351074219, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.9943249225616455, "rewards/margins": 16.861669540405273, "rewards/rejected": -17.855995178222656, "step": 5843 }, { "epoch": 1.99, "learning_rate": 3.910197972667451e-11, "logits/chosen": 0.08827055245637894, "logits/rejected": 0.11411202698945999, "logps/chosen": -197.7501983642578, "logps/rejected": -317.73809814453125, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -2.783414125442505, "rewards/margins": 13.266868591308594, "rewards/rejected": -16.050281524658203, "step": 5844 }, { "epoch": 1.99, "learning_rate": 3.436700149062055e-11, "logits/chosen": 0.11705609411001205, "logits/rejected": 0.14512601494789124, "logps/chosen": -191.20191955566406, "logps/rejected": -372.17236328125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.9459850788116455, "rewards/margins": 17.23833465576172, "rewards/rejected": -19.1843204498291, "step": 5845 }, { "epoch": 2.0, "learning_rate": 2.993749895574549e-11, "logits/chosen": 0.09530644863843918, "logits/rejected": 0.15510649979114532, "logps/chosen": -181.39031982421875, "logps/rejected": -361.6470642089844, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -2.5167131423950195, "rewards/margins": 16.70277214050293, "rewards/rejected": -19.219484329223633, "step": 5846 }, { "epoch": 2.0, "learning_rate": 2.5813473474967096e-11, "logits/chosen": 0.11066164076328278, "logits/rejected": 0.12770996987819672, "logps/chosen": -129.9010467529297, "logps/rejected": -320.7065124511719, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": -2.0442848205566406, "rewards/margins": 18.490068435668945, "rewards/rejected": -20.534353256225586, "step": 5847 }, { "epoch": 2.0, "learning_rate": 2.1994926308277483e-11, "logits/chosen": 0.17086783051490784, "logits/rejected": 0.20407965779304504, "logps/chosen": -216.4322509765625, "logps/rejected": -345.33782958984375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.008280873298645, "rewards/margins": 15.802693367004395, "rewards/rejected": -16.81097412109375, "step": 5848 }, { "epoch": 2.0, "learning_rate": 1.8481858622076963e-11, "logits/chosen": 0.02269790507853031, "logits/rejected": 0.0475911982357502, "logps/chosen": -195.61802673339844, "logps/rejected": -317.6656799316406, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.6922435760498047, "rewards/margins": 14.803217887878418, "rewards/rejected": -16.495460510253906, "step": 5849 }, { "epoch": 2.0, "learning_rate": 1.527427148972915e-11, "logits/chosen": 0.05862908065319061, "logits/rejected": 0.09684605151414871, "logps/chosen": -256.5860595703125, "logps/rejected": -319.1742248535156, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -0.6852660179138184, "rewards/margins": 12.541199684143066, "rewards/rejected": -13.22646713256836, "step": 5850 }, { "epoch": 2.0, "learning_rate": 1.2372165890894848e-11, "logits/chosen": -0.051999807357788086, "logits/rejected": -0.04016151279211044, "logps/chosen": -229.8362579345703, "logps/rejected": -419.4264831542969, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.0677876472473145, "rewards/margins": 16.333656311035156, "rewards/rejected": -17.401443481445312, "step": 5851 }, { "epoch": 2.0, "learning_rate": 9.775542712309182e-12, "logits/chosen": 0.07495659589767456, "logits/rejected": 0.10924974828958511, "logps/chosen": -194.46844482421875, "logps/rejected": -311.94256591796875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.834767460823059, "rewards/margins": 14.929481506347656, "rewards/rejected": -16.764249801635742, "step": 5852 }, { "epoch": 2.0, "learning_rate": 7.484402747115482e-12, "logits/chosen": 0.074849933385849, "logits/rejected": 0.09610404819250107, "logps/chosen": -210.3170928955078, "logps/rejected": -367.49420166015625, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -2.260467052459717, "rewards/margins": 16.316070556640625, "rewards/rejected": -18.576536178588867, "step": 5853 }, { "epoch": 2.0, "learning_rate": 5.498746695198342e-12, "logits/chosen": -0.04423106461763382, "logits/rejected": -0.04224763810634613, "logps/chosen": -241.53292846679688, "logps/rejected": -484.0286865234375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.1900978088378906, "rewards/margins": 20.291683197021484, "rewards/rejected": -22.481782913208008, "step": 5854 }, { "epoch": 2.0, "learning_rate": 3.818575163183624e-12, "logits/chosen": 0.21203742921352386, "logits/rejected": 0.2276579588651657, "logps/chosen": -176.21067810058594, "logps/rejected": -357.2720642089844, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.5782510042190552, "rewards/margins": 18.613224029541016, "rewards/rejected": -20.19147491455078, "step": 5855 }, { "epoch": 2.0, "learning_rate": 2.4438886643274314e-12, "logits/chosen": 0.11532409489154816, "logits/rejected": 0.14387980103492737, "logps/chosen": -191.62246704101562, "logps/rejected": -345.50885009765625, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.7515897154808044, "rewards/margins": 16.982152938842773, "rewards/rejected": -17.73374366760254, "step": 5856 }, { "epoch": 2.0, "learning_rate": 1.374687618627135e-12, "logits/chosen": -0.008204166777431965, "logits/rejected": 0.044066883623600006, "logps/chosen": -217.99636840820312, "logps/rejected": -345.033447265625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.4133399426937103, "rewards/margins": 17.80525779724121, "rewards/rejected": -18.218597412109375, "step": 5857 }, { "epoch": 2.0, "learning_rate": 6.109723527103483e-13, "logits/chosen": 0.11745410412549973, "logits/rejected": 0.1499621719121933, "logps/chosen": -201.99020385742188, "logps/rejected": -327.41558837890625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.5998852252960205, "rewards/margins": 15.426535606384277, "rewards/rejected": -16.02642059326172, "step": 5858 }, { "epoch": 2.0, "learning_rate": 1.5274309983492883e-13, "logits/chosen": 0.03009359911084175, "logits/rejected": 0.019010717049241066, "logps/chosen": -261.15484619140625, "logps/rejected": -473.71051025390625, "loss": 0.0283, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6242518424987793, "rewards/margins": 18.49045753479004, "rewards/rejected": -20.114709854125977, "step": 5859 }, { "epoch": 2.0, "learning_rate": 0.0, "logits/chosen": 0.1274423599243164, "logits/rejected": 0.12147689610719681, "logps/chosen": -222.64886474609375, "logps/rejected": -417.8114318847656, "loss": 0.0372, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5563544034957886, "rewards/margins": 15.248568534851074, "rewards/rejected": -15.804922103881836, "step": 5860 }, { "epoch": 2.0, "step": 5860, "total_flos": 0.0, "train_loss": 0.03752805318084498, "train_runtime": 93720.2986, "train_samples_per_second": 4.001, "train_steps_per_second": 0.063 } ], "max_steps": 5860, "num_train_epochs": 2, "total_flos": 0.0, "trial_name": null, "trial_params": null }